# Setup

In [None]:
# os.environ['LC_ALL'] = 'C.UTF-8'
# os.environ['LANG'] = 'C.UTF-8'

In [None]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


Get MusicGen git repo

In [None]:
!python3 -m pip install -U git+https://github.com/facebookresearch/audiocraft#egg=audiocraft

Collecting audiocraft
  Cloning https://github.com/facebookresearch/audiocraft to /tmp/pip-install-0me97eqy/audiocraft_8aa769c97b2d4c93a1d7ba1ad7d338cb
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/audiocraft /tmp/pip-install-0me97eqy/audiocraft_8aa769c97b2d4c93a1d7ba1ad7d338cb
  Resolved https://github.com/facebookresearch/audiocraft to commit 72cb16f9fb239e9cf03f7bd997198c7d7a67a01c
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting av==11.0.0 (from audiocraft)
  Downloading av-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m43.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops (from audiocraft)
  Downloading einops-0.8.0-py3-none-any.whl (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting flashy>=0.0.1 (from audiocraf

Access to Google Drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Imports

In [None]:
import os
import random
from pydub import AudioSegment

import soundfile as sf
import numpy as np

# Data Processing

Create auxiliary folders

In [None]:
new_folders = ["raw", "output", "musicgen_trainer_dir"]

for folder_name in new_folders:
  if not os.path.exists(folder_name):
    os.makedirs(folder_name)
    print(f"'{folder_name}'folder successfully created!")

'raw'folder successfully created!
'output'folder successfully created!
'musicgen_trainer_dir'folder successfully created!


Function to clean a directory

In [None]:
def clean_dir(dir_path):
  for file_name in os.listdir(dir_path):
    file_path = os.path.join(dir_path, file_name)
    if os.path.isfile(file_path):
      os.remove(file_path)
      print(f"File {file_path} was deleted")

Clean output directory

In [None]:
clean_dir('/content/drive/MyDrive/ap-project/raw/raw')

File /content/drive/MyDrive/ap-project/raw/raw/classic, bach.wav was deleted
File /content/drive/MyDrive/ap-project/raw/raw/classic, brahms.wav was deleted
File /content/drive/MyDrive/ap-project/raw/raw/classic, cambini.wav was deleted
File /content/drive/MyDrive/ap-project/raw/raw/classic, ravel.wav was deleted
File /content/drive/MyDrive/ap-project/raw/raw/classic, haydn.wav was deleted
File /content/drive/MyDrive/ap-project/raw/raw/classic, dvorak.wav was deleted
File /content/drive/MyDrive/ap-project/raw/raw/classic, beethoven.wav was deleted
File /content/drive/MyDrive/ap-project/raw/raw/classic, mozart.wav was deleted
File /content/drive/MyDrive/ap-project/raw/raw/classic, shubert.wav was deleted
File /content/drive/MyDrive/ap-project/raw/raw/classic, faure.wav was deleted


Remove musicgen_trainer dir and all its content

In [None]:
import shutil

shutil.rmtree("/content/musicgen_trainer_dir")

In case raw and output are not in Drive

In [None]:
!cp -r /content/raw/* "/content/drive/MyDrive/ap-project/raw"

In [None]:
!cp -r /content/output/* "/content/drive/MyDrive/ap-project/output"

If you upload more audios to raw's folder, clean output's folder before running process_audios function

In [None]:
clean_dir('/content/drive/MyDrive/ap-project/output')

Function to process audio data: segments audio in 30 seconds each and save .txt labels and set sample rate to 32000 Hz

In [None]:
def process_audios(file_path, output_dir, start_segment, segment_length=30): # 30 seconds
  # Load audio
  audio = AudioSegment.from_file(file_path)

  # Extract file name for .txt
  file_name = os.path.splitext(os.path.basename(file_path))[0]

  # Convert segment length to ms
  segment_length_ms = segment_length * 1000

  # Set the sample rate to 32000 Hz
  audio = audio.set_frame_rate(32000)

  # Calculate number of segments
  num_segments = (len(audio) + segment_length_ms - 1) // segment_length_ms

  for i in range(num_segments):
    start_time = i * segment_length_ms

    # Last segment
    if i == num_segments - 1:
      start_time = len(audio) - segment_length_ms

    end_time = start_time + segment_length_ms

    # Get segment
    segment = audio[start_time:end_time]
    print(f"i: {i} file: {file_path} - segment_{start_segment:03d}.wav")

    # Save segment
    segment.export(os.path.join(output_dir, f'segment_{start_segment:03d}.wav'), format='wav')

    # Save label
    with open(os.path.join(output_dir, f'segment_{start_segment:03d}.txt'), 'w') as f:
      f.write(file_name)
    start_segment +=1

  return start_segment

Process all audios

In [None]:
output_dir = '/content/drive/MyDrive/ap-project/output'
samples_dir = '/content/drive/MyDrive/ap-project/raw'

if not os.path.exists(output_dir):
  os.makedirs(output_dir)

start_segment = 0

for file_name in os.listdir(samples_dir):
  print(file_name)
  if file_name.endswith('.wav') or file_name.endswith('.mp3'):
    file_path = os.path.join(samples_dir, file_name)
    current_segment = process_audios(file_path, output_dir, start_segment, segment_length=30)
    start_segment = current_segment


classic, bach.wav
i: 0 file: raw/classic, bach.wav - segment_000.wav
i: 1 file: raw/classic, bach.wav - segment_001.wav
i: 2 file: raw/classic, bach.wav - segment_002.wav
i: 3 file: raw/classic, bach.wav - segment_003.wav
classic, shubert.wav
i: 0 file: raw/classic, shubert.wav - segment_004.wav
i: 1 file: raw/classic, shubert.wav - segment_005.wav
i: 2 file: raw/classic, shubert.wav - segment_006.wav
i: 3 file: raw/classic, shubert.wav - segment_007.wav
i: 4 file: raw/classic, shubert.wav - segment_008.wav
i: 5 file: raw/classic, shubert.wav - segment_009.wav
i: 6 file: raw/classic, shubert.wav - segment_010.wav
i: 7 file: raw/classic, shubert.wav - segment_011.wav
i: 8 file: raw/classic, shubert.wav - segment_012.wav
i: 9 file: raw/classic, shubert.wav - segment_013.wav
i: 10 file: raw/classic, shubert.wav - segment_014.wav
i: 11 file: raw/classic, shubert.wav - segment_015.wav
i: 12 file: raw/classic, shubert.wav - segment_016.wav
classic, ravel.wav
i: 0 file: raw/classic, ravel.wav

In [None]:
import librosa

output_dir = "/content/drive/MyDrive/ap-project/output"

for file_name in os.listdir(output_dir):
  if file_name.endswith('.wav'):
    file_path = os.path.join(output_dir, file_name)
    audio, sample_rate = librosa.load(file_path, sr=None)

    if audio.shape[0] == 32000 * 30:
      print(f"{file_name} has the correct shape: {audio.shape[0]}")
    else:
      print(f"{file_name} does not have the correct shape: {audio.shape[0]}")


segment_000.wav has the correct shape: 960000
segment_001.wav has the correct shape: 960000
segment_002.wav has the correct shape: 960000
segment_003.wav has the correct shape: 960000
segment_004.wav has the correct shape: 960000
segment_005.wav has the correct shape: 960000
segment_006.wav has the correct shape: 960000
segment_007.wav has the correct shape: 960000
segment_008.wav has the correct shape: 960000
segment_009.wav has the correct shape: 960000
segment_010.wav has the correct shape: 960000
segment_011.wav has the correct shape: 960000
segment_012.wav has the correct shape: 960000
segment_013.wav has the correct shape: 960000
segment_014.wav has the correct shape: 960000
segment_015.wav has the correct shape: 960000
segment_016.wav has the correct shape: 960000
segment_017.wav has the correct shape: 960000
segment_018.wav has the correct shape: 960000
segment_019.wav has the correct shape: 960000
segment_020.wav has the correct shape: 960000
segment_021.wav has the correct sh

# Training process

Get MusicGen Trainer

In [None]:
!cd "/content/musicgen_trainer_dir" && git clone https://github.com/chavinlo/musicgen_trainer.git

Cloning into 'musicgen_trainer'...
remote: Enumerating objects: 166, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 166 (delta 14), reused 19 (delta 6), pack-reused 136[K
Receiving objects: 100% (166/166), 10.53 MiB | 13.63 MiB/s, done.
Resolving deltas: 100% (93/93), done.


In [None]:
!pip install wandb

Collecting wandb
  Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m32.8 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.2.0-py2.py3-none-any.whl (281 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.1/281.1 kB[0m [31m40.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86

In [None]:
import wandb

In [None]:
!python /content/musicgen_trainer_dir/musicgen_trainer/run.py --dataset_path /content/drive/MyDrive/ap-project/output --epochs 25 --batch_size 2

2024-05-16 19:07:50.251739: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-16 19:07:50.251789: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-16 19:07:50.356474: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
state_dict.bin: 100% 841M/841M [00:07<00:00, 109MB/s] 
spiece.model: 100% 792k/792k [00:00<00:00, 4.63MB/s]
tokenizer.json: 100% 1.39M/1.39M [00:00<00:00, 4.87MB/s]
config.json: 100% 1.21k/1.21k [00:00<00:00, 8.32MB/s]
model.safetensors: 100% 892M/892M [00:08<00:00, 111MB/s] 
compression_state_dict.bin: 100% 236M/236M [00:01<00:00, 125MB/s]
Tuning everything
Epoc

In [None]:
from audiocraft.models import musicgen
from audiocraft.utils.notebook import display_audio
import torch

Load model

In [None]:
model = musicgen.MusicGen.get_pretrained('small', device='cuda')
model.set_generation_params(duration=8)
model.lm.load_state_dict(torch.load('models/lm_final.pt'))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


<All keys matched successfully>

Prompt

In [None]:
res = model.generate([
    'classic, beethoven and bach'
],
  progress=True)

display_audio(res, 32000)

