# Setup

In [None]:
# os.environ['LC_ALL'] = 'C.UTF-8'
# os.environ['LANG'] = 'C.UTF-8'

In [None]:
# Pin pydub to the version this notebook was last run with; %pip installs into the running kernel's environment.
%pip install pydub==0.25.1

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


Get MusicGen git repo

In [None]:
# Install audiocraft pinned to the commit this notebook was last run against, so re-runs are reproducible.
%pip install -U "git+https://github.com/facebookresearch/audiocraft@72cb16f9fb239e9cf03f7bd997198c7d7a67a01c#egg=audiocraft"

Collecting audiocraft
  Cloning https://github.com/facebookresearch/audiocraft to /tmp/pip-install-qbdl4omf/audiocraft_8e75b95f636a4c239fe60e3cc6345064
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/audiocraft /tmp/pip-install-qbdl4omf/audiocraft_8e75b95f636a4c239fe60e3cc6345064
  Resolved https://github.com/facebookresearch/audiocraft to commit 72cb16f9fb239e9cf03f7bd997198c7d7a67a01c
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting av==11.0.0 (from audiocraft)
  Downloading av-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops (from audiocraft)
  Downloading einops-0.8.0-py3-none-any.whl (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting flashy>=0.0.1 (from audiocraf

Access to Google Drive

In [None]:
# Mount Google Drive at /content/drive; the project folders used below live under /content/drive/MyDrive/ap-project
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Imports

In [None]:
import os
import random
from pydub import AudioSegment

import soundfile as sf
import numpy as np

# Data Processing

Create auxiliary folders if necessary

In [None]:
new_folders = ["raw", "output", "musicgen_trainer_dir"]

# Create each working folder that is still missing (relative to the current
# working directory) and report only the ones that were actually created.
for folder_name in new_folders:
  already_there = os.path.exists(folder_name)
  if already_there:
    continue
  os.makedirs(folder_name)
  print(f"'{folder_name}'folder successfully created!")

'raw'folder successfully created!
'output'folder successfully created!
'musicgen_trainer_dir'folder successfully created!


Function to clean a directory

In [None]:
def clean_dir(dir_path):
  """Delete every regular file located directly inside ``dir_path``.

  Sub-directories (and anything inside them) are left untouched. One line is
  printed for each file that is removed.

  Args:
    dir_path: Path of the directory to clean.

  Returns:
    None. When ``dir_path`` is not an existing directory a notice is printed
    and nothing is deleted, so the helper is safe to call before the folder
    has been created.
  """
  if not os.path.isdir(dir_path):
    # Nothing to clean yet (e.g. first run, before the folder exists)
    print(f"Directory {dir_path} does not exist, nothing to clean")
    return

  for file_name in os.listdir(dir_path):
    file_path = os.path.join(dir_path, file_name)
    # Only plain files are removed; directories are skipped on purpose
    if os.path.isfile(file_path):
      os.remove(file_path)
      print(f"File {file_path} was deleted")

Clean output directory

In [None]:
# Removes only the files sitting directly in the output folder; clean_dir does not descend into sub-folders
clean_dir('/content/drive/MyDrive/ap-project/output')

File /content/drive/MyDrive/ap-project/output/other/segment_000.wav was deleted
File /content/drive/MyDrive/ap-project/output/other/segment_000.txt was deleted
File /content/drive/MyDrive/ap-project/output/other/segment_001.wav was deleted
File /content/drive/MyDrive/ap-project/output/other/segment_001.txt was deleted
File /content/drive/MyDrive/ap-project/output/other/segment_002.wav was deleted
File /content/drive/MyDrive/ap-project/output/other/segment_002.txt was deleted
File /content/drive/MyDrive/ap-project/output/other/segment_003.wav was deleted
File /content/drive/MyDrive/ap-project/output/other/segment_003.txt was deleted


Remove musicgen_trainer dir and all its content

In [None]:
import shutil

trainer_dir = "/content/musicgen_trainer_dir"

# Drop any previous checkout; the existence check avoids a FileNotFoundError
# when the folder has already been removed.
if os.path.isdir(trainer_dir):
  shutil.rmtree(trainer_dir)

# Recreate the folder empty so the later `cd ... && git clone` cell still finds it
os.makedirs(trainer_dir, exist_ok=True)

In case raw and output are not in Drive

In [None]:
# Create the Drive folder first: without it, cp has no directory to copy into
!mkdir -p "/content/drive/MyDrive/ap-project/raw" && cp -r /content/raw/* "/content/drive/MyDrive/ap-project/raw"

In [None]:
# Create the Drive folder first: without it, cp has no directory to copy into
!mkdir -p "/content/drive/MyDrive/ap-project/output" && cp -r /content/output/* "/content/drive/MyDrive/ap-project/output"

If you upload more audio files to the raw folder, clean the output folder before running the process_audios function again

In [None]:
clean_dir('/content/drive/MyDrive/ap-project/output')

# The processing cell writes its segments into the 'desgarrada' sub-folder, which
# the non-recursive clean_dir call above never reaches. Clear it too, so stale,
# higher-numbered segments from an earlier run do not linger in the dataset.
segments_dir = '/content/drive/MyDrive/ap-project/output/desgarrada'
if os.path.isdir(segments_dir):
  clean_dir(segments_dir)

Function to process audio data: resamples each audio file to 32000 Hz, splits it into 30-second segments, and saves every segment as a .wav file together with a .txt label containing the source file name

In [None]:
def process_audios(file_path, output_dir, start_segment, segment_length=30): # 30 seconds
  """Split one audio file into fixed-length WAV segments with text labels.

  The audio is resampled to 32000 Hz and cut into consecutive windows of
  ``segment_length`` seconds. The last window is shifted back so that it ends
  exactly at the end of the audio (it may therefore overlap the previous
  window). Audio shorter than one window produces a single segment starting
  at 0 that holds the whole clip. Each window is exported as
  ``segment_NNN.wav`` next to a ``segment_NNN.txt`` file containing the source
  file name without its extension.

  Args:
    file_path: Path of the audio file to split.
    output_dir: Existing directory that receives the .wav/.txt pairs.
    start_segment: Number used for the first segment written by this call.
    segment_length: Window length in seconds; must be positive.

  Returns:
    The next unused segment number, to be passed as ``start_segment`` when
    processing the following file.

  Raises:
    ValueError: If ``segment_length`` does not amount to at least 1 ms.
  """
  # Convert segment length to whole milliseconds (pydub slices in ms)
  segment_length_ms = int(segment_length * 1000)
  if segment_length_ms <= 0:
    raise ValueError("segment_length must be a positive number of seconds")

  # Load audio
  audio = AudioSegment.from_file(file_path)

  # Extract file name (without extension) for the .txt label
  file_name = os.path.splitext(os.path.basename(file_path))[0]

  # Set the sample rate to 32000 Hz
  audio = audio.set_frame_rate(32000)
  total_ms = len(audio)

  # Number of segments = ceil(total_ms / segment_length_ms)
  num_segments = (total_ms + segment_length_ms - 1) // segment_length_ms

  for i in range(num_segments):
    start_time = i * segment_length_ms

    # Last segment: align its end with the end of the audio. Clamp at 0 so a
    # clip shorter than one segment does not produce a negative slice start.
    if i == num_segments - 1:
      start_time = max(0, total_ms - segment_length_ms)

    end_time = start_time + segment_length_ms

    # Get segment
    segment = audio[start_time:end_time]
    print(f"i: {i} file: {file_path} - segment_{start_segment:03d}.wav")

    # Save segment
    segment.export(os.path.join(output_dir, f'segment_{start_segment:03d}.wav'), format='wav')

    # Save label; explicit UTF-8 so accented file names do not depend on the locale
    with open(os.path.join(output_dir, f'segment_{start_segment:03d}.txt'), 'w', encoding='utf-8') as f:
      f.write(file_name)
    start_segment += 1

  return start_segment

Process all audios

In [None]:
output_dir = '/content/drive/MyDrive/ap-project/output/desgarrada'
samples_dir = '/content/drive/MyDrive/ap-project/raw/desgarrada'

# exist_ok makes the cell safe to re-run without a separate existence check
os.makedirs(output_dir, exist_ok=True)

start_segment = 0

# Sort the listing: os.listdir returns entries in arbitrary order, which would
# make the segment numbering differ from one run to the next.
for file_name in sorted(os.listdir(samples_dir)):
  print(file_name)
  # Match extensions case-insensitively so files such as 'SONG.MP3' are not skipped
  if file_name.lower().endswith(('.wav', '.mp3')):
    file_path = os.path.join(samples_dir, file_name)
    # process_audios returns the next free segment number; carry it over to the next file
    current_segment = process_audios(file_path, output_dir, start_segment, segment_length=30)
    start_segment = current_segment


.ipynb_checkpoints
desgarrada, peter lamego.mp3
i: 0 file: /content/drive/MyDrive/ap-project/raw/other/desgarrada, peter lamego.mp3 - segment_000.wav
i: 1 file: /content/drive/MyDrive/ap-project/raw/other/desgarrada, peter lamego.mp3 - segment_001.wav
i: 2 file: /content/drive/MyDrive/ap-project/raw/other/desgarrada, peter lamego.mp3 - segment_002.wav
i: 3 file: /content/drive/MyDrive/ap-project/raw/other/desgarrada, peter lamego.mp3 - segment_003.wav
i: 4 file: /content/drive/MyDrive/ap-project/raw/other/desgarrada, peter lamego.mp3 - segment_004.wav
i: 5 file: /content/drive/MyDrive/ap-project/raw/other/desgarrada, peter lamego.mp3 - segment_005.wav
i: 6 file: /content/drive/MyDrive/ap-project/raw/other/desgarrada, peter lamego.mp3 - segment_006.wav
i: 7 file: /content/drive/MyDrive/ap-project/raw/other/desgarrada, peter lamego.mp3 - segment_007.wav
i: 8 file: /content/drive/MyDrive/ap-project/raw/other/desgarrada, peter lamego.mp3 - segment_008.wav
i: 9 file: /content/drive/MyDrive/

In [None]:
import librosa

output_dir = "/content/drive/MyDrive/ap-project/output/desgarrada"

expected_rate = 32000                    # sample rate set by process_audios
expected_samples = expected_rate * 30    # 30-second segments

for file_name in sorted(os.listdir(output_dir)):
  if file_name.endswith('.wav'):
    file_path = os.path.join(output_dir, file_name)
    # sr=None keeps the file's own sample rate instead of resampling on load
    audio, sample_rate = librosa.load(file_path, sr=None)

    # The sample count alone is not enough: a file at a different rate could
    # have the same number of samples, so check the rate first.
    if sample_rate != expected_rate:
      print(f"{file_name} does not have the correct sample rate: {sample_rate}")
    elif audio.shape[0] == expected_samples:
      print(f"{file_name} has the correct shape: {audio.shape[0]}")
    else:
      print(f"{file_name} does not have the correct shape: {audio.shape[0]}")


segment_000.wav has the correct shape: 960000
segment_001.wav has the correct shape: 960000
segment_002.wav has the correct shape: 960000
segment_003.wav has the correct shape: 960000
segment_004.wav has the correct shape: 960000
segment_005.wav has the correct shape: 960000
segment_006.wav has the correct shape: 960000
segment_007.wav has the correct shape: 960000
segment_008.wav has the correct shape: 960000
segment_009.wav has the correct shape: 960000
segment_010.wav has the correct shape: 960000
segment_011.wav has the correct shape: 960000


# Training process

Get MusicGen Trainer

In [None]:
# Create the target folder if needed and skip the clone when the repo is already checked out, so the cell can be re-run
!mkdir -p "/content/musicgen_trainer_dir" && cd "/content/musicgen_trainer_dir" && ([ -d musicgen_trainer ] || git clone https://github.com/chavinlo/musicgen_trainer.git)

Cloning into 'musicgen_trainer'...
remote: Enumerating objects: 166, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 166 (delta 14), reused 19 (delta 6), pack-reused 136[K
Receiving objects: 100% (166/166), 10.53 MiB | 24.24 MiB/s, done.
Resolving deltas: 100% (93/93), done.


In [None]:
# Pin wandb to the version this notebook was last run with; %pip installs into the running kernel's environment.
%pip install wandb==0.17.0

Collecting wandb
  Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.2.0-py2.py3-none-any.whl (281 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.1/281.1 kB[0m [31m33.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86

In [None]:
import wandb

In [None]:
# Run the musicgen_trainer script on the processed segments for 25 epochs with a batch size of 2.
# NOTE(review): a later cell loads 'models/lm_final.pt' — presumably written by this script relative to the working directory; confirm.
!python /content/musicgen_trainer_dir/musicgen_trainer/run.py --dataset_path /content/drive/MyDrive/ap-project/output/desgarrada --epochs 25 --batch_size 2

2024-05-17 14:44:13.136710: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-17 14:44:13.136797: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-17 14:44:13.138376: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Tuning everything
Epoch: 0/25, Batch: 0/6, Loss: 4.286153316497803
Epoch: 0/25, Batch: 1/6, Loss: 4.319931983947754
Epoch: 0/25, Batch: 2/6, Loss: 4.235071182250977
Epoch: 0/25, Batch: 3/6, Loss: 4.269157886505127
Epoch: 0/25, Batch: 4/6, Loss: 4.246388912200928
Epoch: 0/25, Batch: 5/6, Loss: 4.317263126373291
Epoch: 1/25, Batch: 0/6, Loss: 4.217859745025635
Epoc

In [None]:
from audiocraft.models import musicgen
from audiocraft.utils.notebook import display_audio
import torch

Load model

In [None]:
# Load the pretrained 'small' MusicGen model onto the GPU
model = musicgen.MusicGen.get_pretrained('small', device='cuda')
# duration=8 — presumably seconds of audio per generation, per the audiocraft API; confirm
model.set_generation_params(duration=8)
# Replace the language-model weights with the checkpoint at models/lm_final.pt (path is relative to the working directory)
model.lm.load_state_dict(torch.load('models/lm_final.pt'))



<All keys matched successfully>

Prompt

Clean results folder if necessary

In [None]:
# Deletes every file directly inside the results folder, including earlier generations; skip this cell to keep them
clean_dir("/content/drive/MyDrive/ap-project/results")

File /content/drive/MyDrive/ap-project/output/desgarrada/segment_000.wav was deleted
File /content/drive/MyDrive/ap-project/output/desgarrada/segment_000.txt was deleted
File /content/drive/MyDrive/ap-project/output/desgarrada/segment_001.wav was deleted
File /content/drive/MyDrive/ap-project/output/desgarrada/segment_001.txt was deleted
File /content/drive/MyDrive/ap-project/output/desgarrada/segment_002.wav was deleted
File /content/drive/MyDrive/ap-project/output/desgarrada/segment_002.txt was deleted
File /content/drive/MyDrive/ap-project/output/desgarrada/segment_003.wav was deleted
File /content/drive/MyDrive/ap-project/output/desgarrada/segment_003.txt was deleted
File /content/drive/MyDrive/ap-project/output/desgarrada/segment_004.wav was deleted
File /content/drive/MyDrive/ap-project/output/desgarrada/segment_004.txt was deleted
File /content/drive/MyDrive/ap-project/output/desgarrada/segment_005.wav was deleted
File /content/drive/MyDrive/ap-project/output/desgarrada/segment_

In [None]:
res = model.generate(
    ['desgarrada'],
    progress=True,
)

# Take the rate from the model instead of hard-coding it, so the export and the player always agree
sample_rate = model.sample_rate

audio_array = res.cpu().numpy()

# The model output is floating-point audio, but pydub interprets raw bytes as
# integer PCM: passing the float bytes with sample_width=4 produces noise.
# Convert to 16-bit PCM first.
# NOTE(review): assumes samples lie in [-1, 1] and that `res` holds a single mono clip — confirm.
pcm16 = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

audio_segment = AudioSegment(
    pcm16.tobytes(),
    frame_rate=sample_rate,
    sample_width=pcm16.dtype.itemsize,
    channels=1
)

output_dir = "/content/drive/MyDrive/ap-project/results"
# Make sure the results folder exists before listing it and exporting into it
os.makedirs(output_dir, exist_ok=True)

# Find the highest N among existing 'audioN.mp3' files and use N + 1 for the new file
existing_files = [f for f in os.listdir(output_dir) if f.startswith("audio") and f.endswith(".mp3")]
existing_numbers = [int(f[5:-4]) for f in existing_files if f[5:-4].isdigit()]
next_number = max(existing_numbers, default=0) + 1

output_path = os.path.join(output_dir, f"audio{next_number}.mp3")

audio_segment.export(output_path, format="mp3")

print(f"Audio saved at '{output_path}'")

display_audio(res, sample_rate)




Audio saved at '/content/drive/MyDrive/ap-project/results/audio1.mp3'
