In [15]:
# Build the audiocraft .jsonl manifests (one JSON object per clip) for the train and eval
# splits and save them where the training run expects them.

# set the following variable to True if you want to see a progress bar instead of the printed results:
# NOTE(review): none of the cells visible here read `use_tqdm`; the loops below always go
# through tqdm regardless of this value. Confirm whether the flag is still needed.
use_tqdm = False

import os
import json
import random
import librosa
from pydub import AudioSegment
import wave
import re

from functools import partial
from tqdm import tqdm
import numpy as np
# Rebind the name `tqdm` to a partial with position=0 and leave=True pre-applied, so every
# later tqdm(...) call in the notebook gets those options without repeating them.
# This line must stay after `from tqdm import tqdm`; re-running the whole cell is safe
# because the import above restores the original class first.
tqdm = partial(tqdm, position=0, leave=True)

In [21]:
import pandas as pd

# Caption tables for the two splits. Each row is expected to carry a clip id ("ytid", which
# is also the wav file stem) and its text description ("caption").
train_labels = pd.read_csv("train_labels.csv")
test_labels = pd.read_csv("test_labels.csv")

# Fail fast on a malformed CSV: the manifest loops further down take minutes per split, and
# a missing column in the test table would otherwise only surface after the train loop ran.
for _split_name, _split_labels in (("train", train_labels), ("test", test_labels)):
    _missing_cols = {"ytid", "caption"} - set(_split_labels.columns)
    assert not _missing_cols, (
        f"{_split_name}_labels.csv is missing column(s): {sorted(_missing_cols)}"
    )

In [24]:
# make sure the .jsonl has a place to go
os.makedirs("content/audiocraft/egs/train", exist_ok=True)
os.makedirs("content/audiocraft/egs/eval", exist_ok=True)

train_len = 0
eval_len = 0

dataset_path = "data/wav_files/wav-48/"

# Note names indexed by chroma row (row 0 = C); hoisted so the list is built once.
pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

# Write one JSON object per training clip: caption plus a few audio-derived fields.
with open("content/audiocraft/egs/train/data.jsonl", "w") as train_file:
    # zip() has no length, so pass total explicitly to get a percentage / ETA in the bar.
    for filename, caption in tqdm(zip(train_labels["ytid"], train_labels["caption"]),
                                  total=len(train_labels)):
        wav_path = os.path.join(dataset_path, f"{filename}.wav")

        # get key and BPM
        # librosa.load decodes to mono at librosa's default rate, so `sr` here is the
        # analysis rate, not necessarily the rate stored in the file.
        y, sr = librosa.load(wav_path)
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        # Crude key estimate: the pitch class with the most chroma energy over the clip.
        key = pitch_classes[int(np.argmax(np.sum(chroma, axis=1)))]
        length = librosa.get_duration(y=y, sr=sr)

        # beat_track may return tempo as a scalar or as a 1-element array depending on the
        # librosa version; normalise to a plain int so json.dumps can serialise it.
        bpm = round(float(np.atleast_1d(tempo).ravel()[0]))

        # populate json
        entry = {
            "key": f"{key}",
            "artist": "",
            # Rate actually stored in the file header, instead of a hard-coded 44100.
            "sample_rate": int(librosa.get_samplerate(wav_path)),
            "file_extension": "wav",
            "description": caption,
            "keywords": "",
            "duration": length,
            # Previously left as "" even though the tempo was computed above.
            "bpm": bpm,
            "genre": "",
            "title": "",
            "name": "",
            "instrument": "",
            "moods": "",
            "path": wav_path,
        }

        train_len += 1
        train_file.write(json.dumps(entry) + '\n')

print(train_len)

4063it [08:05,  8.36it/s]

4063





In [25]:
# Note names indexed by chroma row (row 0 = C); hoisted so the list is built once.
pitch_classes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

# Write one JSON object per evaluation clip: caption plus a few audio-derived fields.
with open("content/audiocraft/egs/eval/data.jsonl", "w") as eval_file:
    # zip() has no length, so pass total explicitly to get a percentage / ETA in the bar.
    for filename, caption in tqdm(zip(test_labels["ytid"], test_labels["caption"]),
                                  total=len(test_labels)):
        wav_path = os.path.join(dataset_path, f"{filename}.wav")

        # get key and BPM
        # librosa.load decodes to mono at librosa's default rate, so `sr` here is the
        # analysis rate, not necessarily the rate stored in the file.
        y, sr = librosa.load(wav_path)
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        # Crude key estimate: the pitch class with the most chroma energy over the clip.
        key = pitch_classes[int(np.argmax(np.sum(chroma, axis=1)))]
        length = librosa.get_duration(y=y, sr=sr)

        # beat_track may return tempo as a scalar or as a 1-element array depending on the
        # librosa version; normalise to a plain int so json.dumps can serialise it.
        bpm = round(float(np.atleast_1d(tempo).ravel()[0]))

        # populate json
        entry = {
            "key": f"{key}",
            "artist": "",
            # Rate actually stored in the file header, instead of a hard-coded 44100.
            "sample_rate": int(librosa.get_samplerate(wav_path)),
            "file_extension": "wav",
            "description": caption,
            "keywords": "",
            "duration": length,
            # Previously left as "" even though the tempo was computed above.
            "bpm": bpm,
            "genre": "",
            "title": "",
            "name": "",
            "instrument": "",
            "moods": "",
            "path": wav_path,
        }

        eval_len += 1
        eval_file.write(json.dumps(entry) + '\n')

print(eval_len)

1016it [02:11,  7.72it/s]

1016





In [26]:
# clear cuda mem for finetuning
# Grabs the CUDA device currently selected in this kernel (via numba) and resets it before
# the training run is launched as a separate shell process in a later cell.
# NOTE(review): a device reset presumably invalidates any GPU objects still held by this
# kernel (e.g. tensors or models from earlier cells) -- confirm nothing here is reused.
from numba import cuda
device = cuda.get_current_device()
device.reset()

In [33]:
%env USER=yimt
# CHANGE THIS

!

command = (
    "HYDRA_FULL_ERROR=1 dora -P audiocraft run"
    " solver=musicgen/musicgen_base_32khz"
    " model/lm/model_scale=small"
    " continue_from=//pretrained/facebook/musicgen-small"
    " conditioner=text2music"
    " dset=audio/random"
    " dataset.num_workers=2"
    " dataset.valid.num_samples=1"
    " dataset.batch_size=1" # CHANGE THIS
    " schedule.cosine.warmup=8"
    " optim.optimizer=adamw" # uses dadaw by default, which is worse for single-gpu runs
    " optim.lr=1e-4"
    " optim.epochs=5" # stops training after 5 epochs- change this
    " optim.updates_per_epoch=1000" # 2000 by default, change this if you want checkpoints quicker ig
    " optim.adam.weight_decay=0.01"
    " generate.lm.prompted_samples=False" # skip super long generate step
    " generate.lm.gen_gt_samples=True"
)

!{command}

env: USER=yimt
Dora directory: /tmp/audiocraft_yimt
See https://hydra.cc/docs/1.2/upgrades/1.1_to_1.2/changes_to_job_working_dir/ for more information.
  ret = run_job(
[[36m12-03 10:16:45[0m][[34mdora.distrib[0m][[32mINFO[0m] - world_size is 1, skipping init.[0m
Error executing job with overrides: ['solver=musicgen/musicgen_base_32khz', 'model/lm/model_scale=small', 'continue_from=//pretrained/facebook/musicgen-small', 'conditioner=text2music', 'dset=audio/finetune', 'dataset.num_workers=2', 'dataset.valid.num_samples=1', 'dataset.batch_size=1', 'schedule.cosine.warmup=8', 'optim.optimizer=adamw', 'optim.lr=1e-4', 'optim.epochs=5', 'optim.updates_per_epoch=1000', 'optim.adam.weight_decay=0.01', 'generate.lm.prompted_samples=False', 'generate.lm.gen_gt_samples=True']
Traceback (most recent call last):
  File "/opt/conda/bin/dora", line 8, in <module>
    sys.exit(main())
  File "/opt/conda/lib/python3.10/site-packages/dora/__main__.py", line 170, in main
    args.action(args, ma