In [9]:
import torch
from torch.optim import AdamW
from torch.cuda.amp import GradScaler
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader, Subset
import os

from audiocraft.models import MusicGen
from personal_musicgen.data.datasets import AudioDataset
from personal_musicgen.model_utils import train_step, eval_step

import wandb
# Authenticate with Weights & Biases before the run is created.
wandb.login()

# ---- Run configuration: every tunable value of this notebook lives here. ----
PROJECT_NAME = 'always'
RUN_NAME = 'voice-encodec'
#DATA_DIR = './data/always/chunks_original'
DATA_DIR = './dataset_wav'          # directory handed to AudioDataset
CHECKPOINT_DIR = './checkpoints'    # checkpoints are written to CHECKPOINT_DIR/<wandb run id>/
MODEL = 'small'                     # name passed to MusicGen.get_pretrained
START_WEIGHTS = None                # checkpoint path to resume from; None means no resume
TOTAL_DATA_RATIO = 1.0              # fraction of the shuffled dataset that is kept
EVAL_DATA_RATIO = 0                 # fraction of the kept data held out for eval; 0 disables eval
EPOCHS = 3
BATCH_SIZE = 16
GRAD_ACC_STEPS = 1
LR = 1e-4

# Mirror the configuration into the W&B run so each run records its own settings.
run = wandb.init(
    project = PROJECT_NAME,
    name = RUN_NAME,
    config = {
        'dataset': DATA_DIR,
        'model': MODEL,
        'start_weights': START_WEIGHTS,
        'total_data_ratio': TOTAL_DATA_RATIO,
        'eval_data_ratio': EVAL_DATA_RATIO,
        'epochs': EPOCHS,
        'batch_size': BATCH_SIZE,
        'grad_acc_steps': GRAD_ACC_STEPS,
        'lr': LR
    }
)

# One checkpoint folder per run id. exist_ok=True makes this safe to re-run and
# avoids the race between a separate existence check and the directory creation.
os.makedirs(CHECKPOINT_DIR + f'/{run.id}', exist_ok=True)

# Seed torch's global RNG so the permutation drawn below is the same on every run.
torch.manual_seed(222)

########## Data Setup ##########

# Evaluation is optional: the eval dataset and dataloader only exist when a
# non-zero share of the data is held out.
has_eval_split = EVAL_DATA_RATIO > 0

dataset = AudioDataset(DATA_DIR)

# Shuffle once, then keep the leading TOTAL_DATA_RATIO fraction of the permutation.
indices = torch.randperm(len(dataset)).tolist()
kept_count = int(len(indices) * TOTAL_DATA_RATIO)
shuffled_dataset = Subset(dataset, indices[:kept_count])

# The head of the shuffled subset is used for training, the tail for evaluation.
train_len = int(len(shuffled_dataset) * (1 - EVAL_DATA_RATIO))
train_dataset = Subset(shuffled_dataset, range(train_len))
if has_eval_split:
    eval_dataset = Subset(shuffled_dataset, range(train_len, len(shuffled_dataset)))

# Report the split sizes.
print(
    f'{len(train_dataset)=}, {len(eval_dataset)=}'
    if has_eval_split
    else f'{len(train_dataset)=}'
)

# Both loaders iterate in the fixed (already shuffled) order.
train_dataloader = DataLoader(train_dataset, BATCH_SIZE, shuffle=False)
if has_eval_split:
    eval_dataloader = DataLoader(eval_dataset, BATCH_SIZE, shuffle=False)

########## Model Setup ##########

torch.cuda.empty_cache()

# Load the pretrained MusicGen model and run its language model in float32.
model = MusicGen.get_pretrained(MODEL)
model.lm = model.lm.to(torch.float32)
device = model.device

# When resuming, read the checkpoint file once and reuse it for both the model
# and the optimizer. map_location='cpu' keeps the load independent of the device
# the checkpoint was saved from; load_state_dict copies the values into the
# existing parameters.
checkpoint = None
if START_WEIGHTS is not None:
    checkpoint = torch.load(START_WEIGHTS, map_location='cpu')
    model.lm.load_state_dict(checkpoint['model_state_dict'])

print(f'{device=}')

# Only the LM's condition provider and the compression model's encoder/decoder
# parameters are handed to the optimizer.
optimizer = AdamW(
    list(model.lm.condition_provider.parameters()) + \
    list(model.compression_model.encoder.parameters()) + \
    list(model.compression_model.decoder.parameters()),
    lr=LR,
    betas=(0.9, 0.95),
    weight_decay=0.1,
)

if checkpoint is not None:
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # Drop the reference so the loaded tensors do not linger in the kernel.
    checkpoint = None

scaler = GradScaler()
# NOTE(review): T_max=10 is fixed while EPOCHS is configurable; whether the
# scheduler is stepped per batch or per epoch is decided inside train_step --
# confirm that 10 is the intended annealing horizon.
scheduler = CosineAnnealingLR(optimizer, T_max=10)

########## Training ##########

# A checkpoint is written every CHECKPOINT_EVERY epochs and always after the
# final epoch, so that short runs (EPOCHS < CHECKPOINT_EVERY) still leave one.
CHECKPOINT_EVERY = 25

if START_WEIGHTS is not None:
    # The checkpoint stores the index of the last *completed* epoch (it is saved
    # after that epoch's train step), so training resumes at the following one.
    # Only the epoch counter is read here, hence the CPU map_location.
    start_epoch = torch.load(START_WEIGHTS, map_location='cpu')['epoch'] + 1
else:
    start_epoch = 0

for epoch in range(start_epoch, EPOCHS):
    print(f'epoch {epoch}/{EPOCHS}')

    # One pass over the training dataloader; train_step returns a dict of metrics.
    train_loss = train_step(
        model,
        optimizer,
        scaler,
        train_dataloader,
        GRAD_ACC_STEPS,
        scheduler = scheduler
    )['train_loss']

    wandb.log({
        'epoch': epoch,
        'train_loss': train_loss
    })

    # eval_dataloader only exists when a share of the data was held out.
    if EVAL_DATA_RATIO > 0:
        eval_loss = eval_step(
            model,
            eval_dataloader
        )['eval_loss']

        wandb.log({
            'epoch': epoch,
            'eval_loss': eval_loss
        })

    is_last_epoch = (epoch + 1 == EPOCHS)
    if (epoch + 1) % CHECKPOINT_EVERY == 0 or is_last_epoch:
        checkpoint_path = os.path.join(CHECKPOINT_DIR + f'/{run.id}', f'checkpoint_{epoch}.pth')
        print(f'Saving to {checkpoint_path}...')
        # NOTE(review): only model.lm is saved, while the optimizer is also given
        # model.compression_model encoder/decoder parameters -- confirm whether
        # those weights need to be checkpointed as well.
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.lm.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, checkpoint_path)



len(train_dataset)=2


HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': './musicgen-model/model'. Use `repo_type` argument if needed.

In [1]:
# Environment sanity check: report whether NumPy can be imported and, if so,
# which version the kernel picks up.
try:
    import numpy as np
except ImportError:
    print("NumPy is not installed.")
else:
    print("NumPy is installed. Version:", np.__version__)


NumPy is installed. Version: 1.24.1


In [12]:
pip install numpy==1.24.1


Collecting numpy==1.24.1Note: you may need to restart the kernel to use updated packages.

  Downloading numpy-1.24.1-cp311-cp311-win_amd64.whl.metadata (5.6 kB)


  You can safely remove it manually.


Downloading numpy-1.24.1-cp311-cp311-win_amd64.whl (14.8 MB)
   ---------------------------------------- 0.0/14.8 MB ? eta -:--:--

  You can safely remove it manually.


   ------ --------------------------------- 2.4/14.8 MB 14.9 MB/s eta 0:00:01
   ------------- -------------------------- 5.0/14.8 MB 11.6 MB/s eta 0:00:01
   ---------------------------------------  14.7/14.8 MB 25.6 MB/s eta 0:00:01
   ---------------------------------------- 14.8/14.8 MB 23.3 MB/s eta 0:00:00
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
Successfully installed numpy-1.24.1



ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
blis 1.0.1 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.24.1 which is incompatible.
thinc 8.3.2 requires numpy<2.1.0,>=2.0.0; python_version >= "3.9", but you have numpy 1.24.1 which is incompatible.
