In [2]:
import json
import os
from collections import defaultdict
from datetime import datetime
from itertools import product

import h5py
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

In [3]:
# Pick the best available backend: an NVIDIA GPU (CUDA) first, then Apple-silicon
# MPS, and finally the CPU. On a Mac this still resolves to 'mps' when available.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print("Using device:", device)

Using device: mps


In [4]:
def load_data(filepath):
    """Read the 'spectrograms' and 'labels' datasets of an HDF5 file as tensors.

    Args:
        filepath: Path of the HDF5 file to open read-only.

    Returns:
        A ``(spectrograms, labels)`` tuple. The spectrograms are float32 with an
        extra axis of size 1 inserted at position 1; the labels are int64.
    """
    with h5py.File(filepath, 'r') as h5_file:
        # Slicing with [:] copies each dataset into memory, so nothing below
        # depends on the file still being open.
        spec_array = h5_file['spectrograms'][:]
        label_array = h5_file['labels'][:]

    features = torch.tensor(spec_array, dtype=torch.float32).unsqueeze(1)
    targets = torch.tensor(label_array, dtype=torch.long)
    return features, targets

# Root folder that holds the train/validation/test HDF5 files. Set the
# MUSICNN_DATA_DIR environment variable to run the notebook on another machine;
# the fallback is the path the notebook was originally written against.
DATA_DIR = os.environ.get('MUSICNN_DATA_DIR', '/Users/elcachorrohumano/workspace/MusicNN/data')

# Load each split as a (spectrograms, labels) pair of tensors.
train_spectrograms, train_labels = load_data(os.path.join(DATA_DIR, 'train', 'spec_train.h5'))
val_spectrograms, val_labels = load_data(os.path.join(DATA_DIR, 'validation', 'spec_validation.h5'))
test_spectrograms, test_labels = load_data(os.path.join(DATA_DIR, 'test', 'spec_test.h5'))

# Unpack the per-sample dimensions of the training tensor (the batch axis is ignored).
_, channels, height, width = train_spectrograms.shape
print(f"Train specs: {train_spectrograms.shape}, labels: {train_labels.shape}")
print(f"Val specs: {val_spectrograms.shape}, labels: {val_labels.shape}")
print(f"Test specs: {test_spectrograms.shape}, labels: {test_labels.shape}")

Train specs: torch.Size([2403, 1, 128, 641]), labels: torch.Size([2403])
Val specs: torch.Size([692, 1, 128, 641]), labels: torch.Size([692])
Test specs: torch.Size([348, 1, 128, 641]), labels: torch.Size([348])


In [5]:
# Mini-batch size shared by the three loaders.
batch_size = 64

# Pair every spectrogram tensor with its label tensor.
train_dataset = TensorDataset(train_spectrograms, train_labels)
val_dataset = TensorDataset(val_spectrograms, val_labels)
test_dataset = TensorDataset(test_spectrograms, test_labels)

# Only the training loader shuffles; the other two use the DataLoader default order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)


In [6]:
class ImprovedCNN(nn.Module):
    """Convolutional classifier made of pooled conv stages and a dense head.

    Every convolutional stage is (Conv3x3 -> BatchNorm -> ReLU) twice, followed by
    a 2x2 max pool and channel dropout, so each stage halves the spatial height
    and width (rounding down). The flattened feature map then goes through the
    hidden fully connected layers and a final linear layer producing the logits.

    Args:
        input_height: Height of the input images.
        input_width: Width of the input images.
        num_classes: Number of output logits.
        conv_channels: Output channels of each convolutional stage, in order.
        fc_units: Width of each hidden fully connected layer, in order.
        dropout_rate: Dropout probability used after every conv stage and every
            hidden fully connected layer.
        in_channels: Number of channels of the input images (1 by default).

    Raises:
        ValueError: If ``conv_channels`` is empty, or if the input is so small
            that the pooling stages would reduce it to zero height or width.
    """

    def __init__(self, input_height, input_width, num_classes, conv_channels=(32, 64, 128),
                 fc_units=(512, 256), dropout_rate=0.25, in_channels=1):
        super().__init__()
        if len(conv_channels) == 0:
            raise ValueError("conv_channels must contain at least one entry")

        # One 2x2 max pool per stage: the spatial size is floor-divided by 2 each time.
        downscale = 2 ** len(conv_channels)
        self.height_after_conv = input_height // downscale
        self.width_after_conv = input_width // downscale
        if self.height_after_conv < 1 or self.width_after_conv < 1:
            raise ValueError(
                f"Input of size {input_height}x{input_width} is too small for "
                f"{len(conv_channels)} pooling stages"
            )

        # Attribute names and layer order are kept stable so that the keys of
        # state_dict() (and therefore saved checkpoints) do not change.
        self.conv_layers = nn.ModuleList()
        for out_channels in conv_channels:
            self.conv_layers.append(nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Dropout2d(dropout_rate)
            ))
            in_channels = out_channels

        fc_layers = []
        in_features = conv_channels[-1] * self.height_after_conv * self.width_after_conv
        for units in fc_units:
            fc_layers.extend([
                nn.Linear(in_features, units),
                nn.ReLU(),
                nn.Dropout(dropout_rate),
            ])
            in_features = units

        fc_layers.append(nn.Linear(in_features, num_classes))
        self.fc = nn.Sequential(*fc_layers)

    def forward(self, x):
        """Return the class logits for a batch of images shaped (N, C, H, W)."""
        for stage in self.conv_layers:
            x = stage(x)
        # Flatten everything except the batch axis. Unlike view(-1, features),
        # this never folds surplus features into the batch dimension: an input
        # whose size differs from the constructor's fails loudly in the first
        # linear layer instead of silently changing the batch size.
        x = x.flatten(1)
        return self.fc(x)

In [7]:
def _run_epoch(model, batches, criterion, device, optimizer=None):
    """Run one pass over ``batches`` and return ``(loss_sum, correct, total)``.

    When ``optimizer`` is given the model is switched to training mode and
    updated after every batch, with the gradient norm clipped to 1.0. Without an
    optimizer the model is switched to eval mode and gradients are disabled.
    ``loss_sum`` is the sum of the per-batch loss values.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum, correct, total = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for inputs, labels in batches:
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

            loss_sum += loss.item()
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)

    return loss_sum, correct, total


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, device,
                max_epochs=30, patience=5, model_path='model.pth'):
    """Train ``model`` with early stopping on the validation loss.

    After every epoch the validation loss is passed to ``scheduler.step``. Each
    time it improves on the best value so far, the model's ``state_dict`` is
    saved to ``model_path``; after ``patience`` consecutive epochs without
    improvement training stops. The model itself is left with the weights of the
    last epoch that ran - the best weights are only in ``model_path``.

    Args:
        model: Network to train; expected to already be on ``device``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating the model parameters.
        scheduler: Scheduler whose ``step`` accepts the epoch validation loss.
        device: Device every batch is moved to.
        max_epochs: Upper bound on the number of epochs.
        patience: Non-improving epochs tolerated before stopping.
        model_path: File the best ``state_dict`` is written to.

    Returns:
        A ``defaultdict(list)`` with one entry per completed epoch under the keys
        ``'train_loss'``, ``'val_loss'``, ``'train_acc'`` and ``'val_acc'``.
        Losses are the mean of the per-batch loss values.
    """
    train_metrics = defaultdict(list)
    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(max_epochs):
        train_loss, train_correct, train_total = _run_epoch(
            model,
            tqdm(train_loader, desc=f'Epoch {epoch+1}/{max_epochs}'),
            criterion,
            device,
            optimizer,
        )
        val_loss, val_correct, val_total = _run_epoch(model, val_loader, criterion, device)

        epoch_train_loss = train_loss / len(train_loader)
        epoch_val_loss = val_loss / len(val_loader)
        epoch_train_acc = train_correct / train_total
        epoch_val_acc = val_correct / val_total

        scheduler.step(epoch_val_loss)

        train_metrics['train_loss'].append(epoch_train_loss)
        train_metrics['val_loss'].append(epoch_val_loss)
        train_metrics['train_acc'].append(epoch_train_acc)
        train_metrics['val_acc'].append(epoch_val_acc)

        # Report before the early-stopping check so the epoch that triggers the
        # stop is logged as well.
        print(f"Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_acc:.4f}")
        print(f"Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_acc:.4f}")

        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            torch.save(model.state_dict(), model_path)
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered at epoch {epoch + 1}")
                break

    return train_metrics

In [8]:
def get_param_combinations():
    """Return every hyperparameter combination of the search grid.

    Returns:
        A list of dicts, one per combination, each mapping the five
        hyperparameter names to one of their candidate values. Combinations are
        ordered as a cartesian product, with the last-listed hyperparameter
        varying fastest.
    """
    search_space = {
        'conv_channels': [[32, 64, 128], [64, 128, 256]],
        'fc_units': [[512, 256], [1024, 512]],
        'dropout_rate': [0.25, 0.5],
        'learning_rate': [0.001, 0.0005],
        'weight_decay': [0.01, 0.001]
    }
    names = list(search_space)
    candidate_lists = [search_space[name] for name in names]

    combinations = []
    for chosen in product(*candidate_lists):
        combinations.append(dict(zip(names, chosen)))
    return combinations

def grid_search_part(start_idx, end_idx, train_loader, val_loader, test_loader, device, height, width, num_classes,
                     save_path='/Users/elcachorrohumano/workspace/MusicNN/ml/specs/fine_tuning'):
    """Train the grid-search combinations with index in ``[start_idx, end_idx)``.

    For every selected combination a fresh ``ImprovedCNN`` is built and trained
    with AdamW, cross-entropy loss and a ``ReduceLROnPlateau`` scheduler. The
    checkpoint path ``<save_path>/models/model_<i>.pth`` is handed to
    ``train_model``, and after each model the results gathered so far in this
    call are written to ``<save_path>/results/results_<i>.json``.

    Args:
        start_idx: Index of the first combination to train.
        end_idx: Index one past the last combination to train.
        train_loader: Training batches passed to ``train_model``.
        val_loader: Validation batches passed to ``train_model``.
        test_loader: Accepted for interface compatibility; not used here.
        device: Device the model is moved to and trained on.
        height: Input height passed to ``ImprovedCNN``.
        width: Input width passed to ``ImprovedCNN``.
        num_classes: Number of output classes passed to ``ImprovedCNN``.
        save_path: Folder under which ``models/`` and ``results/`` are written.

    Returns:
        A list with one dict per trained model, holding its ``'params'``,
        ``'best_val_acc'``, ``'best_val_loss'``, ``'model_path'`` and the full
        ``'metrics'`` history.
    """
    params_list = get_param_combinations()[start_idx:end_idx]
    results = []

    # Create the output folders up front so a missing directory cannot make the
    # first checkpoint or results write fail after training has already started.
    models_dir = os.path.join(save_path, 'models')
    results_dir = os.path.join(save_path, 'results')
    os.makedirs(models_dir, exist_ok=True)
    os.makedirs(results_dir, exist_ok=True)

    for i, params in enumerate(params_list, start=start_idx):
        print(f"\nTraining model {i}")
        print("Parameters:", params)

        model = ImprovedCNN(height, width, num_classes,
                            conv_channels=params['conv_channels'],
                            fc_units=params['fc_units'],
                            dropout_rate=params['dropout_rate']).to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.AdamW(model.parameters(),
                                lr=params['learning_rate'],
                                weight_decay=params['weight_decay'])
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

        model_path = os.path.join(models_dir, f'model_{i}.pth')
        results_path = os.path.join(results_dir, f'results_{i}.json')
        metrics = train_model(model, train_loader, val_loader, criterion, optimizer,
                              scheduler, device, model_path=model_path)

        result = {
            'params': params,
            'best_val_acc': max(metrics['val_acc']),
            'best_val_loss': min(metrics['val_loss']),
            'model_path': model_path,
            'metrics': metrics
        }
        results.append(result)

        # Snapshot of everything trained so far in this call, written inside the
        # results folder (the path previously lacked its separator and subfolder).
        with open(results_path, 'w') as f:
            json.dump(results, f, indent=4)

    return results

In [9]:
# Size the output layer from the largest label id across all splits instead of the
# number of distinct training labels. CrossEntropyLoss needs every target to lie in
# [0, num_classes - 1], which a distinct-count breaks if an id is skipped or absent
# from the training split. For contiguous labels 0..K-1 both give K.
num_classes = int(torch.cat([train_labels, val_labels, test_labels]).max().item()) + 1
# Train grid combinations 17 through 31 (the end index is exclusive).
results1 = grid_search_part(17, 32, train_loader, val_loader, test_loader, device, height, width, num_classes)


Training model 17
Parameters: {'conv_channels': [64, 128, 256], 'fc_units': [512, 256], 'dropout_rate': 0.25, 'learning_rate': 0.001, 'weight_decay': 0.001}


Epoch 1/30:  42%|████▏     | 16/38 [05:19<07:19, 20.00s/it]


KeyboardInterrupt: 