In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
from tqdm import tqdm
import wandb
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [None]:
class MelSpectrogramDataset(Dataset):
    """Dataset of mel-spectrogram images stored as one sub-directory per class.

    Expected layout: ``root_dir/<class name>/<image file>``. Every
    sub-directory of ``root_dir`` is treated as a class and every regular file
    inside it as one sample.

    Attributes:
        root_dir: directory that was scanned.
        classes: class (sub-directory) names in sorted order; a sample's label
            is the position of its class in this list.
        data: paths of all sample files.
        labels: integer label for each entry of ``data``.
    """

    def __init__(self, root_dir):
        """Scan ``root_dir`` and index every sample path with its label.

        Args:
            root_dir: path of the directory holding one folder per class.
        """
        self.root_dir = root_dir
        # os.listdir() gives entries in arbitrary order. Sorting makes the
        # class -> index mapping identical for every split that has the same
        # class folders; skipping non-directories keeps stray files in the
        # root from being treated as classes.
        self.classes = sorted(
            entry for entry in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, entry))
        )
        self.data = []
        self.labels = []

        for idx, genre in enumerate(self.classes):
            genre_path = os.path.join(root_dir, genre)
            # Sorted so the sample order is reproducible between runs.
            for file in sorted(os.listdir(genre_path)):
                file_path = os.path.join(genre_path, file)
                # Ignore nested folders (e.g. notebook checkpoint directories).
                if not os.path.isfile(file_path):
                    continue
                self.data.append(file_path)
                self.labels.append(idx)

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: position of the sample in ``self.data``.

        Returns:
            A ``(image, label)`` pair. ``image`` is a float tensor laid out as
            (channels, height, width) with 3 RGB channels; pixel intensities
            are converted to float but not rescaled. ``label`` is the integer
            class index.
        """
        img_path = self.data[idx]
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into the array.
        with Image.open(img_path) as handle:
            pixels = np.array(handle.convert('RGB'))
        # PIL yields (height, width, channel); PyTorch convolutions expect
        # channel-first.
        pixels = pixels.transpose((2, 0, 1))
        img = torch.from_numpy(pixels).float()
        label = self.labels[idx]
        return img, label


In [None]:
# Locations of the three pre-split image folders.
train_dir = '/kaggle/input/mel-spectrogram-for-gtzn-dataset/melspectrograms/train'
val_dir = '/kaggle/input/mel-spectrogram-for-gtzn-dataset/melspectrograms/validation'
test_dir = '/kaggle/input/mel-spectrogram-for-gtzn-dataset/melspectrograms/test'

# Build one dataset per split, in train / validation / test order.
train_dataset, val_dataset, test_dataset = (
    MelSpectrogramDataset(split_dir)
    for split_dir in (train_dir, val_dir, test_dir)
)


In [None]:
# Batches of 32 for every split; only the training split is shuffled, the
# evaluation splits are read in their stored order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=32,
    shuffle=False,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
)

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier: three conv stages and a two-layer head.

    Each stage is Conv2d -> ReLU -> MaxPool2d -> BatchNorm2d with 32 output
    channels. The stage outputs are flattened and passed through
    Linear -> ReLU -> Dropout(0.3) -> Linear to produce class logits.

    Args:
        in_channels: number of channels in the input images (default 3).
        num_classes: number of output logits (default 10).
        flat_features: number of values produced by the last stage once
            flattened, i.e. 32 * out_height * out_width. The default (42624)
            is the value this notebook was written with; it has to be changed
            when images of a different resolution are fed in, otherwise the
            first Linear layer raises a shape error.
    """

    def __init__(self, in_channels=3, num_classes=10, flat_features=42624):
        super().__init__()
        # Layer order (and therefore the state_dict keys net.0 ... net.16) is
        # kept stable so previously saved weights still load.
        self.net = nn.Sequential(
            nn.Conv2d(in_channels, 32, (3, 3)),
            nn.ReLU(),
            nn.MaxPool2d((3, 3), (2, 2), padding=(1, 1)),
            nn.BatchNorm2d(32),

            nn.Conv2d(32, 32, (3, 3)),
            nn.ReLU(),
            nn.MaxPool2d((3, 3), (2, 2), padding=(1, 1)),
            nn.BatchNorm2d(32),

            nn.Conv2d(32, 32, (2, 2)),
            nn.ReLU(),
            # Pads only the width axis before the final 2x2 pooling.
            nn.MaxPool2d((2, 2), (2, 2), padding=(0, 1)),
            nn.BatchNorm2d(32),

            nn.Flatten(),
            nn.Linear(flat_features, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        """Return unnormalised class logits for a batch of images.

        Args:
            x: float tensor laid out as (batch, in_channels, height, width).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        return self.net(x)

# Default configuration: RGB input, 10 classes, 42624 flattened features.
model = CNN()



In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Moves the model's parameters and buffers onto the chosen device.
model.to(device)

In [None]:
# Objective: cross-entropy over the class logits.
criterion = nn.CrossEntropyLoss()
# Adam with an initial learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
# Cosine decay of the learning rate towards 0 over T_max scheduler steps.
# NOTE(review): T_max is 100 while the training loop runs 30 epochs and steps
# the scheduler once per epoch, so only the first part of the cosine curve is
# used -- confirm this is intended.
scheduler = CosineAnnealingLR(optimizer=optimizer, T_max=100, eta_min=0.0)

In [None]:
# Number of passes over the training set; used by both the loop bound and the
# progress-bar label so the two cannot drift apart.
NUM_EPOCHS = 30

for epoch in range(NUM_EPOCHS):
    # ---- Training pass: one optimizer step per batch ----
    model.train()
    running_loss = 0.0
    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{NUM_EPOCHS}')
    for X_batch, y_batch in pbar:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        batch_loss = loss.item()
        running_loss += batch_loss
        # This notebook never starts a Weights & Biases run, and wandb.log()
        # needs one; only log when a run is active so the loop also works
        # without wandb.init().
        if wandb.run is not None:
            wandb.log({"Train Loss": batch_loss})

    # Mean of the per-batch training losses for this epoch.
    print(f'Loss: {running_loss / len(train_loader)}')

    # ---- Validation pass: no gradients, dropout/batch-norm in eval mode ----
    model.eval()
    val_loss = 0.0
    correct = 0
    y_pred = []
    y_true = []
    with torch.no_grad():
        pbar_val = tqdm(val_loader, desc='Validation')
        for X_batch, y_batch in pbar_val:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()
            # Predicted class = index of the largest logit.
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == y_batch).sum().item()
            y_pred.extend(predicted.cpu().numpy())
            y_true.extend(y_batch.cpu().numpy())

    accuracy = correct / len(val_dataset)
    # Macro averaging weights every class equally. zero_division=0 scores a
    # class with no predictions as 0 without emitting a warning each epoch.
    precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)

    print(f'Validation Loss: {val_loss / len(val_loader)}, Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}')

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()


In [None]:
# Final evaluation on the held-out test split.
model.eval()
test_loss = 0.0
correct = 0
seen = 0  # number of test samples processed so far
with torch.no_grad():
    pbar_test = tqdm(test_loader, desc='Test')
    # Batches are counted explicitly instead of reading the progress bar's
    # internal counter, so the running averages shown are exact from the
    # first batch onwards.
    for batch_idx, (X_batch, y_batch) in enumerate(pbar_test, start=1):
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        test_loss += loss.item()
        # Predicted class = index of the largest logit.
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == y_batch).sum().item()
        seen += y_batch.size(0)
        # Running mean loss per batch and running accuracy over the samples
        # evaluated so far (not over the whole dataset).
        pbar_test.set_postfix({'Loss': f'{test_loss / batch_idx:.4f}', 'Acc': f'{correct / seen:.4f}'})
test_accuracy = correct / len(test_dataset)
print(f'Test Loss: {test_loss / len(test_loader)}, Accuracy: {test_accuracy:.4f}')
