In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.functional import F
from dataset import *
from models import *
from options import Options
from utils import *
from IPython.display import clear_output
import os
from tqdm import tqdm
import argparse

# Prefer the GPU whenever PyTorch reports one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device, " used for training")

cpu  used for training


In [2]:
# Load the song/label table. The Audio dataset below reads the 'folder' and
# 'emotion' columns of every row, so fail early if either one is absent.
data = pd.read_csv("SongLabel.csv")
missing_columns = {"folder", "emotion"} - set(data.columns)
assert not missing_columns, f"SongLabel.csv is missing columns: {sorted(missing_columns)}"

# Reproducible 80/20 split: sample the training rows with a fixed seed, keep
# the remaining rows for testing, then renumber both frames from 0 so they can
# be addressed by position.
train_df = data.sample(frac=0.8, random_state=42)
test_df = data.drop(train_df.index).reset_index(drop=True)
train_df = train_df.reset_index(drop=True)

In [3]:
class Audio(Dataset):
    """Dataset yielding one fixed-length audio excerpt and its label per row of ``df``.

    For row ``index`` the ``folder`` column names a sub-directory of ``root``;
    the first file (in sorted order) inside it is loaded with ``librosa.load``
    and cropped to the window ``[start_time, end_time)`` seconds.

    Args:
        df: DataFrame with ``folder`` and ``emotion`` columns. Rows are looked
            up with ``df.loc[index, ...]``, so the index must match the
            integer positions requested by the sampler.
        step: Stored on the instance; not read by any method of this class.
        root: Directory that contains one sub-folder per song.
        start_time: Start of the excerpt, in seconds.
        end_time: End of the excerpt, in seconds.
    """

    def __init__(self, df, step=200, root="Songs", start_time=60, end_time=90):
        self.df = df
        self.step = step
        self.root = root
        self.start_time = start_time
        self.end_time = end_time

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def _song_path(self, folder):
        """Return the path of the first (sorted) file inside ``root/folder``.

        Raises:
            FileNotFoundError: if the folder exists but contains no files.
        """
        # os.path.join keeps the path portable; a hard-coded backslash
        # separator only resolves on Windows.
        song_dir = os.path.join(self.root, str(folder))
        # os.listdir returns entries in arbitrary order; sort so the same file
        # is chosen on every run and on every machine.
        songs = sorted(os.listdir(song_dir))
        if not songs:
            raise FileNotFoundError(f"No audio file found in {song_dir!r}")
        return os.path.join(song_dir, songs[0])

    @staticmethod
    def _crop(x, start_sample, end_sample):
        """Slice ``x[start_sample:end_sample]`` and zero-pad it to the full window.

        Recordings that end before ``end_sample`` would otherwise produce
        shorter tensors, which cannot be stacked into a batch.
        """
        clip = x[start_sample:end_sample]
        missing = (end_sample - start_sample) - clip.shape[-1]
        if missing > 0:
            clip = torch.nn.functional.pad(clip, (0, missing))
        return clip

    def __getitem__(self, index):
        """Return ``(waveform, emotion)`` where ``waveform`` has shape ``(1, n_samples)``."""
        folder, emotion = self.df.loc[index, ['folder', 'emotion']]
        x, fs = librosa.load(self._song_path(folder))

        # Convert the window from seconds to sample indices
        # (defaults: 60 s -> 1:00, 90 s -> 1:30).
        start_sample = int(self.start_time * fs)
        end_sample = int(self.end_time * fs)

        x = self._crop(torch.as_tensor(x), start_sample, end_sample)
        emotion = torch.as_tensor(emotion)
        # Add a leading channel axis so the clip can feed a Conv1d directly.
        return x.view(1, *x.shape), emotion
    


In [4]:
# Wrap each split in a Dataset and batch it ten clips at a time.
train_set = Audio(train_df)
test_set = Audio(test_df)
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_set, shuffle=True, batch_size=10)
# Evaluation gains nothing from shuffling; a fixed order keeps the prediction
# and label lists returned by predict() in the same order on every run.
test_loader = DataLoader(test_set, shuffle=False, batch_size=10)


In [5]:
import torch
import torch.nn as nn
from torch.functional import F
from torch.fft import fft, ifft

In [6]:
class conv_layer(nn.Module):
    """One convolutional stage: Conv1d, then ReLU, then average pooling (window 3)."""

    def __init__(self, in_channels, out_channels, kernel_size):
        super().__init__()
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size)

    def forward(self, X):
        """Apply the stage to ``X`` and return the pooled activations."""
        activated = F.relu(self.conv(X))
        return F.avg_pool1d(activated, kernel_size=3)
    

class CNN(nn.Module):
    """1-D convolutional classifier.

    ``features`` is built as: a plain ``Conv1d`` from ``in_channels`` to
    ``channels[0]``, then one ``conv_layer`` for every consecutive pair in
    ``channels``, then a ``BatchNorm1d`` over ``channels[-1]``. ``forward``
    applies ReLU, averages over the remaining length, flattens, and feeds a
    linear ``classifier``.

    Args:
        channels: Sequence of channel widths; needs at least one entry.
        kernal_size: Kernel size shared by every convolution (the spelling is
            kept because callers pass it by keyword).
        classes: Number of output units of the classifier.
        in_channels: Number of channels in the input signal (default 1).
    """

    # Derives from nn.Module rather than nn.ModuleDict: the model is not used
    # as a mapping, and parameter / state_dict names are the same either way.
    def __init__(self, channels, kernal_size, classes, in_channels=1):
        super().__init__()
        self.features = nn.Sequential()
        self.features.add_module("conv_layer", nn.Conv1d(in_channels, channels[0], kernal_size))
        for i, (nin, nout) in enumerate(zip(channels[:-1], channels[1:])):
            self.features.add_module(f"conv_layer_{i}", conv_layer(nin, nout, kernal_size))
        self.features.add_module("batch_norm", nn.BatchNorm1d(channels[-1]))
        self.classifier = nn.Linear(channels[-1], classes)

    def forward(self, X):
        """Return one row of ``classes`` logits per input sample."""
        out = self.features(X)
        out = F.relu(out, inplace=True)
        # Collapse the length axis to a single value per channel.
        out = F.adaptive_avg_pool1d(out, 1)
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out
    
    

In [9]:
# Eight channel-width entries of 16 each, a 190-sample kernel, 4 output classes.
channels = [16 for _ in range(8)]
model = CNN(channels=channels, kernal_size=190, classes=4)

In [10]:
# Smoke test: push only the first training batch through the untrained model
# and show the input shape and the raw outputs.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    X, y = first_batch
    print(X.shape)
    out = model(X)
    print(out)

torch.Size([10, 1, 661500])
tensor([[-0.0417,  0.0954,  0.1292, -0.1338],
        [-0.0346,  0.0973,  0.1202, -0.1474],
        [-0.0371,  0.0957,  0.1254, -0.1387],
        [-0.0392,  0.0944,  0.1265, -0.1368],
        [-0.0386,  0.0949,  0.1256, -0.1380],
        [-0.0359,  0.0998,  0.1247, -0.1407],
        [-0.0343,  0.0970,  0.1218, -0.1439],
        [-0.0389,  0.0930,  0.1269, -0.1366],
        [-0.0387,  0.0934,  0.1267, -0.1368],
        [-0.0406,  0.0960,  0.1280, -0.1354]], grad_fn=<AddmmBackward0>)


In [11]:
def fit(model, train_loader, lossFn, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Module to train; batches are moved to the device of its first
            parameter.
        train_loader: Iterable of ``(X, y)`` batches exposing a ``dataset``
            attribute whose length is the number of samples.
        lossFn: Callable ``lossFn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer over ``model``'s parameters.

    Returns:
        ``(train_loss, train_accuracy)``: the sum of the per-batch loss values,
        and the fraction of samples in ``train_loader.dataset`` predicted
        correctly (each batch is scored before its weight update).
    """
    # Use the parameters' actual device object: rebuilding it from the bare
    # string 'cuda' would send batches to the default GPU even when the model
    # lives on another one (e.g. cuda:1).
    device = next(model.parameters()).device
    train_loss = 0
    train_accuracy = 0

    model.train()
    for X, y in train_loader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        pred = model(X)
        loss = lossFn(pred, y)
        train_loss += loss.item()
        # Softmax is monotonic, so the arg-max of the raw outputs is already
        # the predicted class.
        train_accuracy += (pred.argmax(dim=1) == y).sum().item()
        loss.backward()
        optimizer.step()
    train_accuracy /= len(train_loader.dataset)
    return train_loss, train_accuracy

def predict(model, test_loader):
    """Evaluate ``model`` on every batch of ``test_loader`` without gradients.

    Args:
        model: Module to evaluate; batches are moved to the device of its
            first parameter. It is left in eval mode.
        test_loader: Iterable of ``(X, y)`` batches exposing a ``dataset``
            attribute whose length is the number of samples.

    Returns:
        ``(y_pred, y_test, test_accuracy)``: predicted class indices, the
        matching true labels (both plain lists, in loader order), and the
        fraction of ``test_loader.dataset`` predicted correctly.
    """
    # Use the parameters' actual device object so a non-default GPU
    # (e.g. cuda:1) is honoured.
    device = next(model.parameters()).device
    y_test = []
    y_pred = []
    correct = 0

    model.eval()
    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(device), y.to(device)
            # Softmax is monotonic, so arg-max over the raw outputs gives the
            # predicted class; compute it once and reuse it.
            labels = model(X).argmax(dim=1)
            y_test.extend(y.tolist())
            y_pred.extend(labels.tolist())
            correct += (labels == y).sum().item()
    test_accuracy = correct / len(test_loader.dataset)
    return y_pred, y_test, test_accuracy

In [12]:
# Checkpoints and the metric history for this run are written here.
model_folder="models/Incr_Batch_size"
os.makedirs(model_folder,exist_ok=True)

# Put the model on the device selected in the first cell before the optimizer
# is built; fit() and predict() move each batch to wherever the model lives.
model = model.to(device)
lossFn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
history = {
    "train_loss": [],
    "train_accuracy": [],
    "test_accuracy": []
}

for epoch in tqdm(range(100)):

    train_loss, train_accuracy = fit(model, train_loader, lossFn, optimizer)
    # Index the result instead of unpacking into `_`, which would shadow
    # IPython's last-output variable.
    test_accuracy = predict(model, test_loader)[2]

    state = {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }
    # Save a "best" checkpoint on the very first epoch as well; otherwise
    # best.pth is never written when accuracy only drops afterwards.
    # NOTE(review): the best model is selected on the test split, so the
    # reported test accuracy is optimistic; a separate validation split would
    # avoid that.
    if not history['test_accuracy'] or test_accuracy >= max(history['test_accuracy']):
        torch.save(state, f"{model_folder}/best.pth")
    history['train_loss'].append(train_loss)
    history['train_accuracy'].append(train_accuracy)
    history['test_accuracy'].append(test_accuracy)

    # Redraw one panel per tracked metric, replacing the previous epoch's plot.
    clear_output(wait=True)
    fig, axs = plt.subplots(len(history),1,figsize=(10,10))
    fig.suptitle(f"Epoch {epoch}")
    for i, key in enumerate(history.keys()):
        ax = axs[i]
        ax.set_xlim(-0.1, epoch+0.1)
        ax.set_title(key)

        ax.plot(history[key])
    plt.show()
    # Release the figure so 100 epochs do not accumulate 100 open figures.
    plt.close(fig)


    torch.save(history, f"{model_folder}/history.pth")
    torch.save(state, f"{model_folder}/latest.pth")

  0%|          | 0/100 [00:00<?, ?it/s]

: 

In [11]:
# Keep only the accuracy (third element of predict()'s result). Indexing
# avoids assigning to `_`, which would shadow IPython's last-output variable.
test_accuracy = predict(model, test_loader)[2]

In [12]:
# Display the accuracy returned by the most recent predict() call.
test_accuracy

0.10112359550561797