In [1]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F

In [2]:
# Load the feature matrix from the CSV file
df = pd.read_csv('./Data/features_3_sec.csv')

# Drop the filename column
df = df.drop('filename', axis=1)

# Separate features and labels
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Encode labels
labelencoder = LabelEncoder()
y = labelencoder.fit_transform(y)

# Scale features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
class MusicGenreDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = MusicGenreDataset(X_train, y_train)
test_dataset = MusicGenreDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [75]:
class CNN(nn.Module):
	def __init__(self, num_classes):
		super(CNN, self).__init__()
		self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1)
		self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
		self.conv3 = nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
		self.pool = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
		self.dropout = nn.Dropout(0.2)
		self.batch_norm1 = nn.BatchNorm1d(64)
		self.batch_norm2 = nn.BatchNorm1d(128)
		self.batch_norm3 = nn.BatchNorm1d(256)

		# Compute the flattened size for the fully connected layer
		self._to_linear = None
		self.convs(torch.randn(1, 1, 60))
		
		self.fc1 = nn.Linear(self._to_linear, 512)
		self.fc2 = nn.Linear(512, num_classes)
		#self.fc3 = nn.Linear(64, num_classes)

	def convs(self, x):
		x = self.pool(F.relu(self.batch_norm1(self.conv1(x))))
		x = self.pool(F.relu(self.batch_norm2(self.conv2(x))))
		x = self.pool(F.relu(self.batch_norm3(self.conv3(x))))

		if self._to_linear is None:
			self._to_linear = x.shape[1] * x.shape[2]
		
		return x

	def forward(self, x):
		x = x.unsqueeze(1)  # Add channel dimension
		x = self.convs(x)
		x = x.view(x.size(0), -1)  # Flatten the tensor
		x = F.relu(self.fc1(x))
		x = self.dropout(x)
		x = self.fc2(x)
		return x

In [77]:
# Create the model
num_classes = len(np.unique(y_train))
model = CNN(num_classes)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 13
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

# Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
	for inputs, labels in test_loader:
		inputs, labels = inputs.to(device), labels.to(device)
		outputs = model(inputs)
		_, predicted = torch.max(outputs.data, 1)
		total += labels.size(0)
		correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total:.4f}')


Epoch [1/13], Loss: 1.1638
Epoch [2/13], Loss: 0.7083
Epoch [3/13], Loss: 0.4960
Epoch [4/13], Loss: 0.3666
Epoch [5/13], Loss: 0.2877
Epoch [6/13], Loss: 0.2136
Epoch [7/13], Loss: 0.1702
Epoch [8/13], Loss: 0.1144
Epoch [9/13], Loss: 0.1156
Epoch [10/13], Loss: 0.1053
Epoch [11/13], Loss: 0.0729
Epoch [12/13], Loss: 0.0793
Epoch [13/13], Loss: 0.0550
Accuracy: 0.8914
