In [2]:
import pandas as pd
from ast import literal_eval
literals = {
  'mfcc':  literal_eval,
  'chroma': literal_eval
}
df = pd.read_csv('rnn_features.csv', converters=literals)

import numpy as np
df = df[['label', 'mfcc', 'chroma']]

df.groupby('label').size()

df['mfcc'] = df['mfcc'].apply(lambda x: np.array(x))
df['chroma'] = df['chroma'].apply(lambda x: np.array(x))

dataset = df.values
labels = dataset[:, 0]
dataset = dataset[:, 1:]

# Create an empty list to store the concatenated arrays
concatenated_data = []

# Iterate over each pair of arrays in the data
for array1, array2 in dataset:
    # Concatenate the arrays along the first dimension (axis 0)
    concatenated_array = np.concatenate((array1, array2), axis=0)
    # Append the concatenated array to the list
    concatenated_data.append(concatenated_array.T)

# Convert the list of concatenated arrays to a numpy array
final_data = np.array(concatenated_data)

dataset = final_data

encoding = {
    'Romania': 0,          
    'Southern-Italy': 1,  
    'Thessaly': 2,         
    'Thrace': 3,           
    'Turkey': 4
}

labels2 = [0] * len(labels)
for i,label in enumerate(labels):
    labels2[i] = encoding[label]

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from sound_utils import *
from data_utils import *

num_classes = 5 
dataset = AudioDataset(dataset, labels2)

# Define the sizes for the splits
total_size = len(dataset)
train_size = int(0.7 * total_size)
val_size = int(0.15 * total_size)
test_size = total_size - train_size - val_size

# Split the dataset
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=5, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=5, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=5, shuffle=False, collate_fn=collate_fn)

input_shape = 25

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

class TransformerModel(nn.Module):
    def __init__(self, feature_dim, num_classes, nhead, num_encoder_layers, dim_feedforward, dropout=0.1):
        super(TransformerModel, self).__init__()
        self.feature_dim = feature_dim
        
        self.positional_encoding = nn.Parameter(torch.zeros(1, 1000, feature_dim))
        encoder_layer = nn.TransformerEncoderLayer(d_model=feature_dim, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        
        self.fc = nn.Linear(feature_dim, num_classes)

    def forward(self, src):
        src = src + self.positional_encoding[:, :src.size(1), :]
        output = self.transformer_encoder(src)
        output = output.mean(dim=1)
        output = self.fc(output)
        return output

# Parameters for the transformer
feature_dim = 25  # Dimension of features per frame
num_classes = 5  # Number of output classes
nhead = 5  # Number of attention heads
num_encoder_layers = 3  # Number of transformer encoder layers
dim_feedforward = 256  # Dimension of feedforward network
dropout = 0.1  # Dropout rate

model = TransformerModel(feature_dim, num_classes, nhead, num_encoder_layers, dim_feedforward, dropout)




In [6]:
def train_model(model, train_loader, val_loader, num_epochs=25, learning_rate=1e-3):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        
        for inputs, labels, _ in train_loader:
            optimizer.zero_grad()
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
        
        epoch_loss = running_loss / len(train_loader.dataset)
        
        val_loss, val_acc = validate_model(model, val_loader)
        
        print(f"Epoch {epoch}/{num_epochs - 1}, Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

def validate_model(model, val_loader):
    model.eval()
    running_loss = 0.0
    correct_preds = 0
    
    criterion = nn.CrossEntropyLoss()
    
    with torch.no_grad():
        for inputs, labels, _ in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct_preds += torch.sum(preds == labels.data)
    
    epoch_loss = running_loss / len(val_loader.dataset)
    epoch_acc = correct_preds.double() / len(val_loader.dataset)
    
    return epoch_loss, epoch_acc

# Assume train_loader and val_loader are defined
num_epochs = 25
learning_rate = 1e-3

train_model(model, train_loader, val_loader, num_epochs, learning_rate)

Epoch 0/24, Train Loss: 1.4415, Val Loss: 1.5792, Val Acc: 0.1538
Epoch 1/24, Train Loss: 1.3689, Val Loss: 1.6489, Val Acc: 0.1538
Epoch 2/24, Train Loss: 1.3532, Val Loss: 1.5500, Val Acc: 0.1538
Epoch 3/24, Train Loss: 1.3411, Val Loss: 1.6119, Val Acc: 0.1538
Epoch 4/24, Train Loss: 1.3299, Val Loss: 1.6281, Val Acc: 0.1538
Epoch 5/24, Train Loss: 1.3107, Val Loss: 1.6124, Val Acc: 0.3077
Epoch 6/24, Train Loss: 1.2314, Val Loss: 1.4261, Val Acc: 0.3846
Epoch 7/24, Train Loss: 1.2078, Val Loss: 1.2444, Val Acc: 0.3846
Epoch 8/24, Train Loss: 1.1418, Val Loss: 1.2892, Val Acc: 0.4615
Epoch 9/24, Train Loss: 1.0954, Val Loss: 1.3221, Val Acc: 0.4615
Epoch 10/24, Train Loss: 1.0826, Val Loss: 1.6718, Val Acc: 0.2308
Epoch 11/24, Train Loss: 1.0849, Val Loss: 1.1619, Val Acc: 0.3846
Epoch 12/24, Train Loss: 1.0584, Val Loss: 1.2451, Val Acc: 0.3846
Epoch 13/24, Train Loss: 1.0721, Val Loss: 1.5724, Val Acc: 0.2308
Epoch 14/24, Train Loss: 0.9961, Val Loss: 1.2019, Val Acc: 0.5385
Epoch

In [7]:
def test_model(model, test_loader):
    model.eval()
    correct_preds = 0
    
    with torch.no_grad():
        for inputs, labels, _ in test_loader:
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            correct_preds += torch.sum(preds == labels.data)
    
    test_acc = correct_preds.double() / len(test_loader.dataset)
    print(f"Test Accuracy: {test_acc:.4f}")

# Assume test_loader is defined
test_model(model, test_loader)


Test Accuracy: 0.4000
