Ερώτημα 1

Βήμα 1 : Φόρτωση δεδομένων (mfccs)

In [None]:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
from collections import defaultdict

# Genre names listed in class-index order: the position of a name in this
# tuple is the integer id used as the training target.
_GENRES = ("classical", "hiphop", "rock_metal_hardrock", "blues")

# Forward (name -> id) and reverse (id -> name) lookup tables.
label_id = {genre: index for index, genre in enumerate(_GENRES)}
id_to_label = dict(enumerate(_GENRES))


def get_label_id(name):
    """Return the integer class id of the genre called ``name``.

    Raises ``KeyError`` when ``name`` is not one of the known genres.
    """
    return label_id[name]

# Root folder of the dataset on the mounted Google Drive.
DATA_ROOT = '/content/drive/MyDrive/music_genre_data_di'


def _load_split(split, features='pyaudioanalysis'):
    """Load the feature matrix and the label array stored for one split.

    Args:
        split: sub-folder name of the split ('train', 'val' or 'test').
        features: sub-folder name of the feature set inside the split.

    Returns:
        ``(X, labels)`` as loaded by ``np.load`` from ``X.npy`` and
        ``labels.npy``.
    """
    folder = f'{DATA_ROOT}/{split}/{features}'
    return np.load(f'{folder}/X.npy'), np.load(f'{folder}/labels.npy')


X_train, y_train = _load_split('train')
X_val, y_val = _load_split('val')
X_test, y_test = _load_split('test')

# Map the string genre labels to their integer class ids.
y_train_ids = np.array([get_label_id(label) for label in y_train])
y_val_ids = np.array([get_label_id(label) for label in y_val])
y_test_ids = np.array([get_label_id(label) for label in y_test])

# Wrap features and targets into TensorDatasets.
batch_size = 16

train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32),
                              torch.tensor(y_train_ids, dtype=torch.long))
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32),
                            torch.tensor(y_val_ids, dtype=torch.long))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32),
                             torch.tensor(y_test_ids, dtype=torch.long))

# Only the training data is shuffled. The validation and test loaders are used
# purely for evaluation, so a fixed order keeps their results repeatable.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Βήμα 2 : Ορισμός Νευρωνικού Δικτύου

In [None]:
import torch.nn as nn

class FeedforwardNN(nn.Module):
    """Fully connected classifier: 26 input features -> 128 -> 32 -> 4 scores.

    No activation is applied between the linear layers, and the output is the
    raw score of each of the 4 classes.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in forward order so that parameter
        # initialisation draws from the random generator in that sequence.
        self.layer1 = nn.Linear(in_features=26, out_features=128)
        self.layer2 = nn.Linear(in_features=128, out_features=32)
        self.layer3 = nn.Linear(in_features=32, out_features=4)

    def forward(self, x):
        """Pass ``x`` through the three linear layers and return the scores."""
        hidden = self.layer2(self.layer1(x))
        return self.layer3(hidden)

Βήμα 3 : Ορισμός διαδικασίας εκπαίδευσης

In [None]:
def train_model(model, dataloader, optimizer, loss_fn, device, epochs):
    """Train ``model`` for ``epochs`` passes over ``dataloader`` and return it.

    Every batch is moved to ``device``, scored with ``loss_fn`` and used for
    one ``optimizer`` step. After each epoch the mean of the per-batch losses
    is printed.

    Returns:
        The same ``model`` object, now trained.
    """
    model.to(device)

    for epoch in range(epochs):
        model.train()
        batch_losses = []

        for features, targets in dataloader:
            features = features.to(device)
            targets = targets.to(device)

            # Forward pass and loss (distance of the predictions from the targets).
            loss = loss_fn(model(features), targets)

            # Reset old gradients, back-propagate the new ones, update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_losses.append(loss.item())

        avg_loss = sum(batch_losses) / len(dataloader)
        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")

    return model  # the trained model


Βήμα 4 : Ορισμός διαδικασίας αξιολόγησης

In [None]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
import torch

# Default device for this cell: the CPU.
device = torch.device("cpu")


def evaluate_model(model, dataloader, loss_fn, device):
    """Score ``model`` on every batch of ``dataloader`` without gradients.

    Returns:
        A tuple ``(loss_avg, f1, acc, cm)``: the mean of the per-batch losses,
        the macro-averaged F1 score, the accuracy and the confusion matrix.
    """
    model.eval()
    targets_seen, predictions = [], []
    running_loss = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            scores = model(inputs)
            running_loss += loss_fn(scores, labels).item()
            # The predicted class is the index of the highest score in each row.
            predictions.extend(scores.argmax(1).cpu().numpy())
            targets_seen.extend(labels.cpu().numpy())

    return (
        running_loss / len(dataloader),
        f1_score(targets_seen, predictions, average='macro'),
        accuracy_score(targets_seen, predictions),
        confusion_matrix(targets_seen, predictions),
    )

Βήμα 5 : Εκπαίδευση δικτύου

In [None]:
# Step 5 trains on the CPU.
device = torch.device("cpu")

# Fresh, randomly initialised network placed on that device.
model = FeedforwardNN().to(device)

# Plain SGD with a fixed learning rate of 0.002.
optimizer = torch.optim.SGD(model.parameters(), lr=0.002)

# Cross-entropy loss over the raw class scores produced by the network.
loss_fn = torch.nn.CrossEntropyLoss()

def train_model(model, dataloader, optimizer, loss_fn, device, epochs):
    """Run ``epochs`` training passes of ``model`` over ``dataloader``.

    Each batch is moved to ``device`` and used for one optimizer step; the
    mean of the per-batch losses is printed at the end of every epoch.

    Returns:
        The same ``model`` object after training.
    """
    model.to(device)

    for epoch in range(epochs):
        model.train()
        running = 0

        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Clear stale gradients before the new forward/backward pass.
            optimizer.zero_grad()
            batch_loss = loss_fn(model(inputs), labels)
            batch_loss.backward()
            optimizer.step()

            running += batch_loss.item()

        avg_loss = running / len(dataloader)
        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")

    return model

# Train for 30 epochs on the training split.
epochs = 30
trained_model = train_model(model, train_loader, optimizer, loss_fn, device, epochs)

# Score the trained network on the held-out test split.
test_loss, test_f1, test_acc, test_cm = evaluate_model(trained_model, test_loader, loss_fn, device)

# Report the test metrics.
print("\nτελικη αξιολογηση στο test set:")
print(f"Loss: {test_loss:.4f}")
print(f"F1 Score (macro): {test_f1:.4f}")
print(f"Accuracy: {test_acc:.4f}")
print("Confusion Matrix:")
print(test_cm)


Epoch 1/30 - Loss: 1.4016
Epoch 2/30 - Loss: 1.3718
Epoch 3/30 - Loss: 1.3482
Epoch 4/30 - Loss: 1.3250
Epoch 5/30 - Loss: 1.3008
Epoch 6/30 - Loss: 1.2744
Epoch 7/30 - Loss: 1.2484
Epoch 8/30 - Loss: 1.2224
Epoch 9/30 - Loss: 1.1894
Epoch 10/30 - Loss: 1.1594
Epoch 11/30 - Loss: 1.1375
Epoch 12/30 - Loss: 1.1130
Epoch 13/30 - Loss: 1.0878
Epoch 14/30 - Loss: 1.0676
Epoch 15/30 - Loss: 1.0464
Epoch 16/30 - Loss: 1.0313
Epoch 17/30 - Loss: 1.0222
Epoch 18/30 - Loss: 0.9993
Epoch 19/30 - Loss: 0.9953
Epoch 20/30 - Loss: 0.9790
Epoch 21/30 - Loss: 0.9783
Epoch 22/30 - Loss: 0.9681
Epoch 23/30 - Loss: 0.9608
Epoch 24/30 - Loss: 0.9640
Epoch 25/30 - Loss: 0.9582
Epoch 26/30 - Loss: 0.9477
Epoch 27/30 - Loss: 0.9421
Epoch 28/30 - Loss: 0.9355
Epoch 29/30 - Loss: 0.9307
Epoch 30/30 - Loss: 0.9276

τελικη αξιολογηση στο test set:
Loss: 0.9349
F1 Score (macro): 0.6175
Accuracy: 0.6337
Confusion Matrix:
[[243  10  16  28]
 [ 14 257  31  54]
 [ 35  41 284  39]
 [ 29  83 124  88]]


Βήμα 6 : Εκπαίδευση δικτύου με GPU

In [None]:
import time
import torch.nn as nn

# ---- CPU run ----
device_cpu = torch.device("cpu")

model_cpu = FeedforwardNN().to(device_cpu)

optimizer_cpu = torch.optim.SGD(model_cpu.parameters(), lr=0.002)
loss_fn_cpu = nn.CrossEntropyLoss()

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() can jump when the system clock is adjusted.
start_cpu = time.perf_counter()
trained_model_cpu = train_model(model_cpu, train_loader, optimizer_cpu, loss_fn_cpu, device_cpu, epochs=30)
end_cpu = time.perf_counter()

cpu_duration = end_cpu - start_cpu
print(f"χρονος εκπαιδευσης με CPU: {cpu_duration:.2f} δευτερολεπτα")

# ---- GPU run (falls back to the CPU when CUDA is not available) ----
device_gpu = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Χρηση :", device_gpu)

model_gpu = FeedforwardNN().to(device_gpu)

optimizer_gpu = torch.optim.SGD(model_gpu.parameters(), lr=0.002)
loss_fn_gpu = nn.CrossEntropyLoss()

# CUDA work is queued asynchronously, so wait for the device before starting
# and before stopping the timer; otherwise pending work (such as the model
# transfer above) could leak into or out of the measured interval.
if device_gpu.type == "cuda":
    torch.cuda.synchronize()
start_gpu = time.perf_counter()
trained_model_gpu = train_model(model_gpu, train_loader, optimizer_gpu, loss_fn_gpu, device_gpu, epochs=30)
if device_gpu.type == "cuda":
    torch.cuda.synchronize()
end_gpu = time.perf_counter()

gpu_duration = end_gpu - start_gpu
print(f"χρονος εκπαιδευσης με GPU: {gpu_duration:.2f} δευτερόλεπτα")

# Side-by-side comparison of the two training times.
print(f"\nσυγκριση Χρονου Εκπαιδευσης:")
print(f"CPU: {cpu_duration:.2f} δευτερολεπτα")
print(f"GPU: {gpu_duration:.2f} δευτερόλεπτα")

# Evaluate the GPU-trained model on the test split.
test_loss, test_f1, test_acc, test_cm = evaluate_model(trained_model_gpu, test_loader, loss_fn_gpu, device_gpu)

print("\nτελικη αξιολογηση (GPU):")
print(f"Loss: {test_loss:.4f}")
print(f"F1 Score (macro): {test_f1:.4f}")
print(f"Accuracy: {test_acc:.4f}")
print("Confusion Matrix:")
print(test_cm)

Epoch 1/30 - Loss: 1.3839
Epoch 2/30 - Loss: 1.3495
Epoch 3/30 - Loss: 1.3292
Epoch 4/30 - Loss: 1.3026
Epoch 5/30 - Loss: 1.2795
Epoch 6/30 - Loss: 1.2525
Epoch 7/30 - Loss: 1.2261
Epoch 8/30 - Loss: 1.1940
Epoch 9/30 - Loss: 1.1663
Epoch 10/30 - Loss: 1.1389
Epoch 11/30 - Loss: 1.1087
Epoch 12/30 - Loss: 1.0888
Epoch 13/30 - Loss: 1.0636
Epoch 14/30 - Loss: 1.0450
Epoch 15/30 - Loss: 1.0334
Epoch 16/30 - Loss: 1.0140
Epoch 17/30 - Loss: 1.0026
Epoch 18/30 - Loss: 0.9852
Epoch 19/30 - Loss: 0.9786
Epoch 20/30 - Loss: 0.9698
Epoch 21/30 - Loss: 0.9606
Epoch 22/30 - Loss: 0.9507
Epoch 23/30 - Loss: 0.9509
Epoch 24/30 - Loss: 0.9396
Epoch 25/30 - Loss: 0.9441
Epoch 26/30 - Loss: 0.9325
Epoch 27/30 - Loss: 0.9390
Epoch 28/30 - Loss: 0.9296
Epoch 29/30 - Loss: 0.9263
Epoch 30/30 - Loss: 0.9239
χρονος εκπαιδευσης με CPU: 7.13 δευτερολεπτα
Χρηση : cuda
Epoch 1/30 - Loss: 1.3667
Epoch 2/30 - Loss: 1.3405
Epoch 3/30 - Loss: 1.3141
Epoch 4/30 - Loss: 1.2904
Epoch 5/30 - Loss: 1.2620
Epoch 6/30 

Βήμα 7 : Επιλογή μοντέλου

In [None]:
# Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = FeedforwardNN().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.002)
loss_fn = nn.CrossEntropyLoss()

# Snapshot of the weights with the highest validation macro-F1 seen so far.
best_model_state = None
best_f1 = -1.0

epochs = 30

for epoch in range(epochs):
    print(f"\nEpoch {epoch+1}/{epochs}")

    # One training pass over the training split.
    model.train()
    total_loss = 0

    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = loss_fn(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)
    print(f"Train Loss: {avg_train_loss:.4f}")

    # Evaluate on the validation split.
    val_loss, val_f1, val_acc, _ = evaluate_model(model, val_loader, loss_fn, device)
    print(f"Validation F1: {val_f1:.4f}  Accuracy: {val_acc:.4f}")

    # Keep the weights of the best model so far.
    if val_f1 > best_f1:
        best_f1 = val_f1
        # state_dict() hands back references to the live parameter tensors and
        # dict.copy() is shallow, so every tensor is cloned here. Without the
        # clone, later optimizer steps would overwrite the "best" weights in
        # place and the snapshot would always equal the last epoch.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        print("νεο καλυτερο μοντελο αποθηκευτηκε.")

# Rebuild the best model from the saved snapshot.
best_model = FeedforwardNN().to(device)
best_model.load_state_dict(best_model_state)

# Evaluate it on the test split.
test_loss, test_f1, test_acc, test_cm = evaluate_model(best_model, test_loader, loss_fn, device)

print("\ntest set με το καλυτερο μοντελο (με βάση validation F1):")
print(f"Loss: {test_loss:.4f}")
print(f"F1 Score (macro): {test_f1:.4f}")
print(f"Accuracy: {test_acc:.4f}")
print("Confusion Matrix:")
print(test_cm)



Epoch 1/30
Train Loss: 1.3870
Validation F1: 0.2321  Accuracy: 0.3513
νεο καλυτερο μοντελο αποθηκευτηκε.

Epoch 2/30
Train Loss: 1.3661
Validation F1: 0.2494  Accuracy: 0.3688
νεο καλυτερο μοντελο αποθηκευτηκε.

Epoch 3/30
Train Loss: 1.3418
Validation F1: 0.4523  Accuracy: 0.5150
νεο καλυτερο μοντελο αποθηκευτηκε.

Epoch 4/30
Train Loss: 1.3192
Validation F1: 0.4109  Accuracy: 0.4512

Epoch 5/30
Train Loss: 1.2966
Validation F1: 0.3099  Accuracy: 0.4263

Epoch 6/30
Train Loss: 1.2718
Validation F1: 0.6041  Accuracy: 0.6138
νεο καλυτερο μοντελο αποθηκευτηκε.

Epoch 7/30
Train Loss: 1.2451
Validation F1: 0.5642  Accuracy: 0.6012

Epoch 8/30
Train Loss: 1.2147
Validation F1: 0.5199  Accuracy: 0.5300

Epoch 9/30
Train Loss: 1.1808
Validation F1: 0.5350  Accuracy: 0.6225

Epoch 10/30
Train Loss: 1.1576
Validation F1: 0.4987  Accuracy: 0.5875

Epoch 11/30
Train Loss: 1.1240
Validation F1: 0.6183  Accuracy: 0.6025
νεο καλυτερο μοντελο αποθηκευτηκε.

Epoch 12/30
Train Loss: 1.1085
Validation

Η απόδοση στο test set είναι αρκετά κοντά σε αυτή του validation set, κάτι που δείχνει ότι το μοντέλο γενικεύει ικανοποιητικά.

Από την confusion matrix φαίνεται πως υπάρχει μπέρδεμα ανάμεσα στα είδη, ειδικά το blues με hiphop και rock. Αυτό είναι λογικό, γιατί τα είδη αυτά μπορεί να έχουν παρόμοια χαρακτηριστικά.

Η επιλογή του καλύτερου μοντέλου με βάση το F1-score ήταν σωστή, γιατί είναι πιο κατάλληλη μετρική όταν οι κλάσεις δεν είναι απόλυτα ισορροπημένες.

Ερώτημα 2: Convolutional Neural Network

Βήμα 1 : Φόρτωση δεδομένων (spectrograms)

In [None]:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
from collections import defaultdict
import matplotlib.pyplot as plt

import warnings

# Load the mel-spectrogram features and labels of the train/val/test splits.
X_train = np.load('/content/drive/MyDrive/music_genre_data_di/train/melgrams/X.npy')
y_train = np.load('/content/drive/MyDrive/music_genre_data_di/train/melgrams/labels.npy')
X_val = np.load('/content/drive/MyDrive/music_genre_data_di/val/melgrams/X.npy')
y_val = np.load('/content/drive/MyDrive/music_genre_data_di/val/melgrams/labels.npy')
X_test = np.load('/content/drive/MyDrive/music_genre_data_di/test/melgrams/X.npy')
y_test = np.load('/content/drive/MyDrive/music_genre_data_di/test/melgrams/labels.npy')

# Map the string genre labels to their integer class ids.
y_train_ids = np.array([get_label_id(label) for label in y_train])
y_val_ids = np.array([get_label_id(label) for label in y_val])
y_test_ids = np.array([get_label_id(label) for label in y_test])


def _align_split(features, names, ids, split):
    """Trim one split to a common length and add a channel axis if needed.

    Args:
        features: feature array of the split.
        names: string labels of the split.
        ids: integer labels of the split (same length as ``names``).
        split: split name, used only in the warning message.

    Returns:
        ``(features, names, ids)`` cut to the shorter of the two lengths;
        3-D features additionally get a singleton axis inserted at position 1.
    """
    count = min(len(features), len(ids))
    if len(features) != len(ids):
        # Trimming keeps the cell running, but it cannot repair a real
        # mismatch between samples and labels, so make it visible.
        warnings.warn(
            f"{split}: {len(features)} feature rows but {len(ids)} labels; "
            f"keeping the first {count} of each",
            stacklevel=2,
        )
    features, names, ids = features[:count], names[:count], ids[:count]
    if features.ndim == 3:
        # (N, H, W) -> (N, 1, H, W): one explicit input channel.
        features = features[:, np.newaxis, :, :]
    return features, names, ids


X_train, y_train, y_train_ids = _align_split(X_train, y_train, y_train_ids, 'train')
X_val, y_val, y_val_ids = _align_split(X_val, y_val, y_val_ids, 'val')
X_test, y_test, y_test_ids = _align_split(X_test, y_test, y_test_ids, 'test')

# Build the TensorDatasets and DataLoaders for the mel-spectrogram data.
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32),
                              torch.tensor(y_train_ids, dtype=torch.long))
val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32),
                            torch.tensor(y_val_ids, dtype=torch.long))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32),
                             torch.tensor(y_test_ids, dtype=torch.long))

batch_size = 16

# Only the training data is shuffled; evaluation loaders keep a fixed order.
train_loader_mel = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader_mel   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader_mel  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Βήμα 2 : Ορισμός Νευρωνικού Δικτύου

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# CNN: four 5x5 convolutions followed by four fully connected layers.
class Convolutional_Neural_Network(nn.Module):
    """Four unpadded 5x5 convolutions feeding a stack of four linear layers.

    No activation or pooling is applied anywhere; the output is one raw score
    per class.

    Args:
        input_shape: ``(channels, height, width)`` of a single sample, used
            only to work out the input size of the first linear layer.
        out_dim: number of output scores.
    """

    def __init__(self, input_shape=(1, 128, 21), out_dim=4):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5)

        # Push an all-zero sample through the convolutions to learn how many
        # values remain once the feature maps are flattened.
        with torch.no_grad():
            probe = self._convolve(torch.zeros((1, *input_shape)))
            self.flatten_dim = probe.view(1, -1).shape[1]

        self.fc1 = nn.Linear(in_features=self.flatten_dim, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=32)
        self.fc4 = nn.Linear(in_features=32, out_features=out_dim)

    def _convolve(self, x):
        """Apply the four convolutions in order."""
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = conv(x)
        return x

    def forward(self, x):
        """Return the class scores for a batch ``x`` of spectrograms."""
        x = self._convolve(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        for dense in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = dense(x)
        return x

Βήμα 3 : Εκπαίδευση δικτύου

In [None]:
import torch
import torch.nn as nn
import time
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("χρηση:", device)


# Evaluation helper.
def evaluate(dataloader, model, loss_fn, device):
    """Score ``model`` on every batch of ``dataloader`` without gradients.

    The model is moved to ``device`` and switched to evaluation mode first.

    Returns:
        A tuple ``(avg_loss, acc, f1, cm)``: the mean of the per-batch losses,
        the accuracy, the macro-averaged F1 score and the confusion matrix.
    """
    model.to(device)
    model.eval()
    collected_true = []
    collected_pred = []
    running = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)
            running += loss_fn(logits, labels).item()
            # The predicted class is the index of the highest score in each row.
            collected_pred.extend(logits.argmax(dim=1).cpu().numpy())
            collected_true.extend(labels.cpu().numpy())

    avg_loss = running / len(dataloader)
    acc = accuracy_score(collected_true, collected_pred)
    f1 = f1_score(collected_true, collected_pred, average='macro')
    cm = confusion_matrix(collected_true, collected_pred)

    return avg_loss, acc, f1, cm

# Training loop that keeps the model with the best validation F1.
def train_loop(num_epochs, train_loader, val_loader, model, loss_fn, optimizer, device, show=False):
    """Train ``model`` and return it loaded with its best validation weights.

    After every epoch the model is scored on ``val_loader``; whenever the
    macro-F1 improves, the weights are snapshotted in memory and also written
    to ``best_model.pth``.

    Args:
        num_epochs: number of passes over ``train_loader``.
        train_loader: batches of ``(inputs, targets)`` used for training.
        val_loader: batches used for model selection.
        model: network to train (moved to ``device``).
        loss_fn: loss applied to ``(predictions, targets)``.
        optimizer: optimizer that updates ``model``'s parameters.
        device: device the batches and the model are moved to.
        show: when true, print progress every 10 batches and after each epoch.

    Returns:
        ``model`` carrying the weights of the best epoch, or unchanged when
        ``num_epochs`` is 0.
    """
    # Start below any possible F1 so the first epoch is always recorded.
    # Starting at 0.0 with a strict ">" could leave no checkpoint from this
    # run and silently load a stale 'best_model.pth' from an earlier model.
    best_score = float("-inf")
    best_state = None
    model.to(device)

    for epoch in range(num_epochs):
        if show:
            print(f"\nEpoch {epoch+1}/{num_epochs}\n-----------------------------")

        model.train()
        for batch, (X, y) in enumerate(train_loader):
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            preds = model(X)
            loss = loss_fn(preds, y)
            loss.backward()
            # Cap the gradient norm at 1 before the update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            optimizer.step()

            if show and batch % 10 == 0:
                print(f"loss: {loss.item():.6f}  [{batch * len(X):>4}/{len(train_loader.dataset)}]")

        val_loss, val_acc, val_f1, _ = evaluate(val_loader, model, loss_fn, device)
        if show:
            print(f"Validation F1: {val_f1:.4f}  Validation Loss: {val_loss:.4f}")

        if val_f1 > best_score:
            best_score = val_f1
            # Clone the tensors: state_dict() returns references to the live
            # parameters, which keep changing during later epochs.
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            torch.save(best_state, 'best_model.pth')

    # Restore from the in-memory snapshot so the result never depends on
    # whatever checkpoint file happens to be on disk.
    if best_state is not None:
        model.load_state_dict(best_state)
    return model

# Define the model, the loss and the optimizer.
model = Convolutional_Neural_Network(out_dim=4).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

# Train for 30 epochs and measure the elapsed wall-clock time.
start_time = time.time()
best_model = train_loop(
    num_epochs=30,
    train_loader=train_loader_mel,
    val_loader=val_loader_mel,
    model=model,
    loss_fn=loss_fn,
    optimizer=optimizer,
    device=device,
    show=True
)
training_time = time.time() - start_time

# Final evaluation on the test split.
test_loss, test_acc, test_f1, test_cm = evaluate(test_loader_mel, best_model, loss_fn, device)

# Accuracy and F1 are printed as percentages.
print("\nαποτελεσματα στο Test Set:")
print(f"Accuracy: {test_acc*100:.2f}%")
print(f"F1 Score: {test_f1*100:.2f}%")
print(f"Loss: {test_loss:.4f}")
print("Confusion Matrix:\n", test_cm)

print(f"\nχρονος Εκπαιδευσης: {training_time:.2f} sec")


Χρήση συσκευής: cuda

Epoch 1/30
-----------------------------
loss: 1.388921  [   0/3200]
loss: 31158.285156  [ 160/3200]
loss: 2205.781982  [ 320/3200]
loss: 277.120575  [ 480/3200]
loss: 87.989342  [ 640/3200]
loss: 71.184975  [ 800/3200]
loss: 17.462893  [ 960/3200]
loss: 937.490417  [1120/3200]
loss: 65.072227  [1280/3200]
loss: 8.318715  [1440/3200]
loss: 8.834235  [1600/3200]
loss: 13.339721  [1760/3200]
loss: 4.128295  [1920/3200]
loss: 5.693360  [2080/3200]
loss: 90.071930  [2240/3200]
loss: 48.302578  [2400/3200]
loss: 100.360039  [2560/3200]
loss: 118.048706  [2720/3200]
loss: 54.915764  [2880/3200]
loss: 23.228378  [3040/3200]
Validation F1: 0.1195  Validation Loss: 21.8003

Epoch 2/30
-----------------------------
loss: 21.563667  [   0/3200]
loss: 11.951695  [ 160/3200]
loss: 26.185003  [ 320/3200]
loss: 5.499391  [ 480/3200]
loss: 31.051931  [ 640/3200]
loss: 21.425745  [ 800/3200]
loss: 389.490540  [ 960/3200]
loss: 46.822117  [1120/3200]
loss: 22.044001  [1280/3200]
lo

Το CNN εκπαιδεύτηκε κανονικά και στις δύο συσκευές. Παρατηρείται σημαντική διαφορά στους χρόνους εκπαίδευσης υπέρ της GPU. Επειδή η εκπαίδευση στη CPU διαρκούσε πολύ, την αφαιρέσαμε από τον κώδικα.

Βήμα 4 : Pooling and padding

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# CNN variant with padding and max pooling.
class Convolutional_Neural_Network_Padded(nn.Module):
    """Four padded 5x5 convolutions, each followed by 2x2 max pooling.

    The pooled feature maps are flattened and passed through four linear
    layers. No activation functions are used.

    Args:
        out_dim: number of output scores. Defaults to 4.
        input_shape: ``(channels, height, width)`` of a single sample, used
            only to size the first linear layer. The first convolution takes
            one input channel, so ``channels`` must be 1. Defaults to
            ``(1, 128, 21)``.
    """

    def __init__(self, out_dim=4, input_shape=(1, 128, 21)):
        super().__init__()

        # padding=2 with a 5x5 kernel keeps height and width unchanged, so
        # only the pooling layers shrink the feature maps.
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(32, 64, kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(64, 128, kernel_size=5, padding=2),
            nn.MaxPool2d(kernel_size=2)
        )

        # Push an all-zero sample through the convolutional part to find the
        # flattened feature size for the given input shape.
        with torch.no_grad():
            probe = self.conv_layers(torch.zeros(1, *input_shape))
        flat_dim = probe.view(1, -1).shape[1]

        self.fc_layers = nn.Sequential(
            nn.Linear(flat_dim, 1024),
            nn.Linear(1024, 256),
            nn.Linear(256, 32),
            nn.Linear(32, out_dim)
        )

    def forward(self, x):
        """Return the class scores for a batch ``x`` of spectrograms."""
        x = self.conv_layers(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x

# Train the new model.
model_padded = Convolutional_Neural_Network_Padded(out_dim=4).to(device)
optimizer = torch.optim.Adam(model_padded.parameters(), lr=0.002)
loss_fn = nn.CrossEntropyLoss()

# 30 epochs, timed with the wall clock.
start_time = time.time()
best_model_padded = train_loop(
    num_epochs=30,
    train_loader=train_loader_mel,
    val_loader=val_loader_mel,
    model=model_padded,
    loss_fn=loss_fn,
    optimizer=optimizer,
    device=device,
    show=True
)
training_time = time.time() - start_time

# Evaluate on the test split.
test_loss, test_acc, test_f1, test_cm = evaluate(test_loader_mel, best_model_padded, loss_fn, device)

# Accuracy and F1 are printed as percentages.
print(f"\nχρονος εκπαιδευσης με padding+pooling: {training_time:.2f} sec")
print(f"Αποτελεσματα στο test set:")
print(f"Accuracy: {test_acc * 100:.2f}%")
print(f"Macro F1 Score: {test_f1 * 100:.2f}%")
print(f"Average Loss: {test_loss:.4f}")
print("Confusion Matrix:")
print(test_cm)



Epoch 1/30
-----------------------------
loss: 1.374696  [   0/3200]
loss: 166.072510  [ 160/3200]
loss: 61.655304  [ 320/3200]
loss: 10.401013  [ 480/3200]
loss: 3.735415  [ 640/3200]
loss: 2.236290  [ 800/3200]
loss: 1.650109  [ 960/3200]
loss: 1.257095  [1120/3200]
loss: 1.238335  [1280/3200]
loss: 1.206070  [1440/3200]
loss: 0.956402  [1600/3200]
loss: 2.165908  [1760/3200]
loss: 1.556078  [1920/3200]
loss: 6.926271  [2080/3200]
loss: 1.788725  [2240/3200]
loss: 1.768656  [2400/3200]
loss: 1.956810  [2560/3200]
loss: 1.400450  [2720/3200]
loss: 2.099264  [2880/3200]
loss: 1.300523  [3040/3200]
Validation F1: 0.4851  Validation Loss: 1.0640

Epoch 2/30
-----------------------------
loss: 0.800982  [   0/3200]
loss: 0.968079  [ 160/3200]
loss: 0.858901  [ 320/3200]
loss: 2.270387  [ 480/3200]
loss: 1.328970  [ 640/3200]
loss: 1.257708  [ 800/3200]
loss: 1.156956  [ 960/3200]
loss: 1.568365  [1120/3200]
loss: 1.341130  [1280/3200]
loss: 2.226863  [1440/3200]
loss: 1.313984  [1600/320

Η χρήση padding και pooling βοηθάει το μοντέλο να λειτουργεί καλύτερα:

Το padding κρατάει τις διαστάσεις των δεδομένων πιο σταθερές μετά από κάθε συνέλιξη, ώστε να μη μικραίνουν πολύ γρήγορα.

Το max pooling μειώνει το μέγεθος των χαρακτηριστικών και συγκεντρώνει τις πιο σημαντικές πληροφορίες, κάνοντας το δίκτυο πιο αποδοτικό και γρήγορο.

Επίδοση:
Το δίκτυο με padding και pooling πέτυχε ικανοποιητική ακρίβεια και F1-score στο test set, κάτι που δείχνει ότι τα νέα στοιχεία βοήθησαν τη γενίκευση.

Χρόνος εκπαίδευσης:
Ο χρόνος εκπαίδευσης ήταν μικρότερος σε σύγκριση με το αρχικό μοντέλο, γιατί το pooling μειώνει το μέγεθος των δεδομένων που περνούν από τα επόμενα επίπεδα, κάνοντας τους υπολογισμούς πιο γρήγορους.

Βήμα 5 : Activation functions

In [None]:
import torch.nn as nn
import torch

class Convolutional_Neural_Network_ReLU(nn.Module):
    """Padded, pooled CNN with a ReLU after every hidden layer.

    Each of the four convolutional stages is a padded 5x5 convolution, a ReLU
    and a 2x2 max pooling. The flattened features go through four linear
    layers with a ReLU after each one except the last, which returns the raw
    class scores.

    Args:
        out_dim: number of output scores. Defaults to 4.
        input_shape: ``(channels, height, width)`` of a single sample, used
            only to size the first linear layer. The first convolution takes
            one input channel, so ``channels`` must be 1. Defaults to
            ``(1, 128, 21)``.
    """

    def __init__(self, out_dim=4, input_shape=(1, 128, 21)):
        super().__init__()

        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(32, 64, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),

            nn.Conv2d(64, 128, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

        # Push an all-zero sample through the convolutional part to find the
        # flattened feature size for the given input shape.
        with torch.no_grad():
            probe = self.conv_layers(torch.zeros(1, *input_shape))
            flat_dim = probe.view(1, -1).shape[1]

        self.fc_layers = nn.Sequential(
            nn.Linear(flat_dim, 1024),
            nn.ReLU(),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Linear(256, 32),
            nn.ReLU(),
            nn.Linear(32, out_dim)
        )

    def forward(self, x):
        """Return the class scores for a batch ``x`` of spectrograms."""
        x = self.conv_layers(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x

# Build the ReLU model with its own optimizer and loss.
model_relu = Convolutional_Neural_Network_ReLU(out_dim=4).to(device)
optimizer = torch.optim.Adam(model_relu.parameters(), lr=0.002)
loss_fn = nn.CrossEntropyLoss()

# 30 epochs, timed with the wall clock.
start_time = time.time()
best_model_relu = train_loop(
    num_epochs=30,
    train_loader=train_loader_mel,
    val_loader=val_loader_mel,
    model=model_relu,
    loss_fn=loss_fn,
    optimizer=optimizer,
    device=device,
    show=True
)
elapsed = time.time() - start_time

# Evaluate on the test split.
test_loss, test_acc, test_f1, test_cm = evaluate(test_loader_mel, best_model_relu, loss_fn, device)

# Accuracy and F1 are printed as percentages.
print(f"\nχρονος εκπαιδευσης με ReLU: {elapsed:.2f} sec")
print(f"αποδοση στο test set:")
print(f"Accuracy: {test_acc * 100:.2f}%")
print(f"F1 Score: {test_f1 * 100:.2f}%")
print(f"Loss: {test_loss:.4f}")
print("Confusion Matrix:")
print(test_cm)


Epoch 1/30
-----------------------------
loss: 1.418931  [   0/3200]
loss: 1.386158  [ 160/3200]
loss: 1.420490  [ 320/3200]
loss: 1.402328  [ 480/3200]
loss: 1.370353  [ 640/3200]
loss: 1.397076  [ 800/3200]
loss: 1.378441  [ 960/3200]
loss: 0.949806  [1120/3200]
loss: 2.462882  [1280/3200]
loss: 1.076606  [1440/3200]
loss: 1.321491  [1600/3200]
loss: 1.340384  [1760/3200]
loss: 1.241720  [1920/3200]
loss: 1.277127  [2080/3200]
loss: 0.980101  [2240/3200]
loss: 0.834552  [2400/3200]
loss: 1.400887  [2560/3200]
loss: 1.047007  [2720/3200]
loss: 0.734111  [2880/3200]
loss: 1.002759  [3040/3200]
Validation F1: 0.4639  Validation Loss: 1.1331

Epoch 2/30
-----------------------------
loss: 0.938908  [   0/3200]
loss: 0.899280  [ 160/3200]
loss: 0.970190  [ 320/3200]
loss: 1.196965  [ 480/3200]
loss: 0.971953  [ 640/3200]
loss: 0.660133  [ 800/3200]
loss: 0.915344  [ 960/3200]
loss: 1.015646  [1120/3200]
loss: 0.960965  [1280/3200]
loss: 1.000994  [1440/3200]
loss: 1.047608  [1600/3200]
l

Η ReLU βοηθάει σημαντικά την εκπαίδευση, καθώς κάνει το μοντέλο πιο εκφραστικό και αποτελεσματικό στην ταξινόμηση. Η βελτίωση της απόδοσης δείχνει πόσο σημαντικό είναι να χρησιμοποιούμε μη-γραμμικές ενεργοποιήσεις σε νευρωνικά δίκτυα.

Ερώτημα 3: Improving Performance

Βήμα 1 : Reproducibility

In [None]:
import torch
import numpy as np
import random

def set_seed(seed=42):
    """Seed Python, NumPy and PyTorch and request deterministic kernels.

    Also seeds the CUDA generators when CUDA is available, turns off cuDNN
    benchmarking, and enables deterministic algorithms in warn-only mode.
    Prints the seed that was applied.
    """
    # CPU-side generators: Python's random, NumPy and PyTorch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    # GPU-side generators (current device, then all devices).
    if torch.cuda.is_available():
        for cuda_seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            cuda_seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    torch.use_deterministic_algorithms(True, warn_only=True)

    print(f"το random seed ειναι {seed}")

set_seed(42)

# Dedicated generator so the shuffling order of the training data is reproducible.
generator = torch.Generator().manual_seed(42)

# Rebuild the loaders from the datasets behind the existing loaders. A
# DataLoader has to wrap a dataset; passing the old DataLoader itself does not
# give it an indexable source of samples.
train_loader_mel = DataLoader(train_loader_mel.dataset, batch_size=16, shuffle=True, generator=generator)
val_loader_mel = DataLoader(val_loader_mel.dataset, batch_size=16, shuffle=False)
test_loader_mel = DataLoader(test_loader_mel.dataset, batch_size=16, shuffle=False)

random seed set to 42
