In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

In [1]:
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm

In [2]:
from torchvision import models

class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.features = models.vgg16(pretrained=True)
        # Modify the first layer to accept 1 channel input (for grayscale spectrograms)
        self.features.features[0] = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Modify the final layer to output desired feature size
        self.features.classifier[6] = nn.Linear(self.features.classifier[6].in_features, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.features(x)
        x = self.softmax(x)
        return x

In [3]:
def preprocess_spectrogram(image_path):
    img = Image.open(image_path).convert('RGB')  # Convert to RGB by duplicating channels
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize to match VGG input size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Standard normalization for RGB
    ])
    img_tensor = transform(img)
    return img_tensor

In [4]:
def load_spectrogram_dataset(input_folder):
    X = []
    y = []
    # List all files in the input folder
    files = os.listdir(input_folder)
    # Iterate over files in the folder
    for filename in files:
        if filename.endswith(".png"):  # Assuming mel spectrograms are stored as PNG files
            input_path = os.path.join(input_folder, filename)
            img_tensor = preprocess_spectrogram(input_path)
            X.append(img_tensor)
            # Extract label from filename (assuming filename is in format "abc_IEO_label_xyz.png")
            label = filename.split("_")[2]
            if label == "HAP":
                y.append(0)
            elif label == "SAD":
                y.append(1)
            elif label == "ANG":
                y.append(2)
            elif label == "DIS":
                y.append(3)
            elif label == "FEA":
                y.append(4)
            elif label == "NEU":
                y.append(5)
    return X, y

In [5]:
def train_model(model, criterion, optimizer, train_loader, device):
    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_preds += (predicted == labels).sum().item()
        total_preds += labels.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    accuracy = correct_preds / total_preds
    return epoch_loss, accuracy

In [6]:
def test_model(model, criterion, test_loader, device):
    model.eval()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_preds += (predicted == labels).sum().item()
            total_preds += labels.size(0)
    epoch_loss = running_loss / len(test_loader.dataset)
    accuracy = correct_preds / total_preds
    return epoch_loss, accuracy

In [7]:
# def extract_features_from_folder(input_folder):
#     # Initialize the model
#     model = CNN(num_classes=3)  # 3 classes for HAPPY, SAD, ANGRY
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     model.to(device)
#     model.eval()  # Set the model to evaluation mode

#     # List all files in the input folder
#     files = os.listdir(input_folder)

#     # Iterate over files in the folder
#     for filename in files:
#         if filename.endswith(".png"):  # Assuming mel spectrograms are stored as PNG files
#             input_path = os.path.join(input_folder, filename)
#             img_tensor = preprocess_image(input_path)
#             img_tensor = img_tensor.to(device)
#             with torch.no_grad():
#                 output_features = model(img_tensor)
#             print(f"Features extracted for {filename}: {output_features}")


In [8]:
# extract_features_from_folder('/content/drive/MyDrive/csci535/melspec')

In [9]:
# !python3 melspec_to_features_cnn.py /content/drive/MyDrive/csci535/melspec


In [10]:
if __name__ == "__main__":
    # Check if input arguments are provided
    # if len(sys.argv) != 2:
    #     print("Usage: python melspec_to_features_cnn.py input_folder")
    #     sys.exit(1)

    # input_folder = sys.argv[1]
    input_folder = 'melspec_fullscale'
    # Check if input folder exists
    if not os.path.exists(input_folder):
        print("Input folder does not exist.")
        sys.exit(1)

    # Load dataset and split into train and test sets
    X_spec = np.load('X_spec.npy', mmap_mode='r')
    y_spec = np.load('y_spec.npy', mmap_mode='r')

    X_spec_rgb = np.repeat(X_spec, 3, axis=1)  # Repeat the channels dimension
    print(f"Total number of samples: {len(X_spec_rgb)}")
    X_train, X_test, y_train, y_test = train_test_split(X_spec_rgb, y_spec, test_size=0.3, random_state=42)
    print(f"Number of train samples: {len(X_train)}", f"Number of test samples: {len(X_test)}")
    # Initialize the model
    model = CNN(num_classes=6)  # 3 classes for HAPPY, SAD, ANGRY
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Define loss function and optimizer
    _lr = 0.0001
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=_lr)

    # Create data loaders
    _bs = 32
    train_loader = torch.utils.data.DataLoader(list(zip(X_train, y_train)), batch_size=_bs, shuffle=True)
    test_loader = torch.utils.data.DataLoader(list(zip(X_test, y_test)), batch_size=_bs)
    print(f"Batch size: {_bs}", f"lr: {_lr}")
    # Training loop
    num_epochs = 50
    for epoch in range(num_epochs):
        print("Epoch " + str(epoch))
        train_loss, train_accuracy = train_model(model, criterion, optimizer, train_loader, device)
        test_loss, test_accuracy = test_model(model, criterion, test_loader, device)
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Total number of samples: 7473
Number of train samples: 5231 Number of test samples: 2242




Batch size: 32 lr: 0.0001
Epoch 0


100%|██████████| 164/164 [00:25<00:00,  6.37it/s]
100%|██████████| 71/71 [00:02<00:00, 31.84it/s]


Epoch 1/50, Train Loss: 1.7267, Train Accuracy: 0.2919, Test Loss: 1.7053, Test Accuracy: 0.3136
Epoch 1


100%|██████████| 164/164 [00:15<00:00, 10.30it/s]
100%|██████████| 71/71 [00:02<00:00, 32.17it/s]


Epoch 2/50, Train Loss: 1.6818, Train Accuracy: 0.3426, Test Loss: 1.6887, Test Accuracy: 0.3390
Epoch 2


100%|██████████| 164/164 [00:15<00:00, 10.32it/s]
100%|██████████| 71/71 [00:02<00:00, 31.91it/s]


Epoch 3/50, Train Loss: 1.6732, Train Accuracy: 0.3416, Test Loss: 1.6914, Test Accuracy: 0.3461
Epoch 3


100%|██████████| 164/164 [00:15<00:00, 10.32it/s]
100%|██████████| 71/71 [00:02<00:00, 32.19it/s]


Epoch 4/50, Train Loss: 1.6796, Train Accuracy: 0.3447, Test Loss: 1.6870, Test Accuracy: 0.3296
Epoch 4


100%|██████████| 164/164 [00:15<00:00, 10.29it/s]
100%|██████████| 71/71 [00:02<00:00, 32.03it/s]


Epoch 5/50, Train Loss: 1.6814, Train Accuracy: 0.3445, Test Loss: 1.7429, Test Accuracy: 0.2904
Epoch 5


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 31.94it/s]


Epoch 6/50, Train Loss: 1.6710, Train Accuracy: 0.3527, Test Loss: 1.6732, Test Accuracy: 0.3434
Epoch 6


100%|██████████| 164/164 [00:16<00:00, 10.25it/s]
100%|██████████| 71/71 [00:02<00:00, 32.17it/s]


Epoch 7/50, Train Loss: 1.6525, Train Accuracy: 0.3808, Test Loss: 1.6742, Test Accuracy: 0.3577
Epoch 7


100%|██████████| 164/164 [00:16<00:00, 10.22it/s]
100%|██████████| 71/71 [00:02<00:00, 32.15it/s]


Epoch 8/50, Train Loss: 1.6512, Train Accuracy: 0.3800, Test Loss: 1.6536, Test Accuracy: 0.3787
Epoch 8


100%|██████████| 164/164 [00:16<00:00, 10.24it/s]
100%|██████████| 71/71 [00:02<00:00, 32.21it/s]


Epoch 9/50, Train Loss: 1.6577, Train Accuracy: 0.3758, Test Loss: 1.6658, Test Accuracy: 0.3662
Epoch 9


100%|██████████| 164/164 [00:16<00:00, 10.24it/s]
100%|██████████| 71/71 [00:02<00:00, 32.20it/s]


Epoch 10/50, Train Loss: 1.6411, Train Accuracy: 0.3885, Test Loss: 1.6758, Test Accuracy: 0.3439
Epoch 10


100%|██████████| 164/164 [00:16<00:00, 10.25it/s]
100%|██████████| 71/71 [00:02<00:00, 32.24it/s]


Epoch 11/50, Train Loss: 1.6384, Train Accuracy: 0.3921, Test Loss: 1.6545, Test Accuracy: 0.3782
Epoch 11


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.25it/s]


Epoch 12/50, Train Loss: 1.6384, Train Accuracy: 0.3969, Test Loss: 1.6978, Test Accuracy: 0.3394
Epoch 12


100%|██████████| 164/164 [00:16<00:00, 10.25it/s]
100%|██████████| 71/71 [00:02<00:00, 32.08it/s]


Epoch 13/50, Train Loss: 1.6313, Train Accuracy: 0.4030, Test Loss: 1.6423, Test Accuracy: 0.3907
Epoch 13


100%|██████████| 164/164 [00:16<00:00, 10.24it/s]
100%|██████████| 71/71 [00:02<00:00, 32.19it/s]


Epoch 14/50, Train Loss: 1.6103, Train Accuracy: 0.4232, Test Loss: 1.6109, Test Accuracy: 0.4206
Epoch 14


100%|██████████| 164/164 [00:16<00:00, 10.24it/s]
100%|██████████| 71/71 [00:02<00:00, 32.13it/s]


Epoch 15/50, Train Loss: 1.6073, Train Accuracy: 0.4276, Test Loss: 1.6221, Test Accuracy: 0.4117
Epoch 15


100%|██████████| 164/164 [00:16<00:00, 10.25it/s]
100%|██████████| 71/71 [00:02<00:00, 32.26it/s]


Epoch 16/50, Train Loss: 1.6236, Train Accuracy: 0.4110, Test Loss: 1.5890, Test Accuracy: 0.4438
Epoch 16


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.18it/s]


Epoch 17/50, Train Loss: 1.5810, Train Accuracy: 0.4565, Test Loss: 1.6029, Test Accuracy: 0.4322
Epoch 17


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.21it/s]


Epoch 18/50, Train Loss: 1.5935, Train Accuracy: 0.4433, Test Loss: 1.6212, Test Accuracy: 0.4090
Epoch 18


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.22it/s]


Epoch 19/50, Train Loss: 1.5897, Train Accuracy: 0.4481, Test Loss: 1.6144, Test Accuracy: 0.4184
Epoch 19


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.19it/s]


Epoch 20/50, Train Loss: 1.5875, Train Accuracy: 0.4487, Test Loss: 1.6473, Test Accuracy: 0.3889
Epoch 20


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.29it/s]


Epoch 21/50, Train Loss: 1.5857, Train Accuracy: 0.4500, Test Loss: 1.5768, Test Accuracy: 0.4554
Epoch 21


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.19it/s]


Epoch 22/50, Train Loss: 1.5598, Train Accuracy: 0.4745, Test Loss: 1.5813, Test Accuracy: 0.4527
Epoch 22


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.25it/s]


Epoch 23/50, Train Loss: 1.5653, Train Accuracy: 0.4699, Test Loss: 1.5667, Test Accuracy: 0.4683
Epoch 23


100%|██████████| 164/164 [00:15<00:00, 10.27it/s]
100%|██████████| 71/71 [00:02<00:00, 32.19it/s]


Epoch 24/50, Train Loss: 1.5417, Train Accuracy: 0.4972, Test Loss: 1.5710, Test Accuracy: 0.4657
Epoch 24


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.18it/s]


Epoch 25/50, Train Loss: 1.5360, Train Accuracy: 0.5005, Test Loss: 1.6049, Test Accuracy: 0.4269
Epoch 25


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.25it/s]


Epoch 26/50, Train Loss: 1.5618, Train Accuracy: 0.4735, Test Loss: 1.5840, Test Accuracy: 0.4514
Epoch 26


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.20it/s]


Epoch 27/50, Train Loss: 1.5420, Train Accuracy: 0.4970, Test Loss: 1.5730, Test Accuracy: 0.4652
Epoch 27


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.29it/s]


Epoch 28/50, Train Loss: 1.5369, Train Accuracy: 0.4995, Test Loss: 1.5755, Test Accuracy: 0.4634
Epoch 28


100%|██████████| 164/164 [00:15<00:00, 10.26it/s]
100%|██████████| 71/71 [00:02<00:00, 32.19it/s]


Epoch 29/50, Train Loss: 1.5175, Train Accuracy: 0.5198, Test Loss: 1.5439, Test Accuracy: 0.4942
Epoch 29


100%|██████████| 164/164 [00:15<00:00, 10.27it/s]
100%|██████████| 71/71 [00:02<00:00, 32.27it/s]


Epoch 30/50, Train Loss: 1.5123, Train Accuracy: 0.5263, Test Loss: 1.5421, Test Accuracy: 0.4951
Epoch 30


100%|██████████| 164/164 [00:15<00:00, 10.28it/s]
100%|██████████| 71/71 [00:02<00:00, 32.23it/s]


Epoch 31/50, Train Loss: 1.5010, Train Accuracy: 0.5389, Test Loss: 1.5448, Test Accuracy: 0.4920
Epoch 31


100%|██████████| 164/164 [00:15<00:00, 10.28it/s]
100%|██████████| 71/71 [00:02<00:00, 32.20it/s]


Epoch 32/50, Train Loss: 1.5160, Train Accuracy: 0.5244, Test Loss: 1.5452, Test Accuracy: 0.4924
Epoch 32


100%|██████████| 164/164 [00:15<00:00, 10.28it/s]
100%|██████████| 71/71 [00:02<00:00, 32.28it/s]


Epoch 33/50, Train Loss: 1.5291, Train Accuracy: 0.5102, Test Loss: 1.5793, Test Accuracy: 0.4558
Epoch 33


100%|██████████| 164/164 [00:15<00:00, 10.28it/s]
100%|██████████| 71/71 [00:02<00:00, 32.17it/s]


Epoch 34/50, Train Loss: 1.5325, Train Accuracy: 0.5053, Test Loss: 1.5588, Test Accuracy: 0.4773
Epoch 34


100%|██████████| 164/164 [00:15<00:00, 10.27it/s]
100%|██████████| 71/71 [00:02<00:00, 32.25it/s]


Epoch 35/50, Train Loss: 1.5126, Train Accuracy: 0.5261, Test Loss: 1.5415, Test Accuracy: 0.4955
Epoch 35


100%|██████████| 164/164 [00:15<00:00, 10.27it/s]
100%|██████████| 71/71 [00:02<00:00, 32.24it/s]


Epoch 36/50, Train Loss: 1.4969, Train Accuracy: 0.5399, Test Loss: 1.5401, Test Accuracy: 0.4991
Epoch 36


100%|██████████| 164/164 [00:15<00:00, 10.27it/s]
100%|██████████| 71/71 [00:02<00:00, 32.29it/s]


Epoch 37/50, Train Loss: 1.4886, Train Accuracy: 0.5509, Test Loss: 1.5530, Test Accuracy: 0.4826
Epoch 37


100%|██████████| 164/164 [00:15<00:00, 10.28it/s]
100%|██████████| 71/71 [00:02<00:00, 32.25it/s]


Epoch 38/50, Train Loss: 1.4714, Train Accuracy: 0.5695, Test Loss: 1.5460, Test Accuracy: 0.4933
Epoch 38


100%|██████████| 164/164 [00:15<00:00, 10.28it/s]
100%|██████████| 71/71 [00:02<00:00, 32.21it/s]


Epoch 39/50, Train Loss: 1.4825, Train Accuracy: 0.5559, Test Loss: 1.5905, Test Accuracy: 0.4465
Epoch 39


100%|██████████| 164/164 [00:15<00:00, 10.28it/s]
100%|██████████| 71/71 [00:02<00:00, 32.28it/s]


Epoch 40/50, Train Loss: 1.4997, Train Accuracy: 0.5374, Test Loss: 1.5469, Test Accuracy: 0.4929
Epoch 40


100%|██████████| 164/164 [00:15<00:00, 10.28it/s]
100%|██████████| 71/71 [00:02<00:00, 32.21it/s]


Epoch 41/50, Train Loss: 1.4594, Train Accuracy: 0.5810, Test Loss: 1.5255, Test Accuracy: 0.5103
Epoch 41


100%|██████████| 164/164 [00:15<00:00, 10.28it/s]
100%|██████████| 71/71 [00:02<00:00, 32.24it/s]


Epoch 42/50, Train Loss: 1.4707, Train Accuracy: 0.5695, Test Loss: 1.5191, Test Accuracy: 0.5219
Epoch 42


100%|██████████| 164/164 [00:15<00:00, 10.27it/s]
100%|██████████| 71/71 [00:02<00:00, 32.18it/s]


Epoch 43/50, Train Loss: 1.4450, Train Accuracy: 0.5961, Test Loss: 1.5085, Test Accuracy: 0.5312
Epoch 43


100%|██████████| 164/164 [00:15<00:00, 10.27it/s]
100%|██████████| 71/71 [00:02<00:00, 32.23it/s]


Epoch 44/50, Train Loss: 1.4513, Train Accuracy: 0.5875, Test Loss: 1.5395, Test Accuracy: 0.4951
Epoch 44


100%|██████████| 164/164 [00:15<00:00, 10.29it/s]
100%|██████████| 71/71 [00:02<00:00, 32.20it/s]


Epoch 45/50, Train Loss: 1.4451, Train Accuracy: 0.5955, Test Loss: 1.5608, Test Accuracy: 0.4773
Epoch 45


100%|██████████| 164/164 [00:15<00:00, 10.29it/s]
100%|██████████| 71/71 [00:02<00:00, 32.14it/s]


Epoch 46/50, Train Loss: 1.4384, Train Accuracy: 0.6033, Test Loss: 1.5428, Test Accuracy: 0.4982
Epoch 46


100%|██████████| 164/164 [00:15<00:00, 10.29it/s]
100%|██████████| 71/71 [00:02<00:00, 32.28it/s]


Epoch 47/50, Train Loss: 1.4235, Train Accuracy: 0.6171, Test Loss: 1.5268, Test Accuracy: 0.5116
Epoch 47


100%|██████████| 164/164 [00:15<00:00, 10.28it/s]
100%|██████████| 71/71 [00:02<00:00, 32.22it/s]


Epoch 48/50, Train Loss: 1.4162, Train Accuracy: 0.6234, Test Loss: 1.5224, Test Accuracy: 0.5192
Epoch 48


100%|██████████| 164/164 [00:15<00:00, 10.29it/s]
100%|██████████| 71/71 [00:02<00:00, 32.24it/s]


Epoch 49/50, Train Loss: 1.3957, Train Accuracy: 0.6460, Test Loss: 1.5271, Test Accuracy: 0.5067
Epoch 49


100%|██████████| 164/164 [00:15<00:00, 10.29it/s]
100%|██████████| 71/71 [00:02<00:00, 32.19it/s]

Epoch 50/50, Train Loss: 1.4038, Train Accuracy: 0.6366, Test Loss: 1.5259, Test Accuracy: 0.5120





In [11]:
torch.save(model.state_dict(), 'vgg16_melspec_'+str(num_epochs)+'_'+str(_bs)+'_'+str(_lr))

In [15]:
# ! ls -lh /content/