In [1]:
import torch.optim as optim
import torch
import torchvision.models as models
import os
import argparse
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import librosa
import cv2
from tqdm import tqdm
from sklearn.model_selection import train_test_split


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
root_dir = '.'
epochs = 50
batch_size = 32
lr = 1e-3
momentum = 0.9
weight_decay = 5e-4

In [3]:
def read_data(src_dir, genres, song_samples, spec_format, debug = True):
    # Empty array of dicts with the processed features from all files
    arr_specs = []
    arr_genres = []

    # Read files from the folders
    for x,_ in genres.items():
        folder = src_dir + x

        for root, subdirs, files in tqdm(os.walk(folder)):
            for file in files:
                # Read the audio file
                file_name = folder + "/" + file
                signal, sr = librosa.load(file_name)
                signal = signal[:song_samples]

                # Debug process
                if debug:
                    print("Reading file: {}".format(file_name))

                # Convert to dataset of spectograms/melspectograms
                signals, y = splitsongs(signal, genres[x])
                # Convert to "spec" representation
                specs = spec_format(signals)

                # Save files
                arr_genres.extend(y)
                arr_specs.extend(specs)

    return np.array(arr_specs), np.array(arr_genres)

def to_melspectrogram(songs, n_fft = 1024, hop_length = 512):
    '''
    Method to convert a list of songs to a np array of melspectrograms
    '''
    # Transformation function
    melspec = lambda x: librosa.feature.melspectrogram(y = x, n_fft = n_fft,
        hop_length = hop_length)[:,:,np.newaxis]

    # map transformation of input songs to melspectrogram using log-scale
    tsongs = map(melspec, songs)
    return np.array(list(tsongs))

def splitsongs(X, y, window = 0.1, overlap = 0.5):
    '''
    Method to split a song into multiple songs using overlapping windows
    '''
    # Empty lists to hold our results
    temp_X = []
    temp_y = []

    # Get the input song array size
    xshape = X.shape[0]
    chunk = int(xshape*window)
    offset = int(chunk*(1.-overlap))

    # Split the song and create new ones on windows
    spsong = [X[i:i+chunk] for i in range(0, xshape - chunk + offset, offset)]
    for s in spsong:
        temp_X.append(s)
        temp_y.append(y)

    return np.array(temp_X), np.array(temp_y)


In [4]:
gtzan_dir = root_dir + '/genres/'
song_samples = 660000
genres = {'metal': 0, 'disco': 1, 'classical': 2, 'hiphop': 3, 'jazz': 4,
          'country': 5, 'pop': 6, 'blues': 7, 'reggae': 8, 'rock': 9}

In [5]:
print("Reading in the data..")
if os.path.isfile(os.path.join(root_dir, "x_gtzan_npy.npy")) and os.path.isfile(os.path.join(root_dir, "y_gtzan_npy.npy")):
    X = np.load("x_gtzan_npy.npy")
    y = np.load("y_gtzan_npy.npy")
    print("Using saved training data..")
else: 
    X, y = read_data(gtzan_dir, genres, song_samples, to_melspectrogram, debug=False)
    np.save('x_gtzan_npy.npy', X)
    np.save('y_gtzan_npy.npy', y)

Reading in the data..
Using saved training data..


In [6]:
def normalize(img, mean, std):
    img = img/255.0
    img[0] = (img[0] - mean[0]) / std[0]
    img[1] = (img[1] - mean[1]) / std[1]
    img[2] = (img[2] - mean[2]) / std[2]
    img = np.clip(img, 0.0, 1.0)

    return img

def get_train_test(X, y):
    X_train, x_test_valid, y_train, y_test_valid = train_test_split(X,y, train_size=0.8)
    X_val, X_test, y_val, y_test = train_test_split(x_test_valid,y_test_valid, test_size=0.5)

    X_train_rgb = []
    X_val_rgb = []
    X_test_rgb = []

    for i in range(X_train.shape[0]):
        X_train_sub = np.concatenate((X_train[i, :,:,:], X_train[i, :,:, :].reshape(128, 129, 1)), axis=2)
        X_train_sub = np.concatenate((X_train_sub, X_train[i, :,:, 0].reshape(128, 129, 1)), axis=2)
        X_train_sub = cv2.resize(X_train_sub, (224, 224))
        X_train_sub = normalize(X_train_sub, mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
        X_train_rgb.append(X_train_sub)
        
    for i in range(X_val.shape[0]):
        X_val_sub = np.concatenate((X_val[i, :,:,:], X_val[i, :,:, :].reshape(128, 129, 1)), axis=2)
        X_val_sub = np.concatenate((X_val_sub, X_val[i, :,:, 0].reshape(128, 129, 1)), axis=2)
        X_val_sub = cv2.resize(X_val_sub, (224, 224))
        X_val_sub = normalize(X_val_sub, mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
        X_val_rgb.append(X_val_sub)
        
    for i in range(X_test.shape[0]):
        X_test_sub = np.concatenate((X_test[i, :,:,:], X_test[i, :,:, :].reshape(128, 129, 1)), axis=2)
        X_test_sub = np.concatenate((X_test_sub, X_test[i, :,:, 0].reshape(128, 129, 1)), axis=2)
        X_test_sub = cv2.resize(X_test_sub, (224, 224))
        X_test_sub = normalize(X_test_sub, mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])

        X_test_rgb.append(X_test_sub)

    X_train_rgb = np.array(X_train_rgb, dtype=np.float32)
    X_test_rgb = np.array(X_test_rgb, dtype=np.float32)
    X_val_rgb = np.array(X_val_rgb, dtype=np.float32)

    # print("shape before reshape", X_train_rgb.shape, X_test_rgb.shape)
    X_train_rgb = X_train_rgb.reshape(-1, 3, 224, 224)
    X_val_rgb = X_val_rgb.reshape(-1, 3, 224, 224)
    X_test_rgb = X_test_rgb.reshape(-1, 3, 224, 224)

    return X_train_rgb, X_val_rgb, X_test_rgb, y_train, y_val, y_test

In [7]:
X_train, X_val, X_test, y_train,y_val, y_test = get_train_test(X, y)

In [8]:
train_dataset = torch.utils.data.TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = torch.utils.data.TensorDataset(torch.Tensor(X_val), torch.Tensor(y_val))
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataset = torch.utils.data.TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test))
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

# CNN

In [111]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=256*14*14, out_features=1024)
        self.bn5 = nn.BatchNorm1d(1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=512)
        self.bn6 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(in_features=512, out_features=10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu(x)
        x = self.pool(x)
        x = x.flatten(1)
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.bn5(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.bn6(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x

In [118]:
net = CNN().to(device)
net.train()
optimizer = optim.Adam(net.parameters(), lr=lr,
                    weight_decay=weight_decay)

In [119]:
#Trianing + Validation
for epoch in range(epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for i, data in enumerate(train_loader):
        # Get the inputs and labels
        inputs, labels = data
        if device.type == 'cuda':
            inputs = inputs.cuda()
            labels = labels.type(torch.LongTensor).cuda()
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:
            print(f"Epoch {epoch+1}, Batch {i+1}: Loss {running_loss/100:.3f} Accuracy {correct / total:.3f}")
            running_loss = 0.0
            total = 0
            correct = 0
            
    #validation 
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            if device.type == 'cuda':
                inputs = inputs.cuda()
                labels = labels.type(torch.LongTensor).cuda()
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = correct / total
    print(f"Epoch {epoch+1}, Validation Accuracy: {val_acc:.3f}")

Epoch 1, Batch 100: Loss 1.904 Accuracy 0.303
Epoch 1, Batch 200: Loss 1.529 Accuracy 0.443
Epoch 1, Batch 300: Loss 1.291 Accuracy 0.555
Epoch 1, Batch 400: Loss 1.210 Accuracy 0.578
Epoch 1, Validation Accuracy: 0.616
Epoch 2, Batch 100: Loss 1.048 Accuracy 0.640
Epoch 2, Batch 200: Loss 1.053 Accuracy 0.644
Epoch 2, Batch 300: Loss 1.032 Accuracy 0.647
Epoch 2, Batch 400: Loss 1.020 Accuracy 0.655
Epoch 2, Validation Accuracy: 0.678
Epoch 3, Batch 100: Loss 0.920 Accuracy 0.692
Epoch 3, Batch 200: Loss 0.917 Accuracy 0.682
Epoch 3, Batch 300: Loss 0.886 Accuracy 0.694
Epoch 3, Batch 400: Loss 0.932 Accuracy 0.682
Epoch 3, Validation Accuracy: 0.649
Epoch 4, Batch 100: Loss 0.846 Accuracy 0.724
Epoch 4, Batch 200: Loss 0.831 Accuracy 0.709
Epoch 4, Batch 300: Loss 0.860 Accuracy 0.711
Epoch 4, Batch 400: Loss 0.822 Accuracy 0.724
Epoch 4, Validation Accuracy: 0.717
Epoch 5, Batch 100: Loss 0.775 Accuracy 0.738
Epoch 5, Batch 200: Loss 0.770 Accuracy 0.738
Epoch 5, Batch 300: Loss 0.8

In [121]:
#Test
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        if device.type == 'cuda':
            inputs = inputs.cuda()
            labels = labels.type(torch.LongTensor).cuda()
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

val_acc = correct / total
print(f" Test Accuracy: {val_acc:.3f}")

 Test Accuracy: 0.823


# ResNet50

In [114]:
net = models.resnet50(pretrained=True)

for param in net.parameters():
        param.requires_grad = False

new_fc = nn.Sequential(*list(net.fc.children())[:-1] + [nn.Linear(2048, 10)])
net.fc = new_fc
net = net.to(device)
# net = net.type(torch.cuda.DoubleTensor)
net.train()
optimizer = optim.Adam(net.parameters(), lr=lr,
                    weight_decay=weight_decay)



In [110]:
#Trianing + Validation
for epoch in range(epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for i, data in enumerate(train_loader):
        # Get the inputs and labels
        inputs, labels = data
        if device.type == 'cuda':
            inputs = inputs.cuda()
            labels = labels.type(torch.LongTensor).cuda()
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:
            print(f"Epoch {epoch+1}, Batch {i+1}: Loss {running_loss/100:.3f} Accuracy {correct / total:.3f}")
            running_loss = 0.0
            total = 0
            correct = 0
            
    #validation 
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            if device.type == 'cuda':
                inputs = inputs.cuda()
                labels = labels.type(torch.LongTensor).cuda()
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = correct / total
    print(f"Epoch {epoch+1}, Validation Accuracy: {val_acc:.3f}")


Epoch 1, Batch 100: Loss 1.909 Accuracy 0.309
Epoch 1, Batch 200: Loss 1.635 Accuracy 0.422
Epoch 1, Batch 300: Loss 1.565 Accuracy 0.449
Epoch 1, Batch 400: Loss 1.453 Accuracy 0.490
Epoch 1, Validation Accuracy: 0.477
Epoch 2, Batch 100: Loss 1.412 Accuracy 0.500
Epoch 2, Batch 200: Loss 1.367 Accuracy 0.527
Epoch 2, Batch 300: Loss 1.370 Accuracy 0.513
Epoch 2, Batch 400: Loss 1.356 Accuracy 0.527
Epoch 2, Validation Accuracy: 0.507
Epoch 3, Batch 100: Loss 1.309 Accuracy 0.550
Epoch 3, Batch 200: Loss 1.334 Accuracy 0.537
Epoch 3, Batch 300: Loss 1.307 Accuracy 0.556
Epoch 3, Batch 400: Loss 1.333 Accuracy 0.531
Epoch 3, Validation Accuracy: 0.522
Epoch 4, Batch 100: Loss 1.265 Accuracy 0.557
Epoch 4, Batch 200: Loss 1.301 Accuracy 0.553
Epoch 4, Batch 300: Loss 1.272 Accuracy 0.568
Epoch 4, Batch 400: Loss 1.309 Accuracy 0.548
Epoch 4, Validation Accuracy: 0.529
Epoch 5, Batch 100: Loss 1.219 Accuracy 0.579
Epoch 5, Batch 200: Loss 1.258 Accuracy 0.560
Epoch 5, Batch 300: Loss 1.2

In [100]:
#Test
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        if device.type == 'cuda':
            inputs = inputs.cuda()
            labels = labels.type(torch.LongTensor).cuda()
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

val_acc = correct / total
print(f" Test Accuracy: {val_acc:.3f}")

 Test Accuracy: 0.624


# CNN+Learning Rate Decay


In [10]:
from torch.optim.lr_scheduler import StepLR

In [28]:
class CNN_lr(nn.Module):
    def __init__(self):
        super(CNN_lr, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=256*14*14, out_features=512)
        self.bn5 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(in_features=512, out_features=10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu(x)
        x = self.pool(x)
        x = x.flatten(1)
        x = self.fc1(x)
        x = self.bn5(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [29]:
net = CNN_lr().to(device)
net.train()
optimizer = optim.Adam(net.parameters(), lr=lr,
                    weight_decay=weight_decay)
scheduler = StepLR(optimizer, step_size=20, gamma=0.1)

In [30]:
#Trianing + Validation
for epoch in range(epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for i, data in enumerate(train_loader):
        # Get the inputs and labels
        inputs, labels = data
        if device.type == 'cuda':
            inputs = inputs.cuda()
            labels = labels.type(torch.LongTensor).cuda()
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:
            print(f"Epoch {epoch+1}, Batch {i+1}: Loss {running_loss/100:.3f} Accuracy {correct / total:.3f}")
            running_loss = 0.0
            total = 0
            correct = 0
            
    #validation 
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            if device.type == 'cuda':
                inputs = inputs.cuda()
                labels = labels.type(torch.LongTensor).cuda()
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = correct / total
    print(f"Epoch {epoch+1}, Validation Accuracy: {val_acc:.3f}")
    
    scheduler.step()

Epoch 1, Batch 100: Loss 1.859 Accuracy 0.336
Epoch 1, Batch 200: Loss 1.419 Accuracy 0.497
Epoch 1, Batch 300: Loss 1.220 Accuracy 0.591
Epoch 1, Batch 400: Loss 1.133 Accuracy 0.623
Epoch 1, Validation Accuracy: 0.642
Epoch 2, Batch 100: Loss 1.005 Accuracy 0.660
Epoch 2, Batch 200: Loss 0.956 Accuracy 0.672
Epoch 2, Batch 300: Loss 0.947 Accuracy 0.680
Epoch 2, Batch 400: Loss 0.930 Accuracy 0.682
Epoch 2, Validation Accuracy: 0.688
Epoch 3, Batch 100: Loss 0.772 Accuracy 0.748
Epoch 3, Batch 200: Loss 0.820 Accuracy 0.727
Epoch 3, Batch 300: Loss 0.841 Accuracy 0.716
Epoch 3, Batch 400: Loss 0.869 Accuracy 0.709
Epoch 3, Validation Accuracy: 0.739
Epoch 4, Batch 100: Loss 0.686 Accuracy 0.782
Epoch 4, Batch 200: Loss 0.723 Accuracy 0.753
Epoch 4, Batch 300: Loss 0.745 Accuracy 0.743
Epoch 4, Batch 400: Loss 0.746 Accuracy 0.757
Epoch 4, Validation Accuracy: 0.743
Epoch 5, Batch 100: Loss 0.584 Accuracy 0.805
Epoch 5, Batch 200: Loss 0.650 Accuracy 0.778
Epoch 5, Batch 300: Loss 0.6

In [31]:
#Test
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        if device.type == 'cuda':
            inputs = inputs.cuda()
            labels = labels.type(torch.LongTensor).cuda()
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

val_acc = correct / total
print(f" Test Accuracy: {val_acc:.3f}")

 Test Accuracy: 0.865


# CNN+Learning Rate Decay+Residual Block

In [32]:
class ResBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ResBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        residual = x
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x += residual
        x = F.relu(x)
        return x

class ResCNN(nn.Module):
    def __init__(self):
        super(ResCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.res1 = ResBlock(32, 32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.res2 = ResBlock(64, 64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.res3 = ResBlock(128, 128)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.res4 = ResBlock(256, 256)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=256*14*14, out_features=512)
        self.bn5 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(in_features=512, out_features=10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)


    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.res1(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.res2(x)
        x = self.pool(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.relu(x)
        x = self.res3(x)
        x = self.pool(x)
        x = self.conv4(x)
        x = self.bn4(x)
        x = self.relu(x)
        x = self.res4(x)
        x = self.pool(x)
        x = x.flatten(1)
        x = self.fc1(x)
        x = self.bn5(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [33]:
net = ResCNN().to(device)
net.train()
optimizer = optim.Adam(net.parameters(), lr=lr,
                    weight_decay=weight_decay)
scheduler = StepLR(optimizer, step_size=20, gamma=0.1)

In [34]:
#Trianing + Validation
for epoch in range(epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    for i, data in enumerate(train_loader):
        # Get the inputs and labels
        inputs, labels = data
        if device.type == 'cuda':
            inputs = inputs.cuda()
            labels = labels.type(torch.LongTensor).cuda()
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Print statistics
        running_loss += loss.item()
        if i % 100 == 99:
            print(f"Epoch {epoch+1}, Batch {i+1}: Loss {running_loss/100:.3f} Accuracy {correct / total:.3f}")
            running_loss = 0.0
            total = 0
            correct = 0
            
    #validation 
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader:
            inputs, labels = data
            if device.type == 'cuda':
                inputs = inputs.cuda()
                labels = labels.type(torch.LongTensor).cuda()
            outputs = net(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = correct / total
    print(f"Epoch {epoch+1}, Validation Accuracy: {val_acc:.3f}")
    
    scheduler.step()

Epoch 1, Batch 100: Loss 1.833 Accuracy 0.339
Epoch 1, Batch 200: Loss 1.545 Accuracy 0.457
Epoch 1, Batch 300: Loss 1.365 Accuracy 0.525
Epoch 1, Batch 400: Loss 1.255 Accuracy 0.569
Epoch 1, Validation Accuracy: 0.609
Epoch 2, Batch 100: Loss 1.048 Accuracy 0.642
Epoch 2, Batch 200: Loss 1.044 Accuracy 0.644
Epoch 2, Batch 300: Loss 1.023 Accuracy 0.652
Epoch 2, Batch 400: Loss 0.957 Accuracy 0.677
Epoch 2, Validation Accuracy: 0.655
Epoch 3, Batch 100: Loss 0.866 Accuracy 0.703
Epoch 3, Batch 200: Loss 0.861 Accuracy 0.712
Epoch 3, Batch 300: Loss 0.855 Accuracy 0.710
Epoch 3, Batch 400: Loss 0.796 Accuracy 0.730
Epoch 3, Validation Accuracy: 0.705
Epoch 4, Batch 100: Loss 0.677 Accuracy 0.767
Epoch 4, Batch 200: Loss 0.716 Accuracy 0.765
Epoch 4, Batch 300: Loss 0.762 Accuracy 0.745
Epoch 4, Batch 400: Loss 0.745 Accuracy 0.742
Epoch 4, Validation Accuracy: 0.751
Epoch 5, Batch 100: Loss 0.581 Accuracy 0.808
Epoch 5, Batch 200: Loss 0.616 Accuracy 0.793
Epoch 5, Batch 300: Loss 0.6

In [35]:
#Test
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        inputs, labels = data
        if device.type == 'cuda':
            inputs = inputs.cuda()
            labels = labels.type(torch.LongTensor).cuda()
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

val_acc = correct / total
print(f" Test Accuracy: {val_acc:.3f}")

 Test Accuracy: 0.886
