In [1]:
import feature_extraction as fe

import numpy as np

import os
import random
import pickle
import warnings
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

seed = 3
random.seed(seed)
torch.manual_seed(seed)

warnings.filterwarnings('ignore')

In [2]:
def data_batcher(data, batch_size):
    random.shuffle(data)
    return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]

In [3]:
dataset = 'GTZAN'
new_spects = False


dataset_filename = f'../datasets/{dataset}.gui'
pickle_filename = f'./feats_spects-pickles/{dataset_filename.split("/")[-1][:-4]}_spects.pkl'

#spects, labels = fe.extract_spectrograms(dataset_filename)
if new_spects or not os.path.isfile(pickle_filename):
    spects, labels = fe.extract_spectrograms(dataset_filename)
    with open(pickle_filename, 'wb') as f:
        pickle.dump([spects, labels], f)
else:
    with open(pickle_filename, 'rb') as f:
        spects, labels = pickle.load(f)
    print(f'{spects.shape[0]} spectrograms extracted')

1000 spectrograms extracted


In [4]:
scaler = StandardScaler()
#scaled_feats = scaler.fit_transform(feats)

x_trainval, x_test, y_trainval, y_test = train_test_split(spects, labels, test_size=0.2)
x_train, x_val, y_train, y_val = train_test_split(x_trainval, y_trainval, test_size=0.25)

train_data = [(x, y) for x, y in zip(x_train, y_train)]
val_data = [(x, y) for x, y in zip(x_val, y_val)]
test_data = [(x, y) for x, y in zip(x_test, y_test)]

print(len(train_data), "training samples")
print(len(val_data), "validation samples")
print(len(test_data), "test samples")

600 training samples
200 validation samples
200 test samples


In [5]:
np.shape(train_data)

(600, 2)

In [6]:
#Input = (N,Cin,Lin) 
    #N = batch size
    #Cin = Channels (np.shape(spectrograms[0])[0])
    #Lin = np.shape(spectrograms[0])[1]
#Lout = ((Lin + 2*padding -dilation*(kernel_size-1)-1)/stride) + 1

In [197]:
class CNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, embedding_size, num_layers):
        super(CNNClassifier, self).__init__()
        self.conv1d1 = nn.Conv1d(input_size, hidden_size[0], kernel_size) #128-4=124
        self.conv1d1_bn = nn.BatchNorm1d(n_features[0], momentum=0.9) 
        self.pool1 = torch.nn.MaxPool1d(kernel_size=2) #124/2 = 62
        self.dropout1 = nn.Dropout(0.5)
        
        self.conv1d2 = nn.Conv1d(hidden_size[0], hidden_size[1], kernel_size) #62-4 = 58 
        self.conv1d2_bn = nn.BatchNorm1d(n_features[1], momentum=0.9)
        self.pool2 = torch.nn.MaxPool1d(kernel_size=2) #58/2=29
        self.dropout2 = nn.Dropout(0.5)
        
        self.conv1d3 = nn.Conv1d(hidden_size[1], hidden_size[2], kernel_size) #29-4=25
        self.conv1d3_bn = nn.BatchNorm1d(n_features[2], momentum=0.9)
        self.pool3 = torch.nn.MaxPool1d(kernel_size=2) #25/2=12,5
        self.dropout3 = nn.Dropout(0.5)
        
        self.h0 = torch.randn(2*num_layers, embedding_size, hidden_size[2]*12)
        self.c0 = torch.randn(2*num_layers, embedding_size, hidden_size[2]*12)
        
        #self.fc1 = nn.Linear(hidden_size[2]*12, 100)
        
        #self.rnn = torch.nn.LSTM(embedding_size, hidden_size[2]*12, num_layers, bidirectional = True )
        #self.rnn = torch.nn.LSTM(hidden_size[2]*12, hidden_size[2]*12, num_layers, bidirectional=True, batch_first=True )
        self.rnn = torch.nn.LSTM(12, 24, num_layers, batch_first=True, bidirectional=True)
        
        
        self.fc2 = nn.Linear(3072, num_classes)
        
    
    def forward(self, x):#, input_lengths):
        #order: conv, batchnorm, activation, maxpool, drop
        #Calls:
        #Convolution 1d:      out = F.relu(self.conv1d1(x))
        #Batch normalization: out = self.conv1d1_bn(out)
        #Dropout:             out = self.dropout1(out)
        #Max Pooling:         out = self.pool1(out)
        
        #Layer1
        print('Input shape:', x.shape)
        print('---\nLayer 1')
        out = F.relu(self.conv1d1_bn(self.conv1d1(x)))
        print('After CNN:', out.shape)
        out = self.pool1(out)
        print('After Pooling:', out.shape)
        out = self.dropout1(out)
        
        #Layer2
        print('---\nLayer 2')
        out = F.relu(self.conv1d2_bn(self.conv1d2(out)))
        print('After CNN:', out.shape)
        out = self.pool2(out)
        print('After Pooling:', out.shape)
        out = self.dropout2(out)
        
        #Layer3
        print('---\nLayer 3')
        out = F.relu(self.conv1d3_bn(self.conv1d3(out)))
        print('After CNN:', out.shape)
        out = self.pool3(out)
        print('After Pooling:', out.shape)
        out = self.dropout3(out)
        
        #Flatten
        #out = out.view(out.shape[0], -1)
        #print('---\nAfter Flatten:', out.shape)
        
        #FC1
        #out = self.fc1(out)
        
        #LSTM
        #print(input_lengths)
        #packed = torch.nn.utils.rnn.pack_padded_sequence(out, input_lengths)
        #print('---\nAfter Packing:', packed.shape)
        print('---\nLSTM')
        out, (h_n, c_n) = self.rnn(out)
        print('After LSTM:', out.shape)
        
        #Flatten
        out = out.contiguous().view(out.shape[0], -1)
        print('---\nAfter Flatten:', out.shape)
        
        #FC1
        out = self.fc2(out)
        
        return out # we can return here the softmax, but then we should use the nll_loss instead of the cross_entropy
    

#model = CNNClassifier(input_size, hidden_size, num_classes).to(device)

# Loss and optimizer
#criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 

In [204]:
def train(log_interval, model, device, data_batches, optimizer, epoch):
    losses = []
    len_glob_data = np.prod(np.array(np.array(data_batches).shape[:-1]))
    
    model.train()
    
    total_loss = 0
    ncorrect = 0
    nsentences = 0
    ntokens = 0
    niterations = 0
    
    for i, batch in enumerate(data_batches):
        x, y = list(zip(*batch))
        
        #
        X = [torch.from_numpy(d[0]) for d in batch]
        X_lengths = [x.numel() for x in X]
        ntokens += sum(X_lengths)
        X_lengths = torch.tensor(X_lengths, dtype=torch.long, device=device)
        
        
        # Move tensors to the configured device
        data = torch.from_numpy(np.array(x)).float().to(device)
        target = torch.from_numpy(np.array(y)).to(device)
        
        # clear all the gradients of the optimized tensors
        optimizer.zero_grad()
        
        # Forward pass
        output = model(data)#, X_lengths)
        loss = criterion(output, target)
        
        # Backward and optimize
        loss.backward()
        optimizer.step()
        
        losses.append(loss.item())
        
        if (i+1) % log_interval == 0:
            print(f'Train Epoch: {epoch+1} [{(i+1)*len(data)}/{len_glob_data} ({str(int(100. * (i+1)*len(data) / len_glob_data)).zfill(2)}%)]\tLoss: {round(loss.item(),6)}')
    return np.array(losses).mean()

In [205]:
def validate(model, device, data_batches):
    len_glob_data = np.prod(np.array(np.array(data_batches).shape[:-1]))
    
    model.eval()  # let's put the model in evaluation mode

    validation_loss = []
    correct = 0
    with torch.no_grad():  # we don't need gradient computation at all
        for i, batch in enumerate(data_batches):
            x, y = list(zip(*batch))
        
            data = torch.from_numpy(np.array(x)).float().to(device)
            target = torch.from_numpy(np.array(y)).to(device)
            
            output = model(data)
            loss = criterion(output, target)
            
            validation_loss.append(loss.item())
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    validation_loss = np.array(validation_loss).mean()
    accuracy = 100. * correct / len_glob_data

    print(f'\nValidation set: Average loss: {round(validation_loss, 4)}, Accuracy: {correct}/{len_glob_data} ({int(accuracy)}%)\n')
    
    return accuracy, validation_loss

In [206]:
# Hyperparameters
input_size = 640
hidden_size = [256, 128, 64]
num_classes = 10
n_features = [256, 128, 64] #Get from xtrain (see BatchNorm1d)
kernel_size = 5
train_batch_size = 100
valid_batch_size = 100
embedding_size = 64
num_layers = 1


num_epochs = 100
learning_rate = 0.0001

In [None]:
train_data_batches = data_batcher(train_data, train_batch_size)
val_data_batches = data_batcher(val_data, valid_batch_size)

model = CNNClassifier(input_size, hidden_size, num_classes, embedding_size, num_layers).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

train_losses = []
valid_losses = []
accuracies = []

for epoch in range(num_epochs):
    
    train_loss = train(2, model, device, train_data_batches, optimizer, epoch)
    train_losses.append(train_loss)

    accuracy, valid_loss = validate(model, device, val_data_batches)
    valid_losses.append(valid_loss)
    accuracies.append(accuracy)

Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
Afte

---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input sh

After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])

Validation set: Average loss: 2.2943, Accuracy: 17/200 (8%)

Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 12

Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
Afte

After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])

Validation set: Average loss: 2.2285, Accuracy: 23/200 (11%)

Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 1

After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.S

Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
Afte

After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
Aft

After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LST

After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])

Validation set: Average loss: 2.1112, Accuracy: 39/200 (19%)

Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 2

After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.S

Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
Afte

After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After

---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After 

After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])

Validation set: Average loss: 2.353, Accuracy: 39/200 (19%)

Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25

---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LS

Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])

Validation set: Average loss: 2.8993, Accuracy: 32/200 (16%)

Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 2

Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
After Pooling: torch.Size([100, 128, 29])
---
Layer 3
After CNN: torch.Size([100, 64, 25])
After Pooling: torch.Size([100, 64, 12])
---
LSTM
After LSTM: torch.Size([100, 64, 48])
---
After Flatten: torch.Size([100, 3072])
Input shape: torch.Size([100, 640, 128])
---
Layer 1
After CNN: torch.Size([100, 256, 124])
After Pooling: torch.Size([100, 256, 62])
---
Layer 2
After CNN: torch.Size([100, 128, 58])
Afte

In [None]:
fig = plt.figure()
fig.subplots_adjust(right=2.5)

ax1 = fig.add_subplot(121)
ax1.set_title('Loss')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.plot(train_losses, 'b-', label='Train')
ax1.plot(valid_losses, 'r-', label='Validation')
ax1.legend(loc='upper right')

ax1 = fig.add_subplot(122)
ax1.set_title('Accuracy')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Accuracy (%)')
ax1.plot(accuracies, 'b-')

plt.show()