In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import batch_data_loader
import numpy as np

# Pick the compute backend: CUDA when present, otherwise Apple MPS,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Checkpoint file name (the same name the training loop passes to torch.save).
CURRENT_MODEL = "40_segments_more_epochs_180.pth"

print(f"Using {device} device")

# --- Data configuration -----------------------------------------------------
DETECTED_SEGMENTS = 50    # number of pitch/timbre segments being used
TEST_SPLIT = 0.15         # share of the data used for testing (FROM TESTING FOLDER)
VALIDATION_SPLIT = 0.1    # share of the data used for validation (FROM TESTING FOLDER)
BATCH_SIZE = 1024

# load_MSD hands back the three loaders in train / test / validation order.
train_loader, test_loader, validation_loader = batch_data_loader.load_MSD(
    DETECTED_SEGMENTS,
    TEST_SPLIT,
    VALIDATION_SPLIT,
    BATCH_SIZE,
)

# Number of target classes (size of the network's output layer).
numClasses = 15

print("Data Loaded")

Using cuda device
initialized loader with 631 files and 638 chunks
initialized loader with 96 files and 97 chunks
initialized loader with 64 files and 65 chunks
Data Loaded


In [None]:
import time
class TwoChannelCNN(nn.Module):
    """Two-branch CNN over a 2-channel input tensor.

    Channel 0 and channel 1 of the input are each run through their own
    convolutional stack (``layer1`` and ``layer2`` respectively). The two
    feature maps are concatenated along the channel axis, flattened and
    passed through three fully connected layers that produce raw logits
    (no softmax is applied).

    Args:
        numClasses: Number of output logits.
        num_segments: How many leading rows (dim 2) of each channel are used;
            any further rows in the input are ignored. Defaults to 40.
        num_features: Size of the last input dimension. Defaults to 12.

    Raises:
        ValueError: If ``num_segments`` or ``num_features`` is smaller than 4,
            because the two 2x2 poolings would reduce that axis to size 0.

    With the defaults the layer layout (and therefore the state-dict keys and
    shapes) is the same as the original hard-coded network, whose first fully
    connected layer takes 30720 inputs.
    """

    # Output channels of each convolutional branch.
    BRANCH_CHANNELS = 512

    def __init__(self, numClasses, num_segments=40, num_features=12):
        super().__init__()
        if num_segments < 4 or num_features < 4:
            raise ValueError(
                "num_segments and num_features must both be >= 4, got "
                f"{num_segments} and {num_features}"
            )
        self.num_segments = num_segments
        self.num_features = num_features

        # Creation order (layer1, layer2, fc1, fc2, fc3) is kept so parameter
        # initialisation consumes the RNG in the same order as before.
        self.layer1 = self._make_branch()
        self.layer2 = self._make_branch()

        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(self.flattened_size(num_segments, num_features), 4096)
        self.fc2 = nn.Linear(4096, 1024)
        self.fc3 = nn.Linear(1024, numClasses)

    @classmethod
    def _make_branch(cls):
        """Build one conv stack: 1 -> 256 -> BRANCH_CHANNELS, each conv followed by ReLU and 2x2 max-pool."""
        return nn.Sequential(
            nn.Conv2d(1, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, cls.BRANCH_CHANNELS, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    @classmethod
    def flattened_size(cls, num_segments, num_features):
        """Return the number of inputs ``fc1`` needs for the given input size.

        The 3x3 / padding-1 convolutions keep the spatial size, and each of
        the two 2x2 / stride-2 poolings halves it (rounding down), so every
        branch ends at ``(BRANCH_CHANNELS, num_segments // 4, num_features // 4)``.
        Two branches are concatenated, hence the factor 2.
        """
        return 2 * cls.BRANCH_CHANNELS * (num_segments // 4) * (num_features // 4)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: 4-D tensor indexed as (batch, channel, segment, feature). Only
                channels 0 and 1 and the first ``num_segments`` segments are
                read. The feature axis must have ``num_features`` entries for
                the flattened size to match ``fc1``.

        Returns:
            Tensor of shape (batch, numClasses) holding unnormalised logits.
        """
        seg = self.num_segments
        x1 = x[:, 0, :seg, :].unsqueeze(1)  # (batch, 1, seg, feat)
        x2 = x[:, 1, :seg, :].unsqueeze(1)  # (batch, 1, seg, feat)

        out1 = self.layer1(x1)  # (batch, 512, seg // 4, feat // 4)
        out2 = self.layer2(x2)  # (batch, 512, seg // 4, feat // 4)

        out1 = self.dropout(out1)
        out2 = self.dropout(out2)

        out = torch.cat((out1, out2), dim=1)  # (batch, 1024, seg // 4, feat // 4)
        out = out.view(out.size(0), -1)       # (batch, 1024 * (seg // 4) * (feat // 4))
        out = F.relu(self.fc1(out))
        out = self.dropout(out)
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out
    
model = TwoChannelCNN(numClasses).to(device)

# To continue training a model across several sessions, restore a previously
# saved state dict here before the loop starts, e.g.:
# saved_model = torch.load('30_segments_more_epochs_180.pth')
# model.load_state_dict(saved_model)

print(model)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Epoch window covered by this run: the loop iterates e = START_EPOCH .. END_EPOCH - 1.
# NOTE(review): a non-zero start and the preset best loss below look like they
# were carried over from an earlier session, yet the checkpoint restore above
# is commented out - confirm this is intended before a fresh run.
START_EPOCH = 120
END_EPOCH = 180
epochs = START_EPOCH  # original variable name, kept in case other cells read it

# Best validation loss so far; a checkpoint is written only when an epoch beats it.
min_valid_loss = 0.082811


def _mean_batch_loss(net, loader, loss_fn):
    """Return the mean of the per-batch losses of ``net`` over ``loader``.

    Runs under ``torch.no_grad()`` so no autograd graph is built. The caller
    is responsible for putting ``net`` into eval mode first.
    """
    loss_sum = 0.0
    with torch.no_grad():
        for data, labels in loader:
            data, labels = data.to(device), labels.to(device)
            loss_sum += loss_fn(net(data.float()), labels).item()
    return loss_sum / len(loader)


def _count_correct(net, loader):
    """Return ``(correct, seen)``: arg-max predictions matching the labels, and samples seen.

    Runs under ``torch.no_grad()``. The caller is responsible for putting
    ``net`` into eval mode first.
    """
    correct = 0
    seen = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            predictions = torch.argmax(net(inputs.float()), dim=1)
            seen += len(labels)
            correct += (predictions == labels).sum().item()
    return correct, seen


# start train loop
for e in range(START_EPOCH, END_EPOCH):
    start = time.time()

    # ---- training pass ----
    model.train()
    train_loss = 0.0
    for data, labels in train_loader:
        data, labels = data.to(device), labels.to(device)

        optimizer.zero_grad()           # clear gradients from the previous step
        logits = model(data.float())    # forward pass
        loss = criterion(logits, labels)
        loss.backward()                 # compute gradients
        optimizer.step()                # update weights
        train_loss += loss.item()
    train_loss = train_loss / len(train_loader)

    # ---- validation pass ----
    # eval() is required here: the model contains Dropout, which must be
    # disabled while measuring validation loss and test accuracy.
    model.eval()
    valid_loss = _mean_batch_loss(model, validation_loader, criterion)

    if min_valid_loss > valid_loss:
        print(f'Validation Loss Decreased({min_valid_loss:.6f}-->{valid_loss:.6f}) \t Saving The Model')
        min_valid_loss = valid_loss

        # Saving State Dict (CURRENT_MODEL is the checkpoint name set in the first cell)
        torch.save(model.state_dict(), CURRENT_MODEL)

    # ---- accuracy on the test loader (model is still in eval mode) ----
    running_accuracy, total = _count_correct(model, test_loader)
    print('Accuracy is: %d %%' % (100 * running_accuracy / total))
    final_accuracy = running_accuracy / total

    end = time.time()
    print(f'Epoch {e+1} took {(end-start):.6f} seconds \t Training Loss: {train_loss:.6f} \t Validation Loss: {valid_loss:.6f} \t Final Accuracy: {final_accuracy:.6f}')

    # Append one CSV row per epoch: epoch, training_loss, validation_loss, final_accuracy.
    # The epoch column is the 0-based loop index e, whereas the console line
    # above prints e + 1; the logged value is kept unchanged so existing rows
    # in the file stay comparable.
    with open('NN_log.txt', 'a') as f:
        f.write(f'{e},{train_loss},{valid_loss},{final_accuracy}\n')

TwoChannelCNN(
  (layer1): Sequential(
    (0): Conv2d(1, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(1, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=30720, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=1024, bias=True)
  (fc

In [8]:
# Score the model currently held in memory on test_loader.
# To score the saved checkpoint instead, rebuild the network first:
# saved_model = torch.load(CURRENT_MODEL)
# model = TwoChannelCNN(numClasses).to(device)
# model.load_state_dict(saved_model)

model.eval()

running_accuracy = 0  # correctly classified samples so far
total = 0             # samples seen so far

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Predicted class = index of the largest logit in each row.
        predictions = model(inputs.float()).argmax(dim=1)

        total += labels.size(0)
        running_accuracy += predictions.eq(labels).sum().item()

# %d truncates to a whole percent; the unrounded ratio is printed below.
print('Accuracy is: %d %%' % (100 * running_accuracy / total))

final_accuracy = running_accuracy / total
print(final_accuracy)

Accuracy is: 97 %
0.9777167269344418
