In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import data_utils.data_loading as data_load
from custom_nets.cnn import CNNAll
import data_utils.classes_labels as cl

# Seed every random number generator used in this notebook so that a
# Restart & Run All reproduces the same results.
random_seed = 42

# Python's and NumPy's global generators.
random.seed(random_seed)
np.random.seed(random_seed)

# PyTorch generators (CPU and CUDA).
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)

# Request deterministic cuDNN behaviour and turn cuDNN benchmarking off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
class NetworkTrainer:
    """Train a classifier, validate it after every epoch and keep the best checkpoint.

    On construction the train and validation loaders are obtained from
    ``data_load.load_train_data(batch_size, '../data')`` and the output
    directories ``../nets``, ``../loss`` and ``../accuracy`` are created
    if they are missing.
    """

    def __init__(self, batch_size, device):
        """Store the settings, load the data loaders and prepare output directories.

        Args:
            batch_size: batch size forwarded to ``data_load.load_train_data``.
            device: device that batches (and, in ``train_network``, the
                network) are moved to.
        """
        self.batch_size = batch_size
        self.device = device
        self.trainloader, self.valloader = data_load.load_train_data(self.batch_size, '../data')
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        for directory in ('../nets', '../loss', '../accuracy'):
            os.makedirs(directory, exist_ok=True)

    def train_one_epoch(self, network, optimizer, loss_criterion, epoch, log_interval=200):
        """Run one optimisation pass over ``self.trainloader``.

        Every ``log_interval`` batches the mean loss and accuracy of that
        window are printed and the running sums are reset.

        Args:
            network: model to optimise; it is switched to training mode.
            optimizer: optimizer stepping the network's parameters.
            loss_criterion: callable ``(outputs, labels) -> loss tensor``.
            epoch: zero-based epoch index, used only for the log message.
            log_interval: number of batches per logging window.

        Returns:
            ``(loss, accuracy)`` of the most recently completed window. If
            the epoch ended before a single window completed, the means
            over all processed batches are returned instead, and
            ``(0.0, 0.0)`` if the loader produced no batches at all.
        """
        # Validation puts the network in eval mode, so training mode has to
        # be (re-)enabled at the start of every epoch.
        network.train()

        running_loss = 0.0
        running_accuracy = 0.0
        last_loss = 0.0
        last_accuracy = 0.0
        batches_in_window = 0
        completed_window = False

        for i, data in enumerate(self.trainloader):

            inputs, labels = data[0].to(self.device), data[1].to(self.device)

            optimizer.zero_grad()

            outputs = network(inputs)
            _, predicted = torch.max(outputs.detach(), 1)
            loss = loss_criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_accuracy += (labels == predicted).sum().item() / len(labels)
            batches_in_window += 1

            # Drop the references before the next batch is moved to the device.
            del inputs, labels, outputs

            if batches_in_window == log_interval:
                last_loss = running_loss / log_interval
                last_accuracy = running_accuracy / log_interval
                print(f'[epoch: {epoch + 1}, batches: {i - log_interval + 2:5d} - {i + 1:5d}] train loss: {last_loss:.3f}, train accuracy: {last_accuracy:.3f}')
                running_loss = 0.0
                running_accuracy = 0.0
                batches_in_window = 0
                completed_window = True

        # Short epochs never complete a window; report what was seen instead
        # of the 0.0 placeholders.
        if not completed_window and batches_in_window > 0:
            last_loss = running_loss / batches_in_window
            last_accuracy = running_accuracy / batches_in_window

        return last_loss, last_accuracy

    def _validate(self, network, loss_criterion):
        """Return the per-batch mean ``(loss, accuracy)`` on ``self.valloader`` as floats.

        Raises:
            ValueError: if the validation loader yields no batches.
        """
        # Disable dropout (and similar train-only behaviour) while measuring.
        network.eval()

        running_vloss = 0.0
        running_vaccuracy = 0.0
        number_of_batches = 0

        with torch.no_grad():
            for vdata in self.valloader:
                vinputs, vlabels = vdata[0].to(self.device), vdata[1].to(self.device)
                voutputs = network(vinputs)
                _, vpredicted = torch.max(voutputs, 1)
                # .item() keeps the accumulator a plain float instead of a device tensor.
                running_vloss += loss_criterion(voutputs, vlabels).item()
                running_vaccuracy += (vlabels == vpredicted).sum().item() / len(vlabels)
                number_of_batches += 1
                del vinputs, vlabels, voutputs

        if number_of_batches == 0:
            raise ValueError('validation loader yielded no batches')

        return running_vloss / number_of_batches, running_vaccuracy / number_of_batches

    def train_network(self, network, optimizer, loss_criterion, number_of_epochs, name):
        """Train for ``number_of_epochs`` epochs, validating after each one.

        After every epoch the loss and accuracy history so far is written to
        ``../loss/loss_{name}_{timestamp}.csv`` and
        ``../accuracy/accuracy_{name}_{timestamp}.csv`` (column 0 = train,
        column 1 = validation). Whenever the validation loss improves, the
        network's ``state_dict`` is saved to ``../nets/net_{name}_{timestamp}``.
        Both histories are plotted once training has finished.

        Args:
            network: model to train; it is moved to ``self.device``.
            optimizer: optimizer stepping the network's parameters.
            loss_criterion: callable ``(outputs, labels) -> loss tensor``.
            number_of_epochs: number of epochs to run.
            name: tag embedded in all output file names.

        Returns:
            Path the best ``state_dict`` is saved to.

        Raises:
            ValueError: if the validation loader yields no batches.
        """
        network.to(self.device)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        best_networkstate_path = f'../nets/net_{name}_{timestamp}'
        # inf guarantees the first finite validation loss is always checkpointed.
        best_vloss = float('inf')
        loss = np.empty((number_of_epochs, 2))
        accuracy = np.empty((number_of_epochs, 2))

        for epoch in range(number_of_epochs):
            loss[epoch, 0], accuracy[epoch, 0] = self.train_one_epoch(network, optimizer, loss_criterion, epoch)

            avg_vloss, avg_vaccuracy = self._validate(network, loss_criterion)
            loss[epoch, 1] = avg_vloss
            accuracy[epoch, 1] = avg_vaccuracy
            print(f'[epoch: {epoch + 1}] validation loss: {avg_vloss:.3f}, validation accuracy: {avg_vaccuracy:.3f}')

            # Rewrite the history each epoch so partial results survive an interruption.
            np.savetxt(f'../loss/loss_{name}_{timestamp}.csv', loss[:(epoch + 1), :], delimiter=',')
            np.savetxt(f'../accuracy/accuracy_{name}_{timestamp}.csv', accuracy[:(epoch + 1), :], delimiter=',')

            if avg_vloss < best_vloss:
                best_vloss = avg_vloss
                torch.save(network.state_dict(), best_networkstate_path)

        print('Finished Training')
        self.visualize_loss(number_of_epochs, loss)
        self.visualize_accuracy(number_of_epochs, accuracy)
        return best_networkstate_path

    def _plot_history(self, number_of_epochs, values, ylabel):
        """Plot the train (column 0) and validation (column 1) curve of one metric."""
        epochs = range(1, number_of_epochs + 1)
        plt.plot(epochs, values[:, 0], marker='o')
        plt.plot(epochs, values[:, 1], marker='o')
        plt.legend(['train', 'validation'])
        plt.xlabel('epoch')
        plt.ylabel(ylabel)
        plt.xticks(epochs)
        plt.show()

    def visualize_loss(self, number_of_epochs, loss):
        """Plot train and validation loss per epoch from a ``(number_of_epochs, 2)`` array."""
        self._plot_history(number_of_epochs, loss, 'loss')

    def visualize_accuracy(self, number_of_epochs, accuracy):
        """Plot train and validation accuracy per epoch from a ``(number_of_epochs, 2)`` array."""
        self._plot_history(number_of_epochs, accuracy, 'accuracy')


In [None]:
class NetworkTester:
    """Run a trained network over the test set and write its predictions to CSV.

    On construction the test loader and the number of test items are
    obtained from ``data_load.load_test_data(batch_size, '../data')`` and
    the ``../tests`` output directory is created if it is missing.
    """

    def __init__(self, batch_size, device):
        """Store the settings, load the test loader and prepare the output directory.

        Args:
            batch_size: batch size forwarded to ``data_load.load_test_data``.
            device: device the network and the input batches are moved to.
        """
        self.batch_size = batch_size
        self.device = device
        self.testloader, self.number_of_audio = data_load.load_test_data(self.batch_size, '../data')
        os.makedirs('../tests', exist_ok=True)

    @staticmethod
    def _predictions_path(networkstate_path):
        """Map ``<dir>/nets/<file>`` to ``<dir>/tests/<file>.csv``.

        Only the directory component is rewritten, so a file name that
        happens to contain ``nets`` is left intact. Checkpoints that do not
        live in a ``nets`` directory get their CSV next to the checkpoint.
        """
        directory, file_name = os.path.split(networkstate_path)
        parent, leaf = os.path.split(directory)
        if leaf == 'nets':
            directory = os.path.join(parent, 'tests')
        return os.path.join(directory, file_name + '.csv')

    def test_network(self, network, best_networkstate_path):
        """Load a saved ``state_dict`` and write one predicted class per test file.

        The result is a CSV with an ``fname`` index column and a ``label``
        column, written to the path given by ``_predictions_path``.

        Args:
            network: model instance matching the saved ``state_dict``; it is
                moved to ``self.device`` and left in eval mode.
            best_networkstate_path: path of the checkpoint to load.
        """
        # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
        network.load_state_dict(torch.load(best_networkstate_path, map_location=self.device))
        network.to(self.device)
        # Disable dropout so predictions are deterministic.
        network.eval()

        classes = []
        fnames = []

        with torch.no_grad():
            for data in self.testloader:
                # NOTE(review): assumes data[1] is a per-item sequence of file names - confirm.
                images, file_names = data[0].to(self.device), data[1]
                outputs = network(images)
                _, predicted = torch.max(outputs, 1)
                for file_name, predicted_label in zip(file_names, predicted.tolist()):
                    classes.append(cl.label_number_to_class[predicted_label])
                    fnames.append(file_name)

        pd.DataFrame({'label': classes}, index=fnames).to_csv(
            self._predictions_path(best_networkstate_path), index_label='fname'
        )

In [None]:
class Hyperparams:
    """Bundle of the tunable training settings for one run."""

    def __init__(self, learning_rate, optimizer_name, weight_decay, dropout_p=0):
        """Store the settings.

        Args:
            learning_rate: learning rate passed to the optimizer.
            optimizer_name: ``'ADAM'`` or ``'SGD'``.
            weight_decay: weight decay passed to the optimizer.
            dropout_p: dropout probability; only stored and used in the run name here.
        """
        self.learning_rate = learning_rate
        self.optimizer_name = optimizer_name
        self.weight_decay = weight_decay
        self.dropout_p = dropout_p

    def get_optimizer(self, network):
        """Build the optimizer selected by ``optimizer_name`` for ``network``.

        Returns:
            ``optim.Adam`` for ``'ADAM'``, or ``optim.SGD`` with momentum 0.9
            for ``'SGD'``.

        Raises:
            ValueError: if ``optimizer_name`` is neither ``'ADAM'`` nor ``'SGD'``.
        """
        if self.optimizer_name == 'ADAM':
            return optim.Adam(network.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay)
        if self.optimizer_name == 'SGD':
            return optim.SGD(network.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay, momentum=0.9)
        # Fail here rather than returning None and crashing later inside the training loop.
        raise ValueError(f"Unsupported optimizer_name {self.optimizer_name!r}; expected 'ADAM' or 'SGD'")

    def get_network_params_name(self, network):
        """Return a run tag built from ``network.name`` and the stored settings."""
        return f'{network.name}_lr_{self.learning_rate}_o_{self.optimizer_name}_wd_{self.weight_decay}_d_{self.dropout_p}'

In [None]:
# Use the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Run configuration.
batch_size = 32
number_of_epochs = 5
hyperparams = Hyperparams(learning_rate=0.001, optimizer_name='ADAM', weight_decay=0, dropout_p=0.3)

# The trainer (and its data loaders) is created before the network, so the
# order of these two constructions is kept as it was.
trainer = NetworkTrainer(batch_size, device)
network = CNNAll(dropout_p=hyperparams.dropout_p)

save_name = hyperparams.get_network_params_name(network)
optimizer = hyperparams.get_optimizer(network)

# Train, keeping the path of the checkpoint with the lowest validation loss.
loss_criterion = nn.CrossEntropyLoss()
best_state_path = trainer.train_network(network, optimizer, loss_criterion, number_of_epochs, save_name)

# Predict the test set with that checkpoint.
tester = NetworkTester(batch_size, device)
tester.test_network(network, best_state_path)