In [1]:
# import
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F

# Pytorch
import torch
from torch.autograd import Variable
import torch.nn as nn
from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler

# debuggin
from IPython.core.debugger import set_trace

print("import done!")

rng = np.random.random(23455)

import done!


In [2]:
# the model

class MultiFC(torch.nn.Module):
    
    def __init__(self, in_width, layer1_widht, layer2_width, out_width, activation, useNLL):
        super(MultiFC, self).__init__()
        self.fc1 = nn.Linear(in_width, layer1_widht)
        self.fc2 = nn.Linear(layer1_widht, layer2_width)
        self.fc3 = nn.Linear(layer2_width, out_width)
        self.activation = activation
        self.useNLL = useNLL
    
    def forward(self, x):
        
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        if(self.useNLL):
            x = F.log_softmax(self.fc3(x))
        return x
    
    def register_nan_checks(self):
        def check_grad(module, grad_input, grad_output):
            #  you can add this to see that the hook is called
            if any(np.all(np.isnan(gi.data.numpy())) for gi in grad_input if gi is not None):
                raise ValueError('NaN gradient in ' + type(module).__name__)
        self.apply(lambda module: module.register_backward_hook(check_grad))

In [3]:
# Model training
def train_model(training_x, training_target, valid_x, valid_target, layer1_width, layer2_width,
                classes, num_epochs, mini_batch_size, learning_rate, useNLL=True, activation=F.sigmoid):
    
    num_samples = training_x.shape[0]
    num_valid_samples = valid_x.shape[0]
    samples_per_batch = num_samples/mini_batch_size
    
    assert(num_samples > 0 and num_valid_samples > 0)
    
    print("Training a model:")
    print("learning_rate", learning_rate)
    print("layer 1 width", layer1_width)
    print("layer2_width", layer2_width)
    print("num_epoch", num_epoch)
    print("mini_batch_size", mini_batch_size)    
    print("num samples", num_samples)
    print("num validation samples", num_valid_samples)
    
    # building the model
    assert(layer1_width > 0 and layer2_width > 0 and classes > 0)
    model = MultiFC(training_x.shape[1], layer1_width, layer2_width, classes, activation, useNLL)

    # Convering torch to variables
    valid_x = Variable(valid_x)
    # training_target = Variable(training_target)
    # training_x = Variable(training_x)
    valid_target = Variable(valid_target)
    
    # loss function and update method
    if useNLL:
        criterion = nn.NLLLoss()
    else:
        criterion = nn.CrossEntropyLoss()
    
    assert(learning_rate > 0)
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)    

    # Statistics
    
    valid_accuracies = []
    valid_losses = []
    epoch_accuracies = []
    epoch_losses = []
    
    for epoch in range(1, num_epochs + 1):
        
        # Batch sampler
        
        sampler = BatchSampler(SubsetRandomSampler(range(num_samples)), 
                               mini_batch_size, drop_last=True)
        running_loss = 0
        running_corrects = 0

        
        for indices in sampler:
            
            indices = torch.LongTensor(indices)

            target = Variable(training_target[indices], requires_grad=False)
            x = Variable(training_x[indices], requires_grad=False)

            optimizer.zero_grad()
            
            distribution = model(x)
            loss = criterion(distribution, target)           
            # Update parameters
            loss.backward()
            optimizer.step()
            
            # statistics
            _, preds = torch.max(distribution.data, 1)
            running_loss += loss.data[0]
            running_corrects += torch.sum(preds == target.data)
        
        # nan check
        model.register_nan_checks()
        
        # Validation
        distribution = model(valid_x)
        loss = criterion(distribution, valid_target)
        _, preds = torch.max(distribution.data, 1)
 
        # Statistics
        epoch_losses.append(running_corrects / samples_per_batch)
        epoch_accuracies.append(running_loss / samples_per_batch)
 
        valid_accuracies.append(torch.sum(preds == valid_target.data) / num_valid_samples)
        valid_losses.append(running_loss / num_valid_samples)

        if(epoch % 10 == 0 or epoch == num_epochs):
            
            if(epoch == num_epochs):
                print("==== FINAL VALIDATION ====")
            
            print("Epoch: {}, Epoch loss: {}, Epoch acc: {}, Valid loss: {}, Valid acc: {}".format(
                epoch, round(epoch_losses[-1], 2), round(epoch_accuracies[-1], 2), round(valid_losses[-1], 2),
                round(valid_accuracies[-1], 2)))
    
    return model, epoch_losses, epoch_accuracies, valid_losses, valid_accuracies

In [4]:
def generate_samples(valid_samples, testing_samples, classes=np.arange(1,11), class_balanse=False,  num_features = 264):
    
    # Loading data
    data = np.array(pd.read_csv('../data/train_data.csv'))
    labels = np.array(pd.read_csv('../data/train_labels.csv'))

    # how many features
    data = data[:, 0:num_features]

    # remove classes
    indices = np.where((classes == labels))[0]
    
    data = data[indices]
    labels = labels[indices]    
    # sample indices
    indices = np.random.permutation(data.shape[0])
    
    valid_indices = indices[0: valid_samples]
    test_indices = indices[valid_samples: valid_samples + testing_samples]
    train_indices = indices[testing_samples + valid_samples:]
    
    # defining training data
    training_data = data[train_indices]
    training_labels = labels[train_indices]
    
    if(class_balanse):
        
        # lowest number of specific class samples
        samples_per_class = int(training_data.shape[0] / classes.shape[0])
        train_indices = np.zeros((classes.shape[0], samples_per_class))
        for index, c in enumerate(classes):
            class_sample_indices = np.where((c == training_labels))[0]
            train_indices[index] = np.random.choice(class_sample_indices, samples_per_class)

        train_indices = train_indices.ravel()
        train_indices = train_indices.astype(int)
        training_data = training_data[train_indices]
        training_labels = training_labels[train_indices]
    

    # Defining torch Tensor 
    training_x = torch.from_numpy(training_data).float()
    valid_x = torch.from_numpy(data[valid_indices]).float()
    test_x = torch.from_numpy(data[test_indices]).float()
    
    # loss function requires values being 0 <= v <= c -1
    training_target = torch.from_numpy(training_labels[:,0] - 1)
    valid_target = torch.from_numpy(labels[valid_indices,0] -1)
    test_target = torch.from_numpy(labels[valid_indices,0] -1)
    return training_x, training_target, valid_x, valid_target, test_x, test_target 

In [5]:
# Sample parameters (data size 4362)

valid_samples = 100
testing_samples = 100 # rest of data for training
num_features = 264
class_balanse = False
classes = np.arange(5,11)  # type: list (classes 1 - 10) decreases training sample size (!!)

# testing, validation and training samples
training_x, training_target, valid_x, valid_target, test_x, test_target = generate_samples(valid_samples, 
                                                                                           testing_samples, 
                                                                                           classes=classes,
                                                                                           class_balanse=class_balanse,
                                                                                           num_features=num_features)

In [6]:
# Hyperparameters

learning_rate = 0.2
layer1_width = 10
layer2_width = 10
num_epoch = 2200
mini_batch_size = 50
activation = F.tanh
useNLL=False

model_1 = train_model(training_x, training_target, valid_x, valid_target, layer1_width, layer2_width, classes.shape[0],
                num_epoch, mini_batch_size, learning_rate, useNLL=useNLL, activation=activation)

Training a model:
learning_rate 0.2
layer 1 width 10
layer2_width 10
num_epoch 2200
mini_batch_size 50
num samples 788
num validation samples 100
Epoch: 10, Epoch loss: 12.12, Epoch acc: 1.76, Valid loss: 0.28, Valid acc: 0.23
Epoch: 20, Epoch loss: 11.99, Epoch acc: 1.76, Valid loss: 0.28, Valid acc: 0.23
Epoch: 30, Epoch loss: 12.06, Epoch acc: 1.75, Valid loss: 0.28, Valid acc: 0.23
Epoch: 40, Epoch loss: 12.25, Epoch acc: 1.75, Valid loss: 0.28, Valid acc: 0.23
Epoch: 50, Epoch loss: 11.87, Epoch acc: 1.75, Valid loss: 0.28, Valid acc: 0.23
Epoch: 60, Epoch loss: 12.12, Epoch acc: 1.75, Valid loss: 0.28, Valid acc: 0.23
Epoch: 70, Epoch loss: 12.25, Epoch acc: 1.75, Valid loss: 0.28, Valid acc: 0.23
Epoch: 80, Epoch loss: 12.18, Epoch acc: 1.75, Valid loss: 0.28, Valid acc: 0.23
Epoch: 90, Epoch loss: 11.87, Epoch acc: 1.75, Valid loss: 0.28, Valid acc: 0.23
Epoch: 100, Epoch loss: 11.87, Epoch acc: 1.75, Valid loss: 0.28, Valid acc: 0.23
Epoch: 110, Epoch loss: 11.87, Epoch acc: 1

KeyboardInterrupt: 