In [None]:
import os

import numpy as np
import pandas as pd

## Process missing data
### Set parameter 'type_of_processed_missing' to a valid value ('mean','median','zero_padding')

In [19]:
from processing_missing_data import ProcessingMissingData
import pickle
## Fill in the missing values of the training inputs with the class ProcessingMissingData

## Load train data.
## The path is assembled with os.path.join so the notebook also runs on
## non-Windows systems (a literal 'Data\\...' only resolves on Windows).
## NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
with open(os.path.join('Data', 'train_inputs.pkl'), 'rb') as handle:
    list_of_trains = pickle.load(handle)

processing_missing_data_obj = ProcessingMissingData()
## type accepts - 'mean', 'median', 'zero_padding'
type_of_processed_missing = 'zero_padding'

## Variant consumed by prepare_dataset(type='processed_missing')
## (presumably one flat feature vector per recording - confirm in ProcessingMissingData)
list_of_missing_vals_processed_trains_concat = processing_missing_data_obj.get_processed_dataset_as_list_of_vectors(
    list_of_data=list_of_trains, type=type_of_processed_missing)

## Variant that can be fed to the sampling / segmentation cell as 'curr_list_of_trains'
list_of_missing_preprocessed_trains = processing_missing_data_obj.get_processed_dataset(
    list_of_data=list_of_trains, type=type_of_processed_missing)

## Process data to be ready to segment/sample
### Uncomment/comment the appropriate 'curr_list_of_trains' assignment depending on whether you want the raw data or the data with filled missing values

In [20]:
from person_processing import PersonProcessing
import pickle

## Process the data with the sampler and segmenter

## Reload the raw training inputs so the RAW option below starts from the file contents.
## os.path.join keeps the path valid on non-Windows systems as well.
## NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
with open(os.path.join('Data', 'train_inputs.pkl'), 'rb') as handle:
    list_of_trains = pickle.load(handle)


# RAW TRAINS or WITHOUT MISSING DATA TRAINS
# curr_list_of_trains = list_of_trains # RAW
curr_list_of_trains = list_of_missing_preprocessed_trains # Filled missing values data

# parameter k: features are produced for every k in 2..param_k (inclusive)
param_k = 5
# dict with key k number of samples taken and value list of vectors for that particular k
dict_of_k_samples_features = dict()
# dict with key k number of segmentations taken and value list of vectors for that particular k
dict_of_k_segmentations_features = dict()
for train_sample in curr_list_of_trains:
    for curr_k in range(2, param_k + 1):
        person_processing_obj = PersonProcessing(train_sample)
        results_sampling = person_processing_obj.get_sampling(k=curr_k)
        results_segmentation = person_processing_obj.get_segmentation(k=curr_k)

        # setdefault creates the per-k list on first use, then appends to it
        dict_of_k_samples_features.setdefault(curr_k, []).append(results_sampling)
        dict_of_k_segmentations_features.setdefault(curr_k, []).append(results_segmentation)

## Process the labels

In [21]:
## Load the raw training labels.
## os.path.join keeps the path valid on non-Windows systems as well
## (a literal 'Data\\...' only resolves on Windows).
## NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
with open(os.path.join('Data', 'train_outputs.pkl'), 'rb') as handle:
    list_of_train_labels_raw = pickle.load(handle)

In [22]:
## Turn every raw label into an integer class index: the position of the first
## entry equal to 1 in the first row of the raw label
## (presumably a one-hot encoded speaker id - confirm against the data set).
list_of_train_labels = [
    (raw_label[0] == 1).nonzero()[0][0]
    for raw_label in list_of_train_labels_raw
]

## Neural Network Architecture

In [31]:
## Neural Network Architecture
from torch import nn
import torch 

## Pick the GPU when one is available and make new tensors default to that device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
## set_default_tensor_type resolves a string as a dotted import path, so the name
## must be fully qualified; a bare 'FloatTensor' fails on CPU-only machines.
if device.type == 'cuda':
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    torch.set_default_tensor_type('torch.FloatTensor')

class JapaneseVowelsNN(nn.Module):
    """Fully connected classifier with three hidden stages.

    Layer widths are derived from ``feature_dim``:
    ``feature_dim -> 2*feature_dim -> feature_dim -> feature_dim/2 -> num_classes``.
    Each hidden stage is Linear -> BatchNorm1d -> ReLU; dropout is applied after
    the second and third stage only. The output is raw class scores (no softmax).
    """

    def __init__(self, feature_dim, num_classes, dropout_rate):
        """Create the layers.

        Args:
            feature_dim: size of one input feature vector.
            num_classes: number of output scores per sample.
            dropout_rate: drop probability passed to ``nn.Dropout``.
        """
        super().__init__()

        wide_dim = feature_dim * 2
        # Truncating division, matching int(feature_dim / 2)
        narrow_dim = int(feature_dim / 2)

        # Linear layers
        self.input_layer = nn.Linear(feature_dim, wide_dim)
        self.hidden_layer = nn.Linear(wide_dim, feature_dim)
        self.hidden_layer2 = nn.Linear(feature_dim, narrow_dim)
        self.output_layer = nn.Linear(narrow_dim, num_classes)

        # Activation / regularisation shared by the hidden stages
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)

        # One batch norm per hidden stage, sized to that stage's output
        self.batchnorm1 = nn.BatchNorm1d(wide_dim)
        self.batchnorm2 = nn.BatchNorm1d(feature_dim)
        self.batchnorm3 = nn.BatchNorm1d(narrow_dim)

    def forward(self, x):
        """Return the class scores for a batch ``x`` of feature vectors."""
        # Stage 1: no dropout
        stage1 = self.relu(self.batchnorm1(self.input_layer(x)))
        # Stage 2 and 3: dropout after the activation
        stage2 = self.dropout(self.relu(self.batchnorm2(self.hidden_layer(stage1))))
        stage3 = self.dropout(self.relu(self.batchnorm3(self.hidden_layer2(stage2))))
        return self.output_layer(stage3)

## Methods

In [32]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from sklearn.model_selection import KFold, RepeatedKFold, StratifiedKFold

## Function for creating the data loaders and making train validate sets
def prepare_dataset(type='segmentation', k=2):
    """Pair every training feature vector with its class label.

    Args:
        type: which notebook-level feature set to use -
            'segmentation' (dict_of_k_segmentations_features[k]),
            'sampling' (dict_of_k_samples_features[k]) or
            'processed_missing' (list_of_missing_vals_processed_trains_concat).
        k: key into the segmentation / sampling dictionaries; ignored for
            'processed_missing'.

    Returns:
        A list of (feature_tensor, label_tensor) tuples, one per training sample.

    Raises:
        ValueError: if `type` is not one of the supported names, or if the
            number of feature vectors differs from the number of labels.
    """
    # The feature source is looked up lazily so only the requested global is touched
    if type == 'segmentation':
        features = dict_of_k_segmentations_features[k]
    elif type == 'sampling':
        features = dict_of_k_samples_features[k]
    elif type == 'processed_missing':
        features = list_of_missing_vals_processed_trains_concat
    else:
        raise ValueError(
            "Unknown feature type '{}'; expected 'segmentation', 'sampling' or "
            "'processed_missing'".format(type))

    feature_tensor = torch.from_numpy(np.array(features))
    label_tensor = torch.from_numpy(np.array(list_of_train_labels))

    # zip() would silently drop the surplus, hiding a features/labels mismatch
    if len(feature_tensor) != len(label_tensor):
        raise ValueError(
            "Got {} feature vectors but {} labels".format(len(feature_tensor), len(label_tensor)))

    return list(zip(feature_tensor, label_tensor))

def multi_acc(y_pred, y_test):
    """Return the classification accuracy of a batch as a percentage.

    Args:
        y_pred: tensor of class scores, one row per sample (classes along dim 1).
        y_test: tensor with the true class index of every sample.

    Returns:
        A 0-dim tensor holding the accuracy in percent, rounded to the nearest
        whole number (e.g. 75. when 3 of 4 predictions are correct).
    """
    # The highest score is the predicted class; softmax is monotonic, so taking
    # the argmax of the raw scores selects the same class.
    y_pred_tags = torch.argmax(y_pred, dim=1)

    correct_pred = (y_pred_tags == y_test).float()
    acc = correct_pred.sum() / len(correct_pred)

    # Scale to percent BEFORE rounding; rounding the 0..1 fraction first would
    # collapse every result to either 0 or 100.
    acc = torch.round(acc * 100)

    return acc

def create_cross_validator(n_splits=10):
    """Return a shuffled KFold splitter with `n_splits` folds and a fixed seed of 42."""
    return KFold(n_splits=n_splits, shuffle=True, random_state=42)

def create_data_loaders(train_data, val_data):
    """Wrap the train and validation samples of one fold in DataLoaders.

    The training loader yields batches of the notebook-level BATCH_SIZE, the
    validation loader yields one sample at a time; neither loader shuffles.

    Returns:
        The tuple (train_data_loader, val_data_loader).
    """
    loaders = (
        DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=False),
        DataLoader(dataset=val_data, batch_size=1, shuffle=False),
    )
    return loaders

## Parameter settings
### Set 'TYPE_OF_FEATURES' depending on whether you want to run experiments for processed_missing, segmentation or sampling
### If you want to run an experiment with segmentation + processed_missing (for example segment mean, but computed from data filled with zero padding), make sure that 'curr_list_of_trains' in the section 'Process data to be ready to segment/sample' at the beginning of the notebook is set to the data with filled missing values.

In [33]:
## Training hyper-parameters
EPOCHS = 200
BATCH_SIZE = 32
LEARNING_RATE = 0.001
NUM_CLASSES = 9

optimizer_name = 'Adam'
## Dropout rates swept by the experiment loop: 0.0, 0.1, ..., 0.5
range_of_dropout = [tenths / 10 for tenths in range(6)]

## Feature set to train on: "segmentation", "sampling" or "processed_missing".
## It must be set correctly because it also names the directory the results go to.
TYPE_OF_FEATURES = "processed_missing"

if TYPE_OF_FEATURES != "processed_missing":
    ## Segmentation / sampling: the sub folder names the data the features were built
    ## from, e.g. 'segmentation\\zero_padding\\...' or 'segmentation\\mean\\...'.
    ## A first recording with fewer than 26 rows is taken to mean that the missing
    ## values were NOT filled in (presumably 26 is the filled length - TODO confirm).
    sub_folder = ('no_filling_missing'
                  if curr_list_of_trains[0].shape[0] < 26
                  else type_of_processed_missing)

    ## The k loop runs from LOOP_START_AT to K inclusive
    K = param_k
    LOOP_START_AT = 5
else:
    ## Filled-missing-values features: the sub folder is the filling strategy, giving
    ## 'processed_missing\\mean\\...', 'processed_missing\\median\\...' or
    ## 'processed_missing\\zero_padding\\...'
    sub_folder = type_of_processed_missing
    ## There is no k for this feature type, so the k loop runs exactly once with k = 0
    K = 0
    LOOP_START_AT = 0

## Training. It consists of nested loops, their structure is as follows: <br>
### 1. Loop for dropout rate
###  &emsp;  2. Loop for k-segments (right now k=5 and the loop is static as we don't want other values for k right now)
###    &emsp; &emsp;   3. Loop for 10-fold cross validation
###     &emsp; &emsp; &emsp;     4. Loop for epochs
## After Loop 3 has completed, the following happens:
### 1. Result folder structure is created if it doesn't exist
### 2. Results are saved for the current dropout rate as pickle so they can be analyzed later

from torch import optim
import torch
import numpy as np 
from datetime import datetime
import os

## Move model to cuda if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def _make_optimizer(name, parameters, lr):
    """Return the optimizer registered under `name` for `parameters`.

    Raises ValueError for an unsupported name instead of silently leaving the
    optimizer undefined (or re-using the one from a previous fold).
    """
    optimizer_classes = {
        'Adam': optim.Adam,
        'RMSProp': optim.RMSprop,
        'Adagrad': optim.Adagrad,
    }
    if name not in optimizer_classes:
        raise ValueError("Unsupported optimizer '{}'; expected one of {}".format(
            name, sorted(optimizer_classes)))
    return optimizer_classes[name](parameters, lr=lr)


# Results go to Results/<feature type>/<sub folder>/. 'sub_folder' comes from the
# parameter cell and separates e.g. 'no_filling_missing' from 'zero_padding' runs.
# os.path.join keeps the path valid on non-Windows systems as well.
results_dir = os.path.join('Results', TYPE_OF_FEATURES, sub_folder)

dict_of_k_results = dict()

# Initialize cross validator
cross_validator = create_cross_validator()
for curr_dropout in range_of_dropout:
    dropout_rate = curr_dropout
    # Start from an empty result set so the pickle written for this dropout rate
    # holds only this rate's folds, not those of earlier rates.
    dict_of_k_results = dict()
    for k in range(LOOP_START_AT, K+1):
        # Prepare data
        prepared_data = prepare_dataset(type=TYPE_OF_FEATURES, k=k)
        k_fold_num = 0
        for train_idx, val_idx in cross_validator.split(prepared_data):
            start_time_current_k_fold = datetime.now()
            k_fold_num += 1
            # Create data loaders for train/val for the current cross validation.
            # The folds are shuffled, so the index arrays are NOT contiguous: each
            # sample has to be picked by its own index. Slicing first..last would
            # put validation samples into the training set.
            train_loader, val_loader = create_data_loaders(
                train_data=[prepared_data[sample_idx] for sample_idx in train_idx],
                val_data=[prepared_data[sample_idx] for sample_idx in val_idx])

            # get feature dimensionality to set in the network
            feature_dim = next(iter(train_loader))[0].shape[1]

            ## Initialize model
            model = JapaneseVowelsNN(feature_dim=feature_dim, num_classes=NUM_CLASSES, dropout_rate=dropout_rate)
            # Cast the weights to float64 (presumably to match the dtype of the
            # numpy-derived feature tensors - confirm)
            model = model.double()
            model.to(device)

            criterion = nn.CrossEntropyLoss()
            optimizer = _make_optimizer(optimizer_name, model.parameters(), LEARNING_RATE)

            for epoch in range(1, EPOCHS+1):
                # TRAINING
                train_epoch_loss = 0
                train_epoch_acc = 0
                model.train()
                for X_train_batch, y_train_batch in train_loader:
                    X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
                    optimizer.zero_grad()

                    y_train_pred = model(X_train_batch)

                    train_loss = criterion(y_train_pred, y_train_batch)
                    train_acc = multi_acc(y_train_pred, y_train_batch)

                    train_loss.backward()
                    optimizer.step()

                    train_epoch_loss += train_loss.item()
                    train_epoch_acc += train_acc.item()

                # VALIDATION
                with torch.no_grad():
                    val_epoch_loss = 0
                    val_epoch_acc = 0

                    model.eval()
                    for X_val_batch, y_val_batch in val_loader:
                        X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)

                        y_val_pred = model(X_val_batch)

                        val_loss = criterion(y_val_pred, y_val_batch)
                        val_acc = multi_acc(y_val_pred, y_val_batch)

                        val_epoch_loss += val_loss.item()
                        val_epoch_acc += val_acc.item()

                # Report the per-batch averages every 10th epoch
                if epoch % 10 == 0:
                    print("Epoch: {} | Train Loss: {} |  Val Loss: {} | Train acc: {} | Val acc: {}".format(
                        epoch,
                        round(train_epoch_loss/len(train_loader), 3),
                        round(val_epoch_loss/len(val_loader), 3),
                        round(train_epoch_acc/len(train_loader), 3),
                        round(val_epoch_acc/len(val_loader), 3)))

                if epoch == EPOCHS:
                    print('--------k param: {} k-fold num: {} completed!-----------'.format(k, k_fold_num))

            ## Create the entry for this k on first use, then store the metrics of the
            ## LAST epoch of the current fold
            if k not in dict_of_k_results:
                dict_of_k_results[k] = {'train_acc':[], 'train_loss':[], 'val_acc':[], 'val_loss':[],
                                        'type':TYPE_OF_FEATURES, 'convergence_time':[]}

            end_time_current_k_fold = datetime.now()
            dict_of_k_results[k]['train_acc'].append(train_epoch_acc/len(train_loader))
            dict_of_k_results[k]['train_loss'].append(train_epoch_loss/len(train_loader))
            dict_of_k_results[k]['val_acc'].append(val_epoch_acc/len(val_loader))
            dict_of_k_results[k]['val_loss'].append(val_epoch_loss/len(val_loader))
            dict_of_k_results[k]['convergence_time'].append(end_time_current_k_fold-start_time_current_k_fold)

        # Create the whole folder structure if needed; makedirs also creates missing
        # parent folders and does not fail when the folder already exists.
        os.makedirs(results_dir, exist_ok=True)
        # Save results as pickles (one file per dropout rate and optimizer)
        results_file = os.path.join(
            results_dir,
            'dict_of_k_results_cv10_dropout-{}_3-layered_{}.pkl'.format(dropout_rate, optimizer_name))
        with open(results_file, 'wb') as handle:
            pickle.dump(dict_of_k_results, handle, protocol=pickle.HIGHEST_PROTOCOL)

### Just trying some stuff

In [None]:
# Mean validation accuracy over every fold result currently stored for the last k.
# The key is K rather than a literal 5, because for TYPE_OF_FEATURES ==
# "processed_missing" the only key in dict_of_k_results is 0.
np.mean(dict_of_k_results[K]['val_acc'])

In [None]:
# Print the name of GPU 0 and its current memory figures in GB; nothing is printed on CPU.
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    for label, used_bytes in (('Allocated:', torch.cuda.memory_allocated(0)),
                              ('Cached:   ', torch.cuda.memory_reserved(0))):
        print(label, round(used_bytes/1024**3,1), 'GB')