The purpose of this notebook is to analyse trained networks by:
- extracting and saving features of a network @Jackson
- dimensionality reduction and visualisation to analyse separability of classes @Rishi
- simple classification techniques on extracted features @Rishi

In [None]:
import numpy as np
from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
import itertools
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA

# Saved feature and corresponding label locations

- locations where the features and labels will be, or have already been, saved

In [None]:
# Absolute, machine-specific directories holding the saved feature sets.
# Edit these two lines when the data lives somewhere else.
ed_features_dir = '/media/rishi/DATA/EDvsALL-features-20180726T055245Z-001/EDvsALL-features/'
ip_features_dir = '/media/rishi/DATA/IPvsALL-features-20180726T055258Z-001/IPvsALL-features/'

# ED class: training and validation splits
filename_features_ED_train = ed_features_dir + 'train_gap_feats.npy'
filename_labels_ED_train = ed_features_dir + 'train_y_true.npy'
filename_features_ED_val = ed_features_dir + 'val_gap_feats.npy'
filename_labels_ED_val = ed_features_dir + 'val_y_true.npy'

# IP class: only the training split is listed here
filename_features_IP_train = ip_features_dir + 'train_gap_feats.npy'
filename_labels_IP_train = ip_features_dir + 'train_y_true.npy'

# Feature Extraction from Network

 - passes a training and validation dataset through a network and extracts the features
 - saves the features and corresponding labels to file

# Load data

In [None]:
# Load the saved feature matrices for the ED training and validation splits.
features_train, features_val = (
    np.load( path ) for path in ( filename_features_ED_train, filename_features_ED_val ) )

# The stored labels are one-hot encoded; argmax along axis 1 turns each row
# into the integer index of its class.
labels_train, labels_val = (
    np.load( path ).argmax( axis=1 )
    for path in ( filename_labels_ED_train, filename_labels_ED_val ) )

# Dimensionality Reduction

- uses t-distributed Stochastic Neighbour Embedding (t-SNE) and Principal Component Analysis (PCA) to reduce the dimensionality of the data
- Hypothesis: a network that is effective at learning discriminative features should reveal well-separated classes once its features are projected down to two dimensions

## PCA

In [None]:
# Fit a 2-component PCA on the training features only; the fitted projection
# is what later cells apply to other splits.
dimRed_pca = PCA( n_components=2 ).fit( features_train )
dimRed_pca

In [None]:
# Project both splits with the PCA that was fitted on the training features.
features_embedded_pca_train, features_embedded_pca_val = map(
    dimRed_pca.transform, ( features_train, features_val ) )

# PCA does not reorder samples, so the labels carry over unchanged.
labels_embedded_pca_train, labels_embedded_pca_val = labels_train, labels_val

In [None]:
# Plot the 2-D PCA embeddings: one figure per split, one marker style per class.
# Each figure names its split in the title and carries a class legend so the
# training and validation plots can be told apart and the classes identified.
for split_name, embedding, split_labels in (
        ( 'Training', features_embedded_pca_train, labels_embedded_pca_train ),
        ( 'Validation', features_embedded_pca_val, labels_embedded_pca_val ) ):
    plt.figure()
    # Restart the marker cycle for every figure; np.unique returns the classes
    # sorted, so a class gets the same marker in both figures as long as both
    # splits contain the same set of classes.
    markers = itertools.cycle( ('x', '+', 'o', 'p') )
    for class_index in np.unique( split_labels ):
        in_class = split_labels == class_index
        plt.plot( embedding[in_class, 0], embedding[in_class, 1], next( markers ),
                  label='Class {}'.format( class_index ) )
    plt.grid()
    plt.xlabel( 'Dimension 1' )
    plt.ylabel( 'Dimension 2' )
    plt.title( 'PCA of Features - {}'.format( split_name ) )
    plt.legend()
    plt.show()

## TSNE

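### Only run this cell if t-SNE is to be run on already reduced data. In that case, first re-run the PCA cells above with around 50 components (instead of 2). Note that this cell overwrites `features_train` / `features_val`, so the classifiers further down will also be trained on the PCA-reduced features.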

In [None]:
# Switch: set to True to feed the PCA-reduced features into the following cells.
# When enabled this rebinds features_* / labels_* to the PCA outputs, so the
# original full-dimensional features are no longer reachable under those names.
use_pca_data = False
if use_pca_data:
    features_train, features_val = features_embedded_pca_train, features_embedded_pca_val
    labels_train, labels_val = labels_embedded_pca_train, labels_embedded_pca_val

In [None]:
# Compute a 2-D t-SNE embedding of the training and of the validation features.
# t-SNE cannot project unseen points with a previously learned mapping, so each
# split is embedded independently with its own fit_transform call. The two
# resulting plots therefore do not share a coordinate system.
# @TODO - potentially could concatenate the two sets and apply embedding
# t-SNE is stochastic: a fixed random_state makes the embeddings (and the plots
# built from them) reproducible from one run to the next.
dimRed_tsne = TSNE( n_components=2, random_state=1 )
features_embedded_tsne_train = dimRed_tsne.fit_transform( features_train )
labels_embedded_tsne_train = labels_train
features_embedded_tsne_val = dimRed_tsne.fit_transform( features_val )
labels_embedded_tsne_val = labels_val

In [None]:
# Plot the 2-D t-SNE embeddings: one figure per split, one marker style per class.
# Each figure names its split in the title and carries a class legend so the
# training and validation plots can be told apart and the classes identified.
for split_name, embedding, split_labels in (
        ( 'Training', features_embedded_tsne_train, labels_embedded_tsne_train ),
        ( 'Validation', features_embedded_tsne_val, labels_embedded_tsne_val ) ):
    plt.figure()
    # Restart the marker cycle for every figure; np.unique returns the classes
    # sorted, so a class gets the same marker in both figures as long as both
    # splits contain the same set of classes.
    markers = itertools.cycle( ('x', '+', 'o', 'p') )
    for class_index in np.unique( split_labels ):
        in_class = split_labels == class_index
        plt.plot( embedding[in_class, 0], embedding[in_class, 1], next( markers ),
                  label='Class {}'.format( class_index ) )
    plt.grid()
    plt.xlabel( 'Dimension 1' )
    plt.ylabel( 'Dimension 2' )
    plt.title( 'TSNE of Features - {}'.format( split_name ) )
    plt.legend()
    plt.show()

# Classification of Features

## Neural Network

In [None]:
# Multi-layer perceptron with a single hidden layer of 200 units, trained on
# the extracted features. Note the trailing comma in (200,): without it the
# parentheses would hold a plain int rather than a one-element tuple.
# random_state is fixed so the weight initialisation is repeatable.
classifier_nn = MLPClassifier( solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(200,), random_state=1 )
classifier_nn.fit( features_train, labels_train )

In [None]:
# Predict class indices with the trained network for both splits
# (training first, then validation).
predictedClass_nn_train, predictedClass_nn_val = map(
    classifier_nn.predict, ( features_train, features_val ) )

In [None]:
# Print one confusion matrix per split for the neural-network classifier;
# true labels are passed first, predictions second.
for split_name, true_labels, predicted_labels in (
        ( 'Training', labels_train, predictedClass_nn_train ),
        ( 'Validation', labels_val, predictedClass_nn_val ) ):
    print( 'Confusion Matrix - ' + split_name )
    print( confusion_matrix( true_labels, predicted_labels ) )

## Support Vector Machine

In [None]:
# Support vector classifier with an RBF kernel, trained on the extracted features.
classifier_svm = svm.SVC( kernel='rbf' )
classifier_svm.fit( features_train, labels_train );  # trailing ';' keeps the cell output empty

In [None]:
# Predict class indices with the trained SVM for both splits
# (training first, then validation).
predictedClass_svm_train, predictedClass_svm_val = map(
    classifier_svm.predict, ( features_train, features_val ) )

In [None]:
# Print one confusion matrix per split for the SVM classifier;
# true labels are passed first, predictions second.
for split_name, true_labels, predicted_labels in (
        ( 'Training', labels_train, predictedClass_svm_train ),
        ( 'Validation', labels_val, predictedClass_svm_val ) ):
    print( 'Confusion Matrix - ' + split_name )
    print( confusion_matrix( true_labels, predicted_labels ) )