<h1> Performance Metrics </h1>

In this notebook, we will load in a trained multiclass LSTM model and evaluate it's performance. 

In [2]:
import sys;
import subprocess;
import numpy as np
import pandas as pd

from get_labels import get_labels
from sklearn.externals import joblib
from sklearn.cross_validation import train_test_split

import keras.backend as K
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense, Activation, Reshape
from keras.layers import LSTM

Using TensorFlow backend.


<h4> Load training data and labels. </h4>
We will load the same data used in training our LSTM model but keep only the test splits this time. The random seed of 1024 has been used to ensure we get the same splits as when training the model. We include the same multiclass_loss function as before so that the model is loaded properly.

In [6]:
labels = get_labels();
labels_array = np.array([x for x in labels]);
labels_reshaped = labels_array.reshape(1851243, 1, 1070);

In [7]:
train_x = joblib.load("/mnt/cleaned_tfidf_reduced_420_morning");
train_x_reshaped = train_x.reshape(1851243,1,1000);

In [8]:
x_train, x_test, y_train, y_test = train_test_split(train_x_reshaped, labels_reshaped, test_size=0.20, random_state=1024)

In [9]:
#Our custom loss function
def multiclass_loss(y_true, y_pred):
    EPS = 1e-5
    y_pred = K.clip(y_pred, EPS, 1 - EPS)
    return -K.mean((1 - y_true) * K.log(1 - y_pred) + y_true * K.log(y_pred))

model = load_model('khot_LSTM_0904.h5', custom_objects={"multiclass_loss":multiclass_loss})

Delete unused variables to clean up memory. This should free around 60 GiB. 

In [10]:
del labels
del labels_array
del labels_reshaped

del train_x
del train_x_reshaped

del x_train
del y_train

In [14]:
#predictions = np.array([89, 702, 307, 590,  93, 851, 782, 305, 349, 878, 913,  84, 663, 924, 403]);
#predictions = np.array([702, 782,  89, 305, 307, 851, 798,  93, 663,  84, 878, 590,  95, 499, 349]);

def get_preds_array():
    #all_preds = np.array((1,len(x_test)));
    #all_test = np.array((1,len(x_test)));
    all_preds = [];
    all_test = [];
    
    for idx, test_val in enumerate(x_test):
        y_test_val = y_test[idx];
        
        k = len(y_test_val[y_test_val == 1])
        
        pred_val = model.predict(test_val.reshape(1,1,1000))[0][0]
        topk = pred_val.argsort()[-1 * k:][::-1]

        pred_arr = np.zeros(y_test_val.shape);
        pred_arr[0,topk] = 1;
        
        all_preds.extend(pred_arr[0]);
        all_test.extend(y_test_val[0]);
        
        if idx % 500 == 0:
            sys.stdout.write('\rOn ' + str(idx) + ' / ' + str(len(x_test)));
            
    return (all_preds, all_test);

(all_preds, all_test) = get_preds_array();

(1, 1070)
(9.0, 9)
On 0 / 370249(1, 1070)
(9.0, 9)
(1, 1070)
(4.0, 4)
(1, 1070)
(11.0, 11)
(1, 1070)
(19.0, 19)
(1, 1070)
(8.0, 8)
(1, 1070)
(20.0, 20)
(1, 1070)
(10.0, 10)
(1, 1070)
(14.0, 14)
(1, 1070)
(15.0, 15)
(1, 1070)
(11.0, 11)
(1, 1070)
(2.0, 2)
(1, 1070)
(3.0, 3)
(1, 1070)
(7.0, 7)
(1, 1070)
(14.0, 14)
(1, 1070)
(16.0, 16)
(1, 1070)
(7.0, 7)
(1, 1070)
(10.0, 10)
(1, 1070)
(7.0, 7)
(1, 1070)
(8.0, 8)
(1, 1070)
(9.0, 9)
(1, 1070)
(11.0, 11)
(1, 1070)
(5.0, 5)
(1, 1070)
(16.0, 16)
(1, 1070)
(6.0, 6)
(1, 1070)
(12.0, 12)
(1, 1070)
(17.0, 17)
(1, 1070)
(20.0, 20)
(1, 1070)
(6.0, 6)
(1, 1070)
(20.0, 20)
(1, 1070)
(24.0, 24)
(1, 1070)
(19.0, 19)
(1, 1070)
(8.0, 8)
(1, 1070)
(30.0, 30)
(1, 1070)
(2.0, 2)
(1, 1070)
(9.0, 9)
(1, 1070)
(18.0, 18)
(1, 1070)
(13.0, 13)
(1, 1070)
(9.0, 9)
(1, 1070)
(23.0, 23)
(1, 1070)
(9.0, 9)
(1, 1070)
(11.0, 11)
(1, 1070)
(9.0, 9)
(1, 1070)
(12.0, 12)
(1, 1070)
(23.0, 23)
(1, 1070)
(10.0, 10)
(1, 1070)
(9.0, 9)
(1, 1070)
(14.0, 14)
(1, 1070)
(29.0, 29)


(1, 1070)
(17.0, 17)
(1, 1070)
(14.0, 14)
(1, 1070)
(22.0, 22)
(1, 1070)
(9.0, 9)
(1, 1070)
(21.0, 21)
(1, 1070)
(13.0, 13)
(1, 1070)
(14.0, 14)
(1, 1070)
(5.0, 5)
(1, 1070)
(9.0, 9)
(1, 1070)
(7.0, 7)
(1, 1070)
(5.0, 5)
(1, 1070)
(17.0, 17)
(1, 1070)
(18.0, 18)
(1, 1070)
(5.0, 5)
(1, 1070)
(14.0, 14)
(1, 1070)
(19.0, 19)
(1, 1070)
(9.0, 9)
(1, 1070)
(10.0, 10)
(1, 1070)
(16.0, 16)
(1, 1070)
(17.0, 17)
(1, 1070)
(6.0, 6)
(1, 1070)
(4.0, 4)
(1, 1070)
(9.0, 9)
(1, 1070)
(7.0, 7)
(1, 1070)
(28.0, 28)
(1, 1070)
(12.0, 12)
(1, 1070)
(17.0, 17)
(1, 1070)
(7.0, 7)
(1, 1070)
(18.0, 18)
(1, 1070)
(18.0, 18)
(1, 1070)
(8.0, 8)
(1, 1070)
(14.0, 14)
(1, 1070)
(8.0, 8)
(1, 1070)
(14.0, 14)
(1, 1070)
(13.0, 13)
(1, 1070)
(7.0, 7)
(1, 1070)
(8.0, 8)
(1, 1070)
(7.0, 7)
(1, 1070)
(8.0, 8)
(1, 1070)
(8.0, 8)
(1, 1070)
(4.0, 4)
(1, 1070)
(7.0, 7)
(1, 1070)
(28.0, 28)
(1, 1070)
(15.0, 15)
(1, 1070)
(16.0, 16)
(1, 1070)
(10.0, 10)
(1, 1070)
(9.0, 9)
(1, 1070)
(10.0, 10)
(1, 1070)
(8.0, 8)
(1, 1070)
(7.0, 7

(1, 1070)
(12.0, 12)
(1, 1070)
(8.0, 8)
(1, 1070)
(9.0, 9)
(1, 1070)
(11.0, 11)
(1, 1070)
(20.0, 20)
(1, 1070)
(17.0, 17)
(1, 1070)
(7.0, 7)
(1, 1070)
(14.0, 14)
(1, 1070)
(16.0, 16)
(1, 1070)
(16.0, 16)
(1, 1070)
(9.0, 9)
(1, 1070)
(18.0, 18)
(1, 1070)
(13.0, 13)
(1, 1070)
(5.0, 5)
(1, 1070)
(16.0, 16)
(1, 1070)
(7.0, 7)
(1, 1070)
(13.0, 13)
(1, 1070)
(15.0, 15)
(1, 1070)
(21.0, 21)
(1, 1070)
(7.0, 7)
(1, 1070)
(11.0, 11)
(1, 1070)
(18.0, 18)
(1, 1070)
(9.0, 9)
(1, 1070)
(8.0, 8)
(1, 1070)
(7.0, 7)
(1, 1070)
(7.0, 7)
(1, 1070)
(5.0, 5)
(1, 1070)
(8.0, 8)
(1, 1070)
(15.0, 15)
(1, 1070)
(17.0, 17)
(1, 1070)
(12.0, 12)
(1, 1070)
(8.0, 8)
(1, 1070)
(22.0, 22)
(1, 1070)
(7.0, 7)
(1, 1070)
(21.0, 21)
(1, 1070)
(10.0, 10)
(1, 1070)
(23.0, 23)
(1, 1070)
(15.0, 15)
(1, 1070)
(18.0, 18)
(1, 1070)
(7.0, 7)
(1, 1070)
(10.0, 10)
(1, 1070)
(8.0, 8)
(1, 1070)
(7.0, 7)
(1, 1070)
(8.0, 8)
(1, 1070)
(7.0, 7)
(1, 1070)
(9.0, 9)
(1, 1070)
(14.0, 14)
(1, 1070)
(7.0, 7)
(1, 1070)
(13.0, 13)
(1, 1070)
(16.0

(8.0, 8)
(1, 1070)
(11.0, 11)
(1, 1070)
(11.0, 11)
(1, 1070)
(16.0, 16)
(1, 1070)
(9.0, 9)
(1, 1070)
(5.0, 5)
(1, 1070)
(23.0, 23)
(1, 1070)
(10.0, 10)
(1, 1070)
(7.0, 7)
(1, 1070)
(9.0, 9)
(1, 1070)
(11.0, 11)
(1, 1070)
(16.0, 16)
(1, 1070)
(23.0, 23)
(1, 1070)
(8.0, 8)
(1, 1070)
(13.0, 13)
(1, 1070)
(8.0, 8)
(1, 1070)
(6.0, 6)
(1, 1070)
(15.0, 15)
(1, 1070)
(15.0, 15)
(1, 1070)
(10.0, 10)
(1, 1070)
(11.0, 11)
(1, 1070)
(26.0, 26)
(1, 1070)
(12.0, 12)
(1, 1070)
(11.0, 11)
(1, 1070)
(18.0, 18)
(1, 1070)
(27.0, 27)
(1, 1070)
(9.0, 9)
(1, 1070)
(8.0, 8)
(1, 1070)
(8.0, 8)
(1, 1070)
(14.0, 14)
(1, 1070)
(13.0, 13)
(1, 1070)
(12.0, 12)
(1, 1070)
(12.0, 12)
(1, 1070)
(15.0, 15)
(1, 1070)
(12.0, 12)
(1, 1070)
(7.0, 7)
(1, 1070)
(21.0, 21)
(1, 1070)
(14.0, 14)
(1, 1070)
(9.0, 9)
(1, 1070)
(11.0, 11)
(1, 1070)
(6.0, 6)
(1, 1070)
(15.0, 15)
(1, 1070)
(6.0, 6)
(1, 1070)
(17.0, 17)
(1, 1070)
(16.0, 16)
(1, 1070)
(21.0, 21)
(1, 1070)
(19.0, 19)
(1, 1070)
(7.0, 7)
(1, 1070)
(8.0, 8)
(1, 1070)
(9.0,

KeyboardInterrupt: 

In [19]:
#joblib.dump((all_preds, all_test), 'predictions_test.pkl')
preds_arr = np.array(all_preds);

In [20]:
del all_preds
test_arr = np.array(all_test);

In [21]:
del all_test

In [22]:
print(np.sum(preds_arr));
print(np.sum(test_arr));

4645257.0
4645257.0


In [23]:
def tp_rate(predictions, actuals, get_rate=True):
    sums = predictions + actuals;
    all2s = sums[sums == 2];
    if get_rate:
        return len(all2s) / float(sum(sums));
        #return len(all2s) / float(len(sums));
    else:
        return len(all2s);

In [24]:
def fp_rate(predictions, actuals, get_rate=True):
    act_not = np.logical_not(actuals).astype(int);
    return tp_rate(predictions, act_not, get_rate);

In [25]:
def fn_rate(predictions, actuals, get_rate=True):
    pred_not = np.logical_not(predictions).astype(int);
    return tp_rate(pred_not, actuals, get_rate);

In [26]:
def tn_rate(predictions, actuals, get_rate=True):
    pred_not = np.logical_not(predictions).astype(int);
    act_not = np.logical_not(actuals).astype(int);
    return tp_rate(pred_not, act_not, get_rate);

In [27]:
def accuracy(predictions, actuals):
    tp_val = tp_rate(predictions, actuals, False);
    tn_val = tn_rate(predictions, actuals, False);
    
    return (tp_val + tn_val) / float(len(predictions));

In [28]:
def precision(predictions, actuals, get_rate=True):
    tp = tp_rate(predictions, actuals, get_rate);
    fp = fp_rate(predictions, actuals, get_rate);
    return (float(tp) / (tp + fp));

In [29]:
def recall(predictions, actuals, get_rate=True):
    tp = tp_rate(predictions, actuals, get_rate);
    fn = fn_rate(predictions, actuals, get_rate);
    
    return (tp / float(tp + fn));

In [30]:
def confusion_array(predictions, actuals, get_rate=True):
    fp = fp_rate(predictions, actuals, get_rate);
    tp = tp_rate(predictions, actuals, get_rate);
    fn = fn_rate(predictions, actuals, get_rate);
    tn = tn_rate(predictions, actuals, get_rate);
    
    conf = np.array([[tp, fp], [fn, tn]]);

    conf_pdf = pd.DataFrame(conf);
    conf_pdf.columns = ['1', '0'];
    conf_pdf.set_index(np.array(['1', '0']))
    
    return conf_pdf;

k = variable (904 model)

In [31]:
conf_arr = confusion_array(preds_arr, test_arr)

In [32]:
conf_arr

Unnamed: 0,1,0
0,0.129514,0.008688
1,0.008688,0.495604


In [33]:
conf_vals = confusion_array(preds_arr, test_arr, False)

In [34]:
conf_vals

Unnamed: 0,1,0
0,1203247,3442010
1,3442010,388079163


In [35]:
precision_val = precision(preds_arr, test_arr);

In [36]:
precision_val

0.9371332841582819

In [37]:
recall_val = recall(preds_arr, test_arr);

In [38]:
recall_val

0.9371332841582819

In [39]:
acc_val = accuracy(preds_arr, test_arr);

In [40]:
acc_val

0.9826234141040169

In [41]:
from sklearn.metrics import roc_auc_score;

In [42]:
roc_auc_score(test_arr, preds_arr)

0.62511781342164274