# Machine Learning Engineer Nanodegree - Capstone Project
## PART 7: Model Evaluation

**Author:** Giacomo Sarchioni

In [None]:
# Module imports
import pickle as pkl
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from roc_chart import ROCChart
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from roc_chart import ROCChart
from sklearn.externals import joblib
import keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
from scipy.stats import ttest_rel

# Seed value kept for reproducibility.
# NOTE(review): nothing in the visible cells reads random_seed — confirm whether it
# should be passed to numpy / keras or can be dropped.
random_seed = 42

## Import data

In [None]:
# Read the pickled reviews object (accessed like a pandas DataFrame in later cells).
# pickle can execute arbitrary code on load, so only trusted project files belong here.
with open('reviews/sentiment_reviews/sentiment.pkl', 'rb') as reviews_file:
    sentiment_reviews = pkl.load(reviews_file)

# Read the pickled split indexes (looked up by 'non_test', 'test', 'train' and 'val' below)
with open('split_indexes/indexes.pkl', 'rb') as indexes_file:
    indexes = pkl.load(indexes_file)

In [None]:
# Labels for every review, taken from the 'adj_score' column
scores = sentiment_reviews['adj_score'].values

# Outer split: non-test vs. test labels
non_test_scores, test_scores = scores[indexes['non_test']], scores[indexes['test']]

# Inner split: the 'train' / 'val' indexes are applied to the non-test subset
train_scores, val_scores = (
    non_test_scores[indexes['train']],
    non_test_scores[indexes['val']],
)

# One-hot encode the labels over two classes
scores_cat = keras.utils.to_categorical(scores, num_classes=2)

# Outer split of the one-hot labels: non-test vs. test
non_test_scores_cat, test_scores_cat = (
    scores_cat[indexes['non_test']],
    scores_cat[indexes['test']],
)

# Inner split of the one-hot labels: train vs. validation
train_scores_cat, val_scores_cat = (
    non_test_scores_cat[indexes['train']],
    non_test_scores_cat[indexes['val']],
)

In [None]:
# Review text stored in the 'parser_zero' column
parser_zero_reviews = sentiment_reviews['parser_zero'].values

# Outer split: non-test vs. test parser_zero reviews
non_test_parser_zero_reviews, test_parser_zero_reviews = (
    parser_zero_reviews[indexes['non_test']],
    parser_zero_reviews[indexes['test']],
)

# Inner split of the non-test parser_zero reviews: train vs. validation
train_parser_zero_reviews, val_parser_zero_reviews = (
    non_test_parser_zero_reviews[indexes['train']],
    non_test_parser_zero_reviews[indexes['val']],
)

In [None]:
# Review text stored in the 'parser_one' column
parser_one_reviews = sentiment_reviews['parser_one'].values

# Outer split: non-test vs. test parser_one reviews
non_test_parser_one_reviews, test_parser_one_reviews = (
    parser_one_reviews[indexes['non_test']],
    parser_one_reviews[indexes['test']],
)

# Inner split of the non-test parser_one reviews: train vs. validation
train_parser_one_reviews, val_parser_one_reviews = (
    non_test_parser_one_reviews[indexes['train']],
    non_test_parser_one_reviews[indexes['val']],
)

## Import log reg benchmark model

In [None]:
# Load the persisted logistic-regression benchmark from disk.
# NOTE(review): later cells call .predict / .predict_proba on raw review text, so this
# object presumably bundles its own text vectoriser (e.g. a pipeline) — confirm.
bmk_log_reg = joblib.load('bmk_models/bmk_log_reg.pkl')

## Parse data for deep learning models

In [None]:
# Benchmark prediction on the validation set
val_bmk_pred = bmk_log_reg.predict(val_parser_one_reviews)

# Deep learning prediction on the validation set.
# `Sequential.predict_classes` is no longer available in current Keras releases;
# taking the argmax over the predicted class scores yields the same labels and is
# the same rule DL_Classifier.predict applies to this model.
# NOTE(review): deep_learning_model and padded_val are created in cells further down
# this notebook; on a fresh kernel those cells have to run first.
val_dl_pred = deep_learning_model.predict(padded_val).argmax(axis=1)

In [None]:
# Draw the comparison chart for the validation set. ROCChart is a project-local helper
# (imported from roc_chart); it receives the true labels, one prediction array per
# model, the matching legend names and the chart title.
# NOTE(review): both prediction arrays hold hard class labels rather than probabilities,
# so a ROC curve built from them would have a single operating point per model —
# confirm this is what ROCChart expects.
comparison_chart = ROCChart(val_scores,
                            [val_bmk_pred, val_dl_pred],
                            ['Bmk Log Reg', 'Refined Model'],
                            'AUC on validation set')
comparison_chart.plot()

#### Test

In [None]:
# Benchmark prediction on the test set
test_bmk_pred = bmk_log_reg.predict(test_parser_one_reviews)

# Deep learning prediction on the test set.
# `Sequential.predict_classes` is no longer available in current Keras releases;
# taking the argmax over the predicted class scores yields the same labels and is
# the same rule DL_Classifier.predict applies to this model.
# NOTE(review): deep_learning_model and padded_test are created in cells further down
# this notebook; on a fresh kernel those cells have to run first.
test_dl_pred = deep_learning_model.predict(padded_test).argmax(axis=1)

In [None]:
# Draw the comparison chart for the test set. ROCChart is a project-local helper
# (imported from roc_chart); it receives the true labels, one prediction array per
# model, the matching legend names and the chart title.
# NOTE(review): both prediction arrays hold hard class labels rather than probabilities,
# so a ROC curve built from them would have a single operating point per model —
# confirm this is what ROCChart expects.
comparison_chart = ROCChart(test_scores,
                            [test_bmk_pred, test_dl_pred],
                            ['Bmk Log Reg', 'Refined Model'],
                            'AUC on test set')
comparison_chart.plot()

## Build Deep Learning classifier

In [None]:
# Persist the fitted Keras tokeniser; DL_Classifier is later pointed at this same path.
# NOTE(review): keras_tokenizer is created and fitted in the tokenising cell further
# down this notebook; on a fresh kernel that cell has to run before this one.
with open('final_model/dl_tokeniser.pkl', 'wb') as f:
    pkl.dump(keras_tokenizer, f)

In [None]:
import pickle as pkl
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences

class DL_Classifier:
    """Pair a pickled tokeniser with a saved Keras model to predict from raw text.

    Attributes are set in ``__init__``:
        tokeniser: object unpickled from ``tokeniser_path``; must provide
            ``texts_to_sequences``.
        model: Keras model returned by ``load_model(model_path)``.
        num_words: length each tokenised review is padded/truncated to.
    """

    def __init__(self, tokeniser_path, model_path, num_words=50):
        """Load the tokeniser and the model from disk.

        Args:
            tokeniser_path: path to the pickled tokeniser.
            model_path: path to the saved Keras model.
            num_words: despite its name, this is the sequence length handed to
                ``pad_sequences`` as ``maxlen`` (not a vocabulary size).
        """
        # NOTE(security): pickle.load can execute arbitrary code; only point this
        # at tokeniser files produced by this project.
        with open(tokeniser_path, 'rb') as f:
            self.tokeniser = pkl.load(f)

        self.model = load_model(model_path)
        self.num_words = num_words

    def predict(self, reviews, return_proba=False):
        """Predict for one or more raw review texts.

        Args:
            reviews: a list (or other iterable) of review strings; a single
                string is accepted and treated as one review.
            return_proba: when True, return the model's raw output; otherwise
                return the index of the highest-scoring column per review.

        Returns:
            The array produced by ``self.model.predict`` if ``return_proba`` is
            True, else its ``argmax`` along axis 1.
        """
        # The tokeniser iterates over its input, so a bare string would be split
        # into one "review" per character; wrap it to keep it a single review.
        if isinstance(reviews, str):
            reviews = [reviews]

        sequences = self.tokeniser.texts_to_sequences(reviews)
        padded = pad_sequences(sequences, maxlen=self.num_words, padding='post')

        predictions = self.model.predict(padded)

        if return_proba:
            return predictions
        return predictions.argmax(axis=1)

In [None]:
# Build the end-to-end classifier from the saved tokeniser and the saved model;
# num_words is left at its default of 50.
DL_model = DL_Classifier('final_model/dl_tokeniser.pkl', 'final_model/dl_final.hdf5')

In [None]:
# Show the summary of the Keras model held by the classifier
DL_model.model.summary()

## Prediction on some reviews

#### Order of words

In [None]:
# Value passed to the Keras Tokenizer as num_words
# (None leaves the vocabulary uncapped, per the Keras documentation)
n_words_tokenizer = None

# Length every tokenised review is padded/truncated to by pad_sequences
# (same value as DL_Classifier's default num_words)
max_len = 50

In [None]:
# Fit a Keras tokeniser on the training parser_one reviews only
keras_tokenizer = Tokenizer(num_words=n_words_tokenizer)
keras_tokenizer.fit_on_texts(train_parser_one_reviews)

# Convert the train, validation and test parser_one reviews to integer sequences
tokenised_train, tokenised_val, tokenised_test = (
    keras_tokenizer.texts_to_sequences(texts)
    for texts in (train_parser_one_reviews,
                  val_parser_one_reviews,
                  test_parser_one_reviews)
)

# Pad (at the end) or truncate every sequence to max_len
padded_train, padded_val, padded_test = (
    pad_sequences(sequences, maxlen=max_len, padding='post')
    for sequences in (tokenised_train, tokenised_val, tokenised_test)
)

## Import deep learning model

In [None]:
# Single hand-written review containing a negation ("not ... very good"), fed to both
# models in the next cells.
# NOTE(review): the name `test` is easy to confuse with the test-split variables.
test = ["I bought this product and I found it not to be very good"]

In [None]:
# Benchmark class probabilities for the hand-written review
bmk_log_reg.predict_proba(test)

In [None]:
# Raw deep learning model output (one score per class) for the same review
DL_model.predict(test, return_proba=True)

#### 10 Test Reviews

In [None]:
# First ten test reviews: parser_zero text for display, parser_one text as model input
comparison_reviews, comparison_parser_one = (
    test_parser_zero_reviews[:10],
    test_parser_one_reviews[:10],
)

# Benchmark labels converted to plain Python ints
comparison_bmk_pred = list(map(int, bmk_log_reg.predict(comparison_parser_one)))

# Deep learning labels (argmax over the class scores, since return_proba is False)
comparison_dl_pred = DL_model.predict(comparison_parser_one, return_proba=False)

In [None]:
# Side-by-side table: review text, both models' predicted labels and the true label
results = pd.DataFrame(
    {
        'text': comparison_reviews,
        'bmk_pred': comparison_bmk_pred,
        'dl_pred': comparison_dl_pred,
        'true': list(map(int, test_scores[:10])),
    }
)

In [None]:
# Display the comparison table for the ten reviews
results

## Statistical test

In [None]:
# Load the final Keras model from disk (the same file DL_Classifier is pointed at).
# NOTE(review): the validation/test prediction cells earlier in this notebook use
# deep_learning_model; on a fresh kernel this cell has to run before them.
deep_learning_model = load_model('final_model/dl_final.hdf5')

## Predictions

#### Validation