# Evaluating on the Validation Set
Data sets imported have already been created

In [2]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score
from time import time

%matplotlib inline

val_features_raw = pd.read_csv('output/val_features_raw.csv')
val_features_trans = pd.read_csv('output/val_features_trans.csv')
val_features_all = pd.read_csv('output/val_features_all.csv')

val_labels = pd.read_csv('output/val_labels.csv')

val_features_raw.head()


Unnamed: 0,line_length,verb_percentage,stopword_percentage,punctuation_percentage
0,15,0.0,0.0,0.0
1,71,14.3,4.2,1.4
2,18,0.0,0.0,5.6
3,7,0.0,0.0,0.0
4,42,0.0,4.8,7.1


## Read In Models

In [5]:
models = {}

for mdl in ['raw_original', 'transformed', 'all']:
    models[mdl] = joblib.load('models/mdl_{}_features.pkl'.format(mdl))

    

In [13]:
def evaluate_model(name, model, features, labels):
    start = time()
    pred = model.predict(features)
    end = time()
    accuracy = round(accuracy_score(labels, pred), 3)
    precision = round(precision_score(labels, pred), 3)
    recall = round(recall_score(labels, pred), 3)
    print('{}\nAccuracy: {}\nPrecision: {}\nRecall: {}\nLatency: {}ms\n\n'.format(name,
                                                                              accuracy,
                                                                              precision,
                                                                              recall,
                                                                              round((end-start),3)))

In [14]:
evaluate_model('Raw Features', models['raw_original'], val_features_raw, val_labels)
evaluate_model('Transformed Features', models['transformed'], val_features_trans, val_labels)
evaluate_model('All Features', models['all'], val_features_all, val_labels)

Raw Features
Accuracy: 0.832
Precision: 0.861
Recall: 0.721
Latency: 0.03ms


Transformed Features
Accuracy: 0.837
Precision: 0.873
Recall: 0.721
Latency: 0.014ms


All Features
Accuracy: 0.817
Precision: 0.845
Recall: 0.698
Latency: 0.038ms




### Evaluate on Test Set

In [16]:
test_features = pd.read_csv('output/test_features_trans.csv')
test_labels = pd.read_csv('output/test_labels.csv')

evaluate_model('Transformed Features', models['transformed'], test_features, test_labels)

Transformed Features
Accuracy: 0.881
Precision: 0.889
Recall: 0.767
Latency: 0.019ms


