# Accuracy Metrics

1. Load the actuals and predictions for the train and test data
2. Calculate accuracy metrics by cluster for each modeling approach
3. Plot a confusion matrix for each modeling approach

In [None]:
import os
import shutil
import joblib
import ast
import json
import pandas as pd
import numpy as np
import psycopg2 as pg
import datetime as dt
from AWS import AWS
from Utility import Utility
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import confusion_matrix

In [None]:
# set environment
aws = None
s3_bucket_name = 'dse-cohort3-group3'
s3_dat_dir = 'PreprocessedWazeData'

# get args file: its contents are a Python literal that is parsed with
# ast.literal_eval and then indexed by string keys below
sampling_args_file = '../conf/pipeline_args.txt'
# read inside a context manager so the file handle is closed even if read() fails
with open(sampling_args_file, 'r') as fr:
    fa = fr.read()
file_args = ast.literal_eval(fa)

# assume save_dir already exists
save_dir = file_args['save_dir']

# assume connection file is always present
conn_str_file = file_args['conn_str_file']

In [None]:
# create subdirectory for results files - assumes save_dir already exists
# (os.mkdir below does not create missing parent directories)
results_dir = '{}/results_files'.format(save_dir)

# delete and remake results_dir if exists
# NOTE: this is destructive - everything under a previous results_dir is removed
if os.path.isdir(results_dir):
    shutil.rmtree(results_dir)

# at this point results_dir is guaranteed not to exist, so mkdir starts it empty
os.mkdir(results_dir)

In [None]:
# create the shared Utility helper from the parsed pipeline args; it is used
# further down through util.get_best_model(...) when scoring 'cluster_ensemble'
# NOTE(review): the original note says this also connects to the database -
# that happens inside Utility and cannot be confirmed from this file
util = Utility(file_args)

## 1. read in actuals and predictions for train and test data

In [None]:
# load the training-set actuals and predictions from save_dir
fn = os.path.join(save_dir, 'train_predictions.csv')
train_preds = pd.read_csv(fn)

# turn the text 'date' / 'time' columns into datetime.date / datetime.time objects
train_preds = train_preds.assign(
    date=pd.to_datetime(train_preds['date'], format='%Y-%m-%d').dt.date,
    time=pd.to_datetime(train_preds['time'], format='%H:%M:%S').dt.time,
)
print("{} rows".format(len(train_preds)))

In [None]:
# load the test-set actuals and predictions from save_dir
fn = os.path.join(save_dir, 'test_predictions.csv')
test_preds = pd.read_csv(fn)

# turn the text 'date' / 'time' columns into datetime.date / datetime.time objects
test_preds = test_preds.assign(
    date=pd.to_datetime(test_preds['date'], format='%Y-%m-%d').dt.date,
    time=pd.to_datetime(test_preds['time'], format='%H:%M:%S').dt.time,
)
print("{} rows".format(len(test_preds)))

## 2.  create metrics tables

In [None]:
def get_pct_negative(data):
    """Return the share of rows whose 'level_binary' value is negative (0).

    The positive count is taken as the sum of the 'level_binary' column, so
    the column is expected to hold 0/1 values.

    Arguments
    ---------
    data: DataFrame with a 'level_binary' column

    Returns
    -------
    Fraction of negative rows rounded to 3 decimals, or NaN when ``data`` has
    no rows (the fraction is undefined in that case).
    """
    num_total = data.shape[0]
    if num_total == 0:
        # nothing to count: report "undefined" instead of dividing by zero
        return float('nan')

    num_pos = data['level_binary'].sum()
    num_neg = num_total - num_pos
    # coerce to float BEFORE dividing so the ratio can never be truncated by
    # integer division
    neg_pct = num_neg / float(num_total)
    return round(neg_pct, 3)

In [None]:
def _metrics_row(subset, target, model, cluster_label):
    """Build one metrics-table row (as a dict) for the rows in ``subset``."""
    y_true = subset[target].values
    y_pred = subset['{}_preds_{}'.format(target, model)]
    return {
        'target': target,
        'model': model,
        'cluster': cluster_label,
        'count': subset.shape[0],
        'pct_negative': get_pct_negative(subset),
        'accuracy': round(accuracy_score(y_true, y_pred), 3),
        # precision / recall / f1 are macro-averaged over the classes
        'precision': round(precision_score(y_true, y_pred, average='macro'), 3),
        'recall': round(recall_score(y_true, y_pred, average='macro'), 3),
        'f1_score': round(f1_score(y_true, y_pred, average='macro'), 3),
    }


def get_accuracy_metrics(data, target, model):
    """Tabulate accuracy metrics per cluster, plus one overall row.

    Arguments
    ---------
    data:   DataFrame of actuals and predictions. Must contain the columns
            'cluster', 'level_binary', ``target`` and
            '<target>_preds_<model>'.
            NOTE: missing values are replaced with 0 IN PLACE, so the
            caller's frame is modified.

    target: name of the column holding the true labels
            (e.g. 'level_binary' or 'level_max')

    model:  modeling approach whose predictions are scored
            (e.g. 'avg_baseline' or 'cluster_ensemble')

    Returns
    -------
    DataFrame with one row per distinct cluster and a final row labelled
    'all', sorted by 'count' in descending order. Columns are target, model,
    cluster, count, pct_negative, accuracy, precision, recall and f1_score.
    When ``model`` is 'cluster_ensemble' a 'best_model' column is added after
    'cluster'; it is looked up through the module-level ``util`` object and
    is 'N/A' on the 'all' row.
    """
    is_ensemble = model == 'cluster_ensemble'

    # fillna with 0
    # NOTE(review): deliberately kept in place - code that runs later on the
    # same frame may rely on it being NaN-free; confirm before changing this
    data.fillna(0, inplace=True)

    # stage 1 is the binary target, stage 2 everything else; only needed for
    # the best-model lookup
    stage = 1 if target == 'level_binary' else 2

    # add individual cluster metrics
    rows = []
    for clust in data['cluster'].unique():
        row = _metrics_row(data[data['cluster'] == clust], target, model, clust)
        if is_ensemble:
            # second element of the lookup result, with 'model_' stripped out
            row['best_model'] = util.get_best_model(stage, clust)[1].replace('model_', '')
        rows.append(row)

    # add overall metrics
    overall = _metrics_row(data, target, model, 'all')
    if is_ensemble:
        overall['best_model'] = 'N/A'
    rows.append(overall)

    # create dataframe to return, with a fixed column order
    columns = ['target', 'model', 'cluster']
    if is_ensemble:
        columns.append('best_model')
    columns += ['count', 'pct_negative', 'accuracy', 'precision', 'recall', 'f1_score']
    metrics_df = pd.DataFrame(rows, columns=columns)

    return metrics_df.sort_values(by='count', ascending=False)

In [None]:
# level_binary target, avg_baseline model, training data
get_accuracy_metrics(data=train_preds, target='level_binary', model='avg_baseline')

In [None]:
# level_binary target, avg_baseline model, test data
get_accuracy_metrics(data=test_preds, target='level_binary', model='avg_baseline')

In [None]:
# level_binary target, cluster_ensemble model, training data
get_accuracy_metrics(data=train_preds, target='level_binary', model='cluster_ensemble')

In [None]:
# level_binary target, cluster_ensemble model, test data
get_accuracy_metrics(data=test_preds, target='level_binary', model='cluster_ensemble')

In [None]:
# level_max target, avg_baseline model, training data
get_accuracy_metrics(data=train_preds, target='level_max', model='avg_baseline')

In [None]:
# level_max target, avg_baseline model, test data
get_accuracy_metrics(data=test_preds, target='level_max', model='avg_baseline')

In [None]:
# level_max target, cluster_ensemble model, training data
get_accuracy_metrics(data=train_preds, target='level_max', model='cluster_ensemble')

In [None]:
# level_max target, cluster_ensemble model, test data
get_accuracy_metrics(data=test_preds, target='level_max', model='cluster_ensemble')

## 3.  confusion matrix plots

In [None]:
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion Matrix',
                          cmap=None,
                          normalize=True):
    """
    given a sklearn confusion matrix (cm), make a nice plot

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix
                  (rows are true labels, columns are predicted labels)

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low']
                  pass None to keep the default numeric tick labels

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  defaults to 'Blues' when None

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions of each true-label row;
                  a row with no samples is shown as all zeros

    Returns
    -------
    None; the figure is displayed with plt.show()

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    cm = np.asarray(cm)

    # accuracy is always computed from the raw counts, before any normalization
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # normalize BEFORE drawing so the heatmap colours, the colorbar, the
        # printed numbers and the text-colour threshold all use the same scale
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # rows without any true samples stay at 0 instead of becoming NaN
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # cells darker than this threshold get white text so the number stays readable
    thresh = cm.max() / 1.1 if normalize else cm.max() / 1.2
    cell_format = "{:0.3f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.3f}; misclass={:0.3f}'.format(accuracy, misclass))
    # lay out only after the labels exist so the two-line x label is accounted for
    plt.tight_layout()
    plt.show()

In [None]:
def plot_cm(preds_df, target='level_max', model='avg_baseline', norm=False):
    """Plot the confusion matrix of one model's predictions for one target.

    Arguments
    ---------
    preds_df: DataFrame holding the true labels in column ``target`` and the
              predictions in column '<target>_preds_<model>'; it is not modified

    target:   name of the true-label column

    model:    modeling approach whose predictions are plotted

    norm:     passed on as ``normalize`` to plot_confusion_matrix
              (False plots raw counts, True plots proportions)
    """
    pred_col = '{}_preds_{}'.format(target, model)

    # get_accuracy_metrics fills missing values with 0 before scoring; apply the
    # same rule here (on copies) so this plot gives the same result whether or
    # not that function has already been run on preds_df
    y_true = preds_df[target].fillna(0).values
    y_pred = preds_df[pred_col].fillna(0).values

    cm = confusion_matrix(y_true, y_pred)
    cm_title = 'Confusion Matrix - {} target and {} model'.format(target, model)
    plot_confusion_matrix(cm, target_names=None, title=cm_title, normalize=norm)

In [None]:
# raw-count confusion matrix: level_binary target, avg_baseline model, test data
plot_cm(test_preds, target='level_binary', model='avg_baseline', norm=False)

In [None]:
# raw-count confusion matrix: level_binary target, cluster_ensemble model, test data
plot_cm(test_preds, target='level_binary', model='cluster_ensemble', norm=False)

In [None]:
# raw-count confusion matrix: level_max target, avg_baseline model, test data
plot_cm(test_preds, target='level_max', model='avg_baseline', norm=False)

In [None]:
# raw-count confusion matrix: level_max target, cluster_ensemble model, test data
plot_cm(test_preds, target='level_max', model='cluster_ensemble', norm=False)