In [None]:
# Importing libraries
import numpy as np
import pandas as pd
import altair as alt

In [None]:
# Data Loading
# Read the four assignment CSV files (5_a .. 5_d) in order, one DataFrame per file.
df_5a, df_5b, df_5c, df_5d = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/PG_Diploma_AI_ML_2021_UOHYD/assignments/5_Performance_metrics/5_a.csv',
        '/content/drive/MyDrive/PG_Diploma_AI_ML_2021_UOHYD/assignments/5_Performance_metrics/5_b.csv',
        '/content/drive/MyDrive/PG_Diploma_AI_ML_2021_UOHYD/assignments/5_Performance_metrics/5_c.csv',
        '/content/drive/MyDrive/PG_Diploma_AI_ML_2021_UOHYD/assignments/5_Performance_metrics/5_d.csv',
    )
)

## Machine Learning Classification Model Metrics

In [None]:
# Y label predicted based on threshold at 0.5
# A row is labelled True unless its probability is strictly below 0.5.
df_5a['y_pred_label'] = ~(df_5a['proba'] < 0.5).to_numpy()
df_5b['y_pred_label'] = ~(df_5b['proba'] < 0.5).to_numpy()
df_5c['y_pred_label'] = ~(df_5c['prob'] < 0.5).to_numpy()

In [None]:
def my_confusion_matrix(y_actual,y_pred):
    """Tally binary classification outcomes into a 2x2 confusion matrix.

    Custom implementation written by mallesham yamulla.

    The two iterables are walked in parallel. The actual label is reduced to
    its truthiness, while the predicted label is compared against ``True`` /
    ``False`` (so ``1`` / ``0`` work as well). Any pair that is not a true
    positive, true negative or false negative is counted as a false positive.

    Returns
    -------
    numpy.ndarray
        Array of shape (2, 2) laid out as ``[[TN, FP], [FN, TP]]``.
    """
    true_pos = true_neg = false_neg = false_pos = 0
    for actual, predicted in zip(y_actual, y_pred):
        is_positive = bool(actual)
        if is_positive and predicted == True:
            true_pos += 1
        elif not is_positive and predicted == False:
            true_neg += 1
        elif is_positive and predicted == False:
            false_neg += 1
        else:
            # Everything left over is treated as a false positive.
            false_pos += 1
    return np.array([[true_neg, false_pos], [false_neg, true_pos]])

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
def my_performance_metrics(conf_matrix):
    """Summarise a binary confusion matrix as accuracy, precision, recall and F1.

    Custom function to get classification model metrics written by mallesham yamulla.

    Parameters
    ----------
    conf_matrix : array-like of shape (2, 2)
        Counts laid out as ``[[TN, FP], [FN, TP]]``. Anything accepted by
        ``np.asarray`` works (NumPy array, nested list, ...).

    Returns
    -------
    str
        ``'Accuracy: a,Precision: p,Recall: r,F1-score f'`` with every value
        rounded to 3 decimals.

    Notes
    -----
    * F1 is computed directly from the counts as ``2*TP / (2*TP + FP + FN)``
      so that it is not distorted by the rounding applied to precision and
      recall for display.
    * A metric whose denominator is zero (e.g. precision when nothing was
      predicted positive) is reported as ``0.0`` instead of NaN.
    """
    tn, fp, fn, tp = np.asarray(conf_matrix).ravel()

    def _ratio(numerator, denominator):
        # Undefined ratios are reported as 0.0 rather than NaN + RuntimeWarning.
        return numerator / denominator if denominator else 0.0

    accuracy = np.round(_ratio(tp + tn, tp + tn + fn + fp), 3)
    precision = np.round(_ratio(tp, tp + fp), 3)
    recall = np.round(_ratio(tp, tp + fn), 3)
    # Algebraically equal to 2*P*R / (P + R), but uses the unrounded counts.
    f1_score = np.round(_ratio(2 * tp, 2 * tp + fp + fn), 3)
    return f'Accuracy: {accuracy},Precision: {precision},Recall: {recall},F1-score {f1_score}'

In [None]:
def my_auc_roc_curve(y_true, y_prob,auc_score=True,roc_plot=False):
    """Approximate the ROC curve on a percentile threshold grid and report its AUC.

    Custom function to calculate the AUC score and plot the ROC curve,
    written by mallesham yamulla.

    The 0th, 2nd, ..., 100th percentiles of ``y_prob`` are used as decision
    thresholds, tried from highest to lowest. For each threshold a row is
    predicted positive when its probability is greater than or equal to the
    threshold, and the true/false positive rates are derived from
    ``my_confusion_matrix``. The AUC is the trapezoidal integral of TPR over FPR.

    Parameters
    ----------
    y_true : array-like
        Actual binary labels.
    y_prob : array-like
        Predicted probabilities, paired with ``y_true``.
    auc_score : bool, default True
        When exactly ``True``, print the AUC as ``AUC:<value>``.
    roc_plot : bool, default False
        When exactly ``True``, return an Altair area chart of the ROC curve.

    Returns
    -------
    altair.Chart or None
        The chart when ``roc_plot`` is ``True``; otherwise ``None``.

    Notes
    -----
    If ``y_true`` contains only one class, the TPR or FPR denominator is zero
    and the corresponding rate is not well defined.
    """
    df = pd.DataFrame({'y_true': y_true, 'y_prob': y_prob})
    actual = df.y_true.to_numpy()
    scores = df.y_prob.to_numpy()

    # Percentiles and confusion counts do not depend on row order, so no sort is needed.
    thresholds = np.percentile(scores, q=list(range(0, 102, 2)))[::-1]

    tpr = []
    fpr = []
    for tshold in thresholds:
        y_pred = np.where(scores >= tshold, 1, 0)
        tn, fp, fn, tp = my_confusion_matrix(actual, y_pred).ravel()
        tpr.append(tp / (tp + fn))
        fpr.append(fp / (fp + tn))

    df_X = pd.DataFrame({'tpr': tpr, 'fpr': fpr, 'threshold': thresholds})

    if auc_score is True:
        # np.trapz is deprecated in NumPy 2.0 in favour of np.trapezoid;
        # prefer the new name and fall back for older NumPy releases.
        trapezoid = getattr(np, 'trapezoid', None) or np.trapz
        my_auc = trapezoid(df_X.tpr.to_numpy(), df_X.fpr.to_numpy())
        print(f'AUC:{my_auc}')

    if roc_plot is True:
        return alt.Chart(df_X).mark_area(fillOpacity = 0.8, fill = 'green').encode(
            alt.X('fpr', title="false positive rate"),
            alt.Y('tpr', title="true positive rate"))


In [None]:
def my_best_threshold(y_true,y_prob):
    """Pick the candidate probability cutoff with the smallest value of metric A.

    Metric A is ``500 * false_negatives + 100 * false_positives``. Candidate
    cutoffs are the 0th, 2nd, ..., 100th percentiles of ``y_prob``, tried from
    highest to lowest; a row is predicted positive when its probability is
    greater than or equal to the cutoff. On ties the first cutoff tried wins.

    Returns
    -------
    str
        A message containing the chosen cutoff and its metric A value.
    """
    ranked = (
        pd.DataFrame({'y_true': y_true, 'y_prob': y_prob})
        .sort_values('y_prob', ascending=False)
    )
    actual_labels = np.array(ranked.y_true)
    candidate_cutoffs = np.percentile(ranked.y_prob, q=list(range(0, 102, 2)))[::-1]

    def metric_a(cutoff):
        # Cost of the mistakes made when classifying at this cutoff.
        predicted = np.where(ranked.y_prob >= cutoff, 1, 0)
        _, false_pos, false_neg, _ = my_confusion_matrix(actual_labels, predicted).ravel()
        return (500 * false_neg) + (100 * false_pos)

    cost_by_cutoff = {cutoff: metric_a(cutoff) for cutoff in candidate_cutoffs}
    best_cutoff = min(cost_by_cutoff, key=cost_by_cutoff.get)
    return f'Threshold of probability which gives lowest values of metric A:{best_cutoff} and {cost_by_cutoff[best_cutoff]}'

### Task 1

In [None]:
# Confusion matrix for dataset 5_a, using the labels predicted at the 0.5 threshold
task_1_conf_mat = my_confusion_matrix(y_actual=df_5a['y'], y_pred=df_5a['y_pred_label'])

In [None]:
task_1_conf_mat

array([[    0,   100],
       [    0, 10000]])

In [None]:
my_performance_metrics(task_1_conf_mat)

'Accuracy: 0.99,Precision: 0.99,Recall: 1.0,F1-score 0.995'

In [None]:
my_auc_roc_curve(df_5a.y,df_5a.proba,roc_plot=True)

AUC:0.487679


### Task 2

In [None]:
# Confusion matrix for dataset 5_b, using the labels predicted at the 0.5 threshold
task_2_conf_mat = my_confusion_matrix(y_actual=df_5b['y'], y_pred=df_5b['y_pred_label'])

In [None]:
task_2_conf_mat

array([[9761,  239],
       [  45,   55]])

In [None]:
my_performance_metrics(task_2_conf_mat)

'Accuracy: 0.972,Precision: 0.187,Recall: 0.55,F1-score 0.279'

In [None]:
my_auc_roc_curve(df_5b.y,df_5b.proba,roc_plot=True)

AUC:0.9335665


### Task 3

In [None]:
# Confusion matrix for dataset 5_c, using the labels predicted at the 0.5 threshold
task_3_conf_mat = my_confusion_matrix(y_actual=df_5c['y'], y_pred=df_5c['y_pred_label'])

In [None]:
task_3_conf_mat

array([[1637,  168],
       [ 462,  585]])

In [None]:
my_performance_metrics(task_3_conf_mat)

'Accuracy: 0.779,Precision: 0.777,Recall: 0.559,F1-score 0.65'

In [None]:
my_auc_roc_curve(df_5c.y,df_5c.prob,roc_plot=True)

AUC:0.8285247124748986


In [None]:
my_best_threshold(df_5c.y,df_5c.prob)

'Threshold of probability which gives lowest values of metric A:0.2276636573511201 and 141700'

## Machine Learning Regression Model Metrics

In [None]:
def my_r_squrd(y_actual,y_pred):
    """Coefficient of determination (R squared) for a regression model.

    Custom implementation written by mallesham yamulla.

    Computes ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
    differences between ``y_actual`` and ``y_pred`` and ``SS_tot`` is the sum
    of squared deviations of ``y_actual`` from its mean. The result is rounded
    to 3 decimals.
    """
    residuals = y_actual - y_pred
    deviations = y_actual - np.mean(y_actual)

    residual_ss = np.sum(np.array(residuals ** 2))
    total_ss = np.sum(np.array(deviations) ** 2)

    unexplained_share = residual_ss / total_ss
    return np.round(1 - unexplained_share, 3)

In [None]:
my_r_squrd(df_5d.y,df_5d.pred)

0.956

In [None]:
def my_rmse(y_actual,y_pred):
    """Root mean squared error (RMSE) for a regression model.

    Custom implementation written by mallesham yamulla.

    Computes ``sqrt(sum((y_actual - y_pred) ** 2) / N)`` with ``N`` the number
    of observations in ``y_actual``, rounded to 3 decimals.

    Parameters
    ----------
    y_actual, y_pred : numpy.ndarray or pandas.Series
        Observed and predicted values; they must support element-wise
        subtraction.

    Returns
    -------
    numpy.float64
        The RMSE, in the same units as ``y_actual``.

    Raises
    ------
    ValueError
        If ``y_actual`` is empty (the mean is undefined).
    """
    N = len(y_actual)
    if N == 0:
        raise ValueError('my_rmse requires at least one observation')
    sum_err = np.sum(np.array((y_actual - y_pred)**2))
    # The square root turns the mean squared error into the *root* mean squared error.
    rmse_cal = np.round(np.sqrt(sum_err / N), 3)
    return rmse_cal

In [None]:
my_rmse(df_5d.y,df_5d.pred)

177.166