In [None]:
# Mount Google Drive at /content/drive; the data path configured in this
# notebook points below that mount point. Requires the google.colab package.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Drive folder that holds the prediction CSV files.
MY_PATH = '/content/drive/MyDrive/Colab Notebooks/temp/'
# File to analyse; set to 'sample_prediction_0.csv' for the other sample.
DATA_FILENAME = 'sample_prediction_1.csv'
data_file = MY_PATH + DATA_FILENAME

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from numpy import trapz

In [None]:
# Read the CSV at `data_file` into a DataFrame; later cells use its
# "actual" and "prediction" columns.
df = pd.read_csv(data_file)

In [None]:
# Ground-truth labels and model scores, taken from the loaded frame.
y_val = df["actual"]
y_pred = df["prediction"]

# Boolean masks over the ground truth: label 1 is positive, label 0 negative.
actual_positive = y_val.eq(1)
actual_negative = y_val.eq(0)

# Scores at or above the threshold count as positive predictions.
t = 0.5
predict_positive = y_pred.ge(t)
predict_negative = y_pred.lt(t)

# Confusion counts: correct predictions first, then the two kinds of error.
tp, tn = (
    (predict_positive & actual_positive).sum(),
    (predict_negative & actual_negative).sum(),
)
fp, fn = (
    (predict_positive & actual_negative).sum(),
    (predict_negative & actual_positive).sum(),
)

In [None]:
# Rows are the actual class (negative, positive); columns are the predicted
# class (negative, positive).
confusion_matrix = np.array([[tn, fp], [fn, tp]])
# Show every cell as a share of all counted samples, rounded to two decimals.
np.round(confusion_matrix / confusion_matrix.sum(), 2)

In [None]:
# Precision: share of positive predictions that are actually positive.
p = tp / (fp + tp)
# Recall: share of actual positives that were predicted positive.
r = tp / (fn + tp)
print(p, r, sep='\n')

In [None]:
# True-positive rate: positives caught, out of all actual positives.
tpr = tp / (fn + tp)
# False-positive rate: negatives flagged as positive, out of all actual negatives.
fpr = fp / (tn + fp)
print(tpr, fpr, sep='\n')

In [None]:
def tpr_fpr_dataframe(y_val, y_pred, thresholds=None):
    """Tabulate confusion counts, TPR and FPR over a sweep of thresholds.

    Parameters
    ----------
    y_val : array-like
        Ground-truth labels. Entries equal to 1 are counted as positive and
        entries equal to 0 as negative; other values enter none of the counts.
    y_pred : array-like
        Scores, one per label, matched to ``y_val`` by position (any pandas
        index on either input is ignored).
    thresholds : array-like, optional
        Cut-offs to evaluate; a score ``>= threshold`` is a positive
        prediction. Defaults to 101 evenly spaced values from 0 to 1.

    Returns
    -------
    pandas.DataFrame
        One row per threshold with columns ``threshold``, ``tp``, ``fp``,
        ``fn``, ``tn``, ``tpr`` and ``fpr``. A rate is NaN when its
        denominator is zero (no positives, or no negatives, in ``y_val``).

    Raises
    ------
    ValueError
        If ``y_val`` and ``y_pred`` do not contain the same number of entries.
    """
    # Work on flat NumPy arrays so the pairing is strictly positional: two
    # pandas Series with different indexes would otherwise be aligned by label
    # inside `&`, and plain Python lists would not support `>=` at all.
    labels = np.asarray(y_val).ravel()
    preds = np.asarray(y_pred).ravel()
    if labels.size != preds.size:
        raise ValueError(
            f"y_val and y_pred must have the same length, "
            f"got {labels.size} and {preds.size}"
        )

    if thresholds is None:
        thresholds = np.linspace(0, 1, 101)

    # The ground-truth masks do not depend on the threshold; compute them once.
    actual_positive = labels == 1
    actual_negative = labels == 0

    scores = []
    for t in thresholds:
        predict_positive = preds >= t
        predict_negative = preds < t

        tp = np.count_nonzero(predict_positive & actual_positive)
        tn = np.count_nonzero(predict_negative & actual_negative)

        fp = np.count_nonzero(predict_positive & actual_negative)
        fn = np.count_nonzero(predict_negative & actual_positive)

        scores.append((t, tp, fp, fn, tn))

    columns = ['threshold', 'tp', 'fp', 'fn', 'tn']
    df_scores = pd.DataFrame(scores, columns=columns)

    df_scores['tpr'] = df_scores.tp / (df_scores.tp + df_scores.fn)
    df_scores['fpr'] = df_scores.fp / (df_scores.fp + df_scores.tn)

    return df_scores

In [None]:
# Threshold sweep for the model's scores: one row of confusion counts and
# TPR/FPR per threshold.
df_scores = tpr_fpr_dataframe(y_val, y_pred)

In [None]:
# TPR and FPR of the model's scores as a function of the decision threshold.
plt.plot(df_scores.threshold, df_scores['tpr'], label='TPR')
plt.plot(df_scores.threshold, df_scores['fpr'], label='FPR')
# Label the axes and title the figure so it can be read on its own.
plt.xlabel('threshold')
plt.ylabel('rate')
plt.title('Model: TPR and FPR vs. threshold')
plt.legend();  # trailing ';' keeps the Legend repr out of the cell output

In [None]:
# ROC curve of the model (FPR on x, TPR on y) against the chance diagonal.
# Swap the first plt.plot for plt.scatter to see the individual threshold points.
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.plot(df_scores['fpr'], df_scores['tpr'], label='Model', color='orange', linestyle='-')
plt.plot([0, 1], [0, 1], label='Random', linestyle='--')
plt.legend()

## Random model

In [None]:
# Seed the global NumPy generator so the random scores are the same on every run.
np.random.seed(1)
# One uniform score in [0, 1) per validation label.
y_rand = np.random.uniform(low=0, high=1, size=len(y_val))

In [None]:
# Accuracy of the random scores when thresholded at 0.5.
(y_val == (y_rand >= 0.5)).mean()

In [None]:
# Same threshold sweep, applied to the random scores instead of the model's.
df_rand = tpr_fpr_dataframe(y_val, y_rand)

In [None]:
# TPR and FPR of the random scores as a function of the decision threshold.
plt.plot(df_rand.threshold, df_rand['tpr'], label='TPR')
plt.plot(df_rand.threshold, df_rand['fpr'], label='FPR')
# Label the axes and title the figure so it can be read on its own.
plt.xlabel('threshold')
plt.ylabel('rate')
plt.title('Random scores: TPR and FPR vs. threshold')
plt.legend();  # trailing ';' keeps the Legend repr out of the cell output

In [None]:
# ROC curve of the random scores (FPR on x, TPR on y) against the chance diagonal.
# Swap the first plt.plot for plt.scatter to see the individual threshold points.
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.plot(df_rand['fpr'], df_rand['tpr'], label='Random', color='orange', linestyle='-')
plt.plot([0, 1], [0, 1], label='Random (ideal)', linestyle='--')
plt.legend()