In [19]:
import pandas as pd
from sklearn import metrics
import numpy as np

## Classification metrics

In [2]:
data = pd.read_csv("../tinytest/two-class-sample-data.csv")

In [10]:
truth = data["act"]
probability = data["pred"]
prediction = np.where(probability > 0.5, 1, 0)

In [11]:
print(truth[1:6])
print(prediction[1:6])

1    0
2    0
3    0
4    0
5    1
Name: act, dtype: int64
[0 0 0 0 1]


In [15]:
## accuracy
acc_score = metrics.accuracy_score(truth, prediction)
print("Accuracy Score:", acc_score)

Accuracy Score: 0.838


In [16]:
## auc
auc = metrics.roc_auc_score(truth, probability)
print("AUROC:", auc)

AUROC: 0.8395408938887199


In [97]:
## average precision score
avg_precision_score = metrics.average_precision_score(truth, prediction)
print("Average Precision Score:", avg_precision_score)

Average Precision Score: 0.7813020508395832


In [19]:
## balanced accuracy score
balanced_acc_score = metrics.balanced_accuracy_score(truth, prediction)
print("Balanced Accuracy Score:", balanced_acc_score)

Balanced Accuracy Score: 0.837904658270791


In [92]:
metrics.balanced_accuracy_score(truth, prediction, adjusted=True)

0.6758093165415819

In [20]:
## brier score loss
brier_score = metrics.brier_score_loss(truth, probability)
print("Brier Score Loss:", brier_score)

Brier Score Loss: 0.16388947565578774


In [22]:
## cohen kappa score
cohen_kappa_score = metrics.cohen_kappa_score(truth, prediction)
print("Cohen Kappa Score:", cohen_kappa_score)

Cohen Kappa Score: 0.6759066611184021


In [23]:
metrics.confusion_matrix(truth, prediction)

array([[428,  78],
       [ 84, 410]])

In [25]:
## F1 score
f1_score = metrics.f1_score(truth, prediction)
print("F1 Score:", f1_score)

F1 Score: 0.835030549898167


In [26]:
## Log loss
log_loss = metrics.log_loss(truth, probability)
print("Log Loss:", log_loss)

Log Loss: 0.5285336531342744


In [27]:
## Matthews correlation coefficient (MCC)
matthews_corr = metrics.matthews_corrcoef(truth, prediction)
print("Matthews correlation coefficient", matthews_corr)

Matthews correlation coefficient 0.6759553597038589


In [93]:
## Precision
precision = metrics.precision_score(truth, prediction)
print("Precision:", precision)

Precision: 0.8401639344262295


In [90]:
metrics.precision_score(truth, prediction, average = 'binary', pos_label = 1)

0.8401639344262295

In [96]:
## Recall
recall = metrics.recall_score(truth, prediction, pos_label  = 1)
print("Recall:", recall)

Recall: 0.8299595141700404


In [30]:
## Zero-one loss
zero_one_loss = metrics.zero_one_loss(truth, prediction)
print("Zero-One Loss:", zero_one_loss)

Zero-One Loss: 0.16200000000000003


In [52]:
## compute roc curve
fpr, tpr, thresholds = metrics.roc_curve(truth, probability, pos_label=1)
print("Length of thresholds vector:", len(thresholds))
print(fpr[1:6])
print(tpr[1:6])
print(thresholds[1:6])
print(max(thresholds)) # = max(probability) + 1

Length of thresholds vector: 283
[0.         0.         0.00197628 0.00197628 0.00395257]
[0.00202429 0.01012146 0.01012146 0.01619433 0.01619433]
[0.99774516 0.99590932 0.99375648 0.99160932 0.99115638]
1.9977451590821151


In [45]:
## Hinge loss
hinge_loss = metrics.hinge_loss(truth, prediction)
print("Hinge Loss:", hinge_loss)

Hinge Loss: 0.668


In [46]:
## Hamming loss
hamming_loss = metrics.hamming_loss(truth, prediction)
print("Hamming Loss:", hamming_loss)

Hamming Loss: 0.162


In [48]:
## Jaccard score
jaccard_score = metrics.jaccard_similarity_score(truth, prediction)
print("Jaccard Score:", jaccard_score)

Jaccard Score: 0.838


In [54]:
## Classification metrics
print(metrics.classification_report(truth, prediction))

              precision    recall  f1-score   support

           0       0.84      0.85      0.84       506
           1       0.84      0.83      0.84       494

   micro avg       0.84      0.84      0.84      1000
   macro avg       0.84      0.84      0.84      1000
weighted avg       0.84      0.84      0.84      1000



In [81]:
## precision-recall curve
metrics.precision_recall_curve(truth, prediction)

(array([0.494     , 0.84016393, 1.        ]),
 array([1.        , 0.82995951, 0.        ]),
 array([0, 1]))

In [75]:
## Combine all metrics into one data frame
sklearn_metrics = pd.DataFrame(data = {
    'accuracy_score':acc_score,
    'auc':auc,
    'average_precision_score':avg_precision_score,
    'balanced_accuracy':balanced_acc_score,
    'brier_score':brier_score,
    'cohen_kappa_score':cohen_kappa_score,
    'f1_score':f1_score,
    'hamming_loss':hamming_loss,
    'hinge_loss':hinge_loss,
    'jaccard_score':jaccard_score,
    'log_loss':log_loss,
    'matthews_corrcoef':matthews_corr,
    'precision':precision,
    'recall':recall,
    'zero_one_loss':zero_one_loss
}, index = [1])

In [80]:
sklearn_metrics.to_csv("../tinytest/sklearn-metrics-two-class.csv")

In [79]:
roc_curve = pd.DataFrame(data = {
    'fpr':fpr,
    'tpr':tpr,
    'thresholds':thresholds
})
roc_curve.to_csv("../tinytest/sklearn-metrics-roc-curve.csv")

## Regression metrics

In [8]:
reg_sample_data = pd.read_csv("../tinytest/regression-sample-data.csv")
print("Shape of data:", reg_sample_data.shape)
print("Column names:", reg_sample_data.columns)

Shape of data: (47, 2)
Column names: Index(['act', 'pred'], dtype='object')


In [9]:
## explained variation
explained_variation = metrics.explained_variance_score(reg_sample_data['act'], reg_sample_data['pred'])
print("Explained variation:", explained_variation)

Explained variation: 0.706735001592725


In [30]:
## max error
max_error = np.max(np.abs(np.array(reg_sample_data['act']), np.array(reg_sample_data['pred'])))
print("Max Error:", max_error)

Max Error: 92.5


In [28]:
np.array(reg_sample_data['act'])

array([80.2, 83.1, 92.5, 85.8, 76.9, 76.1, 83.8, 92.4, 82.4, 82.9, 87.1,
       64.1, 66.9, 68.9, 61.7, 68.3, 71.7, 55.7, 54.3, 65.1, 65.5, 65. ,
       56.6, 57.4, 72.5, 74.2, 72. , 60.5, 58.3, 65.4, 75.5, 69.3, 77.3,
       70.5, 79.4, 65. , 92.2, 79.3, 70.4, 65.7, 72.7, 64.4, 77.6, 67.6,
       35. , 44.7, 42.8])

In [13]:
## mean absolute error
mean_abs_error = metrics.mean_absolute_error(reg_sample_data['act'], reg_sample_data['pred'])
print("Mean Absolute Error", mean_abs_error)

Mean Absolute Error 5.32138023658641


In [14]:
## mean squared error
mean_squared_error = metrics.mean_squared_error(reg_sample_data['act'], reg_sample_data['pred'])
print("Mean Squared Error:", mean_squared_error)

Mean Squared Error: 44.78814745625719


In [16]:
## mean sqaured log error
mean_squared_log_error = metrics.mean_squared_log_error(reg_sample_data['act'], reg_sample_data['pred'])
print("Mean Squared Log Error:", mean_squared_log_error)

Mean Squared Log Error: 0.009589264084385303


In [17]:
## median absolute error
median_abs_error = metrics.median_absolute_error(reg_sample_data['act'], reg_sample_data['pred'])
print("Median Absolute Error:", median_abs_error)

Median Absolute Error: 4.990128583411703


In [18]:
## R2
r2 = metrics.r2_score(reg_sample_data['act'], reg_sample_data['pred'])
print("R2:", r2)

R2: 0.706735001592725


In [31]:
## Combine all regression metrics into data frame
reg_metrics_df = pd.DataFrame({
    'explained_variation':explained_variation,
    'max_error':max_error,
    'mean_absolute_error':mean_abs_error,
    'mean_squared_error':mean_squared_error,
    'mean_squared_log_error':mean_squared_log_error,
    'median_absolute_error':median_abs_error,
    'r2':r2
}, index = [1])
reg_metrics_df.to_csv("../tinytest/sklearn-metrics-regression.csv")