In [2]:
# pip install scikit-learn numpy
import numpy as np
from sklearn.metrics import (
    confusion_matrix, accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, roc_curve, precision_recall_curve,
    average_precision_score, log_loss, brier_score_loss,
    r2_score, mean_squared_error, mean_absolute_error,
    cohen_kappa_score, matthews_corrcoef, classification_report,
    adjusted_rand_score, normalized_mutual_info_score, silhouette_score
)

def binary_counts(y_true, y_pred, positive=1):
    """Count the four confusion-matrix cells for a binary problem.

    Every label equal to ``positive`` is treated as the positive class; any
    other label counts as negative.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned element-wise with ``y_true``.
    positive : label, default 1
        The label regarded as the positive class.

    Returns
    -------
    tuple of int
        ``(TP, FP, FN, TN)`` as plain Python integers.
    """
    # Build each boolean mask once and combine them for the four cells.
    actual_pos = np.asarray(y_true) == positive
    predicted_pos = np.asarray(y_pred) == positive

    true_pos = int(np.count_nonzero(actual_pos & predicted_pos))
    false_pos = int(np.count_nonzero(~actual_pos & predicted_pos))
    false_neg = int(np.count_nonzero(actual_pos & ~predicted_pos))
    true_neg = int(np.count_nonzero(~actual_pos & ~predicted_pos))
    return true_pos, false_pos, false_neg, true_neg

def metrics_from_confusion(TP, FP, FN, TN):
    """Derive standard binary-classification metrics from confusion counts.

    Parameters
    ----------
    TP, FP, FN, TN : int
        True-positive, false-positive, false-negative and true-negative
        counts.

    Returns
    -------
    dict
        Keys ``'N'`` (total count), ``'accuracy'``, ``'precision'``,
        ``'recall'``, ``'specificity'``, ``'npv'``, ``'f1'`` and ``'mcc'``.
        A metric whose denominator is zero is reported as ``np.nan``.
        ``'f1'`` is also ``np.nan`` whenever ``precision + recall`` is not
        strictly positive (either is NaN, or both are zero).
    """
    N = TP + FP + FN + TN
    acc = (TP + TN) / N if N > 0 else np.nan
    precision = TP / (TP + FP) if (TP + FP) > 0 else np.nan
    recall = TP / (TP + FN) if (TP + FN) > 0 else np.nan
    specificity = TN / (TN + FP) if (TN + FP) > 0 else np.nan
    npv = TN / (TN + FN) if (TN + FN) > 0 else np.nan
    # A NaN operand makes the comparison False, so NaN propagates into f1.
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else np.nan

    # MCC: multiply the four marginals as floats. The integer product grows
    # like N**4, so for counts around 1e5 it exceeds the 64-bit range; np.sqrt
    # then rejects the oversized Python int (TypeError), and NumPy integer
    # inputs would silently wrap around instead.
    denom = np.sqrt(
        float(TP + FP) * float(TP + FN) * float(TN + FP) * float(TN + FN)
    )
    numerator = float(TP) * float(TN) - float(FP) * float(FN)
    mcc = numerator / denom if denom > 0 else np.nan
    return {
        'N': N, 'accuracy': acc, 'precision': precision, 'recall': recall,
        'specificity': specificity, 'npv': npv, 'f1': f1, 'mcc': mcc
    }

# Combined report: hand-computed confusion metrics next to sklearn's.
def _metric_or_nan(metric, *args, **kwargs):
    """Return ``metric(*args, **kwargs)``, or ``np.nan`` if it raises."""
    try:
        return metric(*args, **kwargs)
    except Exception:
        # Deliberate best-effort: a metric that is undefined for this input
        # (e.g. ROC-AUC with a single class present) must not abort the
        # whole report.
        return np.nan

def full_binary_report(y_true, y_pred, y_score=None, positive=1):
    """Build a dictionary of binary-classification metrics.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    y_score : array-like, optional
        Score or probability assigned to the ``positive`` class for each
        sample. When given, the score-based metrics are added to the report.
    positive : label, default 1
        Label treated as the positive class.

    Returns
    -------
    dict
        ``'confusion_matrix'`` is a 2x2 array laid out as
        ``[[TP, FN], [FP, TN]]`` (rows are actual positive/negative, columns
        are predicted positive/negative). Note that this differs from the
        layout returned by ``sklearn.metrics.confusion_matrix``.
        ``'from_confusion'`` holds the result of ``metrics_from_confusion``.
        ``'accuracy'``, ``'precision'``, ``'recall'``, ``'f1'``,
        ``'cohen_kappa'``, ``'mcc'`` and ``'classification_report'`` come
        from scikit-learn. ``'precision_macro'`` and ``'recall_macro'`` are
        kept for backward compatibility; despite their names they hold the
        same positive-class values as ``'precision'`` and ``'recall'``.
        If ``y_score`` is given, ``'roc_auc'``, ``'average_precision'``,
        ``'log_loss'`` and ``'brier'`` are added; each is ``np.nan`` when
        scikit-learn cannot compute it.
    """
    TP, FP, FN, TN = binary_counts(y_true, y_pred, positive=positive)
    precision = precision_score(y_true, y_pred, pos_label=positive)
    recall = recall_score(y_true, y_pred, pos_label=positive)
    report = {
        'confusion_matrix': np.array([[TP, FN], [FP, TN]]),
        'from_confusion': metrics_from_confusion(TP, FP, FN, TN),
        # sklearn
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision,
        'recall': recall,
        # Legacy key names: these are binary (positive-class) values.
        'precision_macro': precision,
        'recall_macro': recall,
        'f1': f1_score(y_true, y_pred, pos_label=positive),
        'cohen_kappa': cohen_kappa_score(y_true, y_pred),
        'mcc': matthews_corrcoef(y_true, y_pred),
        'classification_report': classification_report(y_true, y_pred, digits=4)
    }
    if y_score is not None:
        y_score = np.asarray(y_score)
        # Re-encode the truth as 1 = `positive`, 0 = everything else, so the
        # score-based metrics honour `positive` instead of sklearn's default
        # choice of positive label.
        is_positive = (np.asarray(y_true) == positive).astype(int)
        report['roc_auc'] = _metric_or_nan(roc_auc_score, is_positive, y_score)
        report['average_precision'] = _metric_or_nan(
            average_precision_score, is_positive, y_score
        )
        # Explicit labels keep log-loss defined when only one class occurs.
        report['log_loss'] = _metric_or_nan(
            log_loss, is_positive, y_score, labels=[0, 1]
        )
        report['brier'] = _metric_or_nan(brier_score_loss, is_positive, y_score)
    return report

# Regression helpers
def regression_report(y_true, y_pred, adjusted=False, p=None):
    """Summarise regression error with MSE, RMSE, MAE and R².

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values.
    adjusted : bool, default False
        When true, also report adjusted R² under the key ``'r2_adj'``.
    p : int, optional
        Number of predictors; required when ``adjusted`` is true.

    Returns
    -------
    dict
        Keys ``'n'`` (``len(y_true)``), ``'mse'``, ``'rmse'``, ``'mae'``,
        ``'r2'`` and, if requested, ``'r2_adj'``. ``'r2_adj'`` is ``np.nan``
        when ``n - p - 1`` is not positive, because the adjustment is
        undefined without residual degrees of freedom.

    Raises
    ------
    ValueError
        If ``adjusted`` is true and ``p`` is ``None``.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    n = len(y_true)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    out = {'n': n, 'mse': mse, 'rmse': rmse, 'mae': mae, 'r2': r2}
    if adjusted:
        if p is None:
            raise ValueError("Provide p = number of predictors for adjusted R2")
        # Guard the denominator: n == p + 1 would divide by zero, and a
        # negative value would yield a meaningless figure.
        dof = n - p - 1
        out['r2_adj'] = 1 - (1 - r2) * (n - 1) / dof if dof > 0 else np.nan
    return out

# Binary demo: 10 samples, 5 per class. The predictions below contain one
# false negative (index 1) and one false positive (index 7), so
# TP=4, FP=1, FN=1, TN=4.
y_true = [1,1,0,0,1,0,1,0,0,1]
y_pred = [1,0,0,0,1,0,1,1,0,1]
# Per-sample scores; presumably the predicted probability of class 1.
y_score = [0.9,0.4,0.2,0.1,0.86,0.05,0.78,0.6,0.01,0.95]
r = full_binary_report(y_true, y_pred, y_score)
# Show the hand-computed metrics, then sklearn's per-class text report.
print(r['from_confusion'])
print(r['classification_report'])

# Regression demo: 5 observations, adjusted R² computed for p=1 predictor.
y_t = [2.4, 0.5, 2.2, 1.9, 3.1]
y_p = [2.5, 0.6, 2.0, 2.1, 3.0]
print(regression_report(y_t, y_p, adjusted=True, p=1))



{'N': 10, 'accuracy': 0.8, 'precision': 0.8, 'recall': 0.8, 'specificity': 0.8, 'npv': 0.8, 'f1': 0.8000000000000002, 'mcc': np.float64(0.6)}
              precision    recall  f1-score   support

           0     0.8000    0.8000    0.8000         5
           1     0.8000    0.8000    0.8000         5

    accuracy                         0.8000        10
   macro avg     0.8000    0.8000    0.8000        10
weighted avg     0.8000    0.8000    0.8000        10

{'n': 5, 'mse': 0.022000000000000033, 'rmse': np.float64(0.14832396974191336), 'mae': 0.1400000000000001, 'r2': 0.9700109051254089, 'r2_adj': 0.9600145401672119}
