In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.metrics import roc_curve, precision_recall_curve, auc
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split

In [None]:
# Load the dataset from a CSV file expected in the working directory.
df = pd.read_csv('diabetes.csv')
# NOTE(review): when this is not the last expression of its cell, the preview
# returned by head() is not displayed.
df.head(5)

# NOTE(review): X and y are not defined in any visible cell — presumably the
# feature matrix and target derived from df in a cell not shown here; confirm,
# otherwise this line raises NameError on a fresh kernel.
# Hold out 20% of the rows for testing; random_state=0 fixes the shuffle.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=0)
# Logistic regression without regularisation, with a raised iteration cap.
# NOTE(review): the string 'none' was deprecated in scikit-learn 1.2 in favour
# of penalty=None — confirm against the installed version.
model = LogisticRegression(penalty='none', max_iter=10000)
model.fit(Xtrain, ytrain)
# Hard class predictions for the held-out rows.
ypred = model.predict(Xtest)

# Per-class probabilities for the held-out rows; later cells read column 1
# together with model.classes_[1] (column order presumably follows
# model.classes_ — confirm).
ytest_prob = model.predict_proba(Xtest)

# R² (coefficient of determination)

In [None]:
def cal_r2(y, ypred):
    """Return the coefficient of determination, R^2 = 1 - RSS / TSS.

    Parameters
    ----------
    y : array-like
        Observed values.
    ypred : array-like
        Predicted values, with the same shape as ``y`` (NumPy broadcasting
        applies if the shapes differ).

    Returns
    -------
    numpy.float64
        ``1 - RSS / TSS`` where RSS is the sum of squared residuals and TSS
        is the sum of squared deviations of ``y`` from its mean. When ``y``
        is constant TSS is zero and the result follows NumPy's
        division-by-zero semantics (``nan`` or ``-inf`` plus a warning).
    """
    # Coerce to float arrays so lists, boolean labels and pandas objects are
    # all treated the same way: element by element, by position.
    observed = np.asarray(y, dtype=float)
    predicted = np.asarray(ypred, dtype=float)

    rss = np.sum((observed - predicted) ** 2)
    # np.sum reduces over every axis, so TSS is a scalar even for column
    # vectors; the builtin sum would only reduce the first axis.
    tss = np.sum((observed - observed.mean()) ** 2)
    return 1 - rss / tss

# Performance

In [None]:
def compute_performance(ypred, ytest, classes):
    """Count the confusion-matrix cells of a binary classifier.

    Parameters
    ----------
    ypred : array-like
        Predicted labels.
    ytest : array-like
        True labels, same length as ``ypred``.
    classes : sequence
        ``classes[0]`` is treated as the negative label and ``classes[1]``
        as the positive label.

    Returns
    -------
    tuple of int
        ``(tp, tn, fp, fn)``. Samples whose predicted or true label is
        neither ``classes[0]`` nor ``classes[1]`` are not counted in any cell.
    """
    # Coerce to arrays so plain lists compare element-wise; without this,
    # ``list == label`` is a single bool and the counting breaks.
    predicted = np.asarray(ypred)
    actual = np.asarray(ytest)
    negative, positive = classes[0], classes[1]

    pred_pos = predicted == positive
    pred_neg = predicted == negative
    true_pos = actual == positive
    true_neg = actual == negative

    tp = int(np.count_nonzero(pred_pos & true_pos))
    tn = int(np.count_nonzero(pred_neg & true_neg))
    fp = int(np.count_nonzero(pred_pos & true_neg))
    fn = int(np.count_nonzero(pred_neg & true_pos))

    return tp, tn, fp, fn

# Confusion-matrix counts for the hard predictions in ypred, then accuracy
# as the share of correctly classified samples.
tp, tn, fp, fn = compute_performance(ypred, ytest, model.classes_)
Acc = (tp + tn) / sum((tp, tn, fp, fn))
print(f"Acc: {Acc:.5f}")

# Custom decision threshold

In [None]:
# Cutoff applied to the probability in column 1 of predict_proba.
threshold = 0.6

ytest_prob = model.predict_proba(Xtest)
# Probability above the cutoff selects model.classes_[1], otherwise
# model.classes_[0].
ypred = model.classes_.take((ytest_prob[:, 1] > threshold).astype(int))
tp, tn, fp, fn = compute_performance(ypred, ytest, model.classes_)
Acc = (tp + tn) / sum((tp, tn, fp, fn))
print(f"Acc: {Acc:.5f}")

# ROC and AUC

In [None]:
# ROC curve from the probabilities in column 1 of ytest_prob. The positive
# label is taken from model.classes_[1] (the class the rest of the notebook
# pairs with that column) rather than a hard-coded 1.
fpr, tpr, _ = roc_curve(ytest, ytest_prob[:, 1], pos_label=model.classes_[1])
# estimator=None / sort=False draw the points exactly as returned; the
# lineplot defaults would average tpr over repeated fpr values and add an
# error band, distorting the vertical steps of the curve.
ax = sns.lineplot(x=fpr, y=tpr, estimator=None, sort=False)
ax.set(xlabel="False positive rate", ylabel="True positive rate", title="ROC curve")

# Area under the ROC curve, computed from the (fpr, tpr) points.
AUC = auc(fpr, tpr)
print("classifier (all feature): " + str(AUC))