In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import numpy as np
from collections import Counter

# Read the diabetes data from its CSV file into a DataFrame.
df = pd.read_csv('Notebooks/Datasets/diabetes.csv')

# Columns used as model inputs; 'Outcome' is the column being predicted.
feature_cols = ['Pregnancies', 'Insulin', 'BMI', 'Age']
X, y = df[feature_cols], df['Outcome']

In [2]:
# Hold out 25% of the rows for testing; random_state=0 keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Class balance of the held-out labels (displayed as the cell output).
y_test.value_counts()

0    130
1     62
Name: Outcome, dtype: int64

In [3]:
from sklearn.linear_model import LogisticRegression

# Build the classifier with default settings and train it in one step
# (fit() returns the estimator itself, so logreg is the fitted model).
logreg = LogisticRegression().fit(X_train, y_train)

# Hard 0/1 predictions for the held-out rows.
y_pred = logreg.predict(X_test)



In [4]:
logreg.intercept_

array([-4.32142123])

In [5]:
Counter(y_pred)

Counter({0: 165, 1: 27})

In [6]:
Counter(y_test)

Counter({1: 62, 0: 130})

In [7]:
# Accuracy: the share of held-out rows whose predicted label equals the true label.
(y_test == y_pred).mean()

0.6927083333333334

In [8]:
confusion_matrix(y_test, y_pred)

array([[118,  12],
       [ 47,  15]])

In [9]:
len(X)*0.25

192.0

In [10]:
def comp_yt_yp(y_test, y_predict):
    """Build a 2x2 confusion matrix for binary labels by direct counting.

    Rows are indexed by the true label and columns by the predicted label,
    so the layout is ``[[TN, FP], [FN, TP]]``.

    Parameters
    ----------
    y_test : iterable
        True labels; only entries equal to 0 or 1 are counted.
    y_predict : iterable
        Predicted labels, paired positionally with ``y_test``.

    Returns
    -------
    numpy.ndarray
        Float array of shape (2, 2) holding the pair counts.

    Notes
    -----
    The pairs are tallied in a single pass, so one-shot iterators
    (generators, ``iter(...)``) are counted correctly instead of being
    exhausted after the first matrix cell. Pairs in which either label is
    not 0 or 1 are ignored, and if the inputs differ in length the surplus
    elements of the longer one are dropped by ``zip``.
    """
    # create a blank 2x2 confusion matrix (all 0s)
    conf_matrix = np.zeros((2, 2))
    binary_labels = (0, 1)
    for true_label, predicted_label in zip(y_test, y_predict):
        # `in` compares with ==, so 1, 1.0, True and numpy integers all match
        if true_label in binary_labels and predicted_label in binary_labels:
            conf_matrix[int(true_label), int(predicted_label)] += 1
    return conf_matrix

# print the result of calculating our confusion matrix
print(comp_yt_yp(y_test, y_pred))

[[118.  12.]
 [ 47.  15.]]


In [11]:
from sklearn import metrics

confusion = metrics.confusion_matrix(y_test, y_pred)
print(confusion)

[[118  12]
 [ 47  15]]


In [12]:
# Per-row class probabilities for the held-out data (one column per class).
# NOTE(review): column order presumably follows logreg.classes_, i.e. column 0
# is P(Outcome=0) — confirm before thresholding on a specific column.
y_pred_prob = logreg.predict_proba(X_test)

In [13]:
from sklearn import metrics
import numpy as np

# Derive the counts from the model's predictions instead of copying them by
# hand from a printed output, so they cannot go stale if the data or the
# fitted model changes.
confusion = metrics.confusion_matrix(y_test, y_pred)
# Layout for binary 0/1 labels (rows = true label, columns = predicted label):
# [TN, FP]
# [FN, TP]
TN, FP, FN, TP = confusion.ravel()

In [14]:
def classifier(TN, FP, FN, TP):
    """Compute accuracy, precision, recall and F1 from confusion-matrix counts.

    Parameters
    ----------
    TN, FP, FN, TP : int
        True-negative, false-positive, false-negative and true-positive
        counts, in that order.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, F1_Score)``. A metric whose
        denominator is zero (for example precision when nothing was
        predicted positive) is reported as 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    def _ratio(numerator, denominator):
        # A zero denominator means the metric is undefined; report 0.0.
        return numerator / denominator if denominator else 0.0

    accuracy = _ratio(TP + TN, TP + TN + FN + FP)
    precision = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    # Harmonic mean of precision and recall.
    F1_Score = _ratio(2 * (precision * recall), precision + recall)

    return accuracy, precision, recall, F1_Score

In [15]:
# Arguments are the TN, FP, FN, TP counts taken from the confusion matrix above.
classifier(118, 12, 47, 15)

(0.6927083333333334,
 0.5555555555555556,
 0.24193548387096775,
 0.3370786516853933)

### Obtaining y_pred from predict_proba(X_test)

In [31]:
def y_pred_elements(y_pred_prob, threshold=0.357639):
    """Turn per-row class probabilities into hard 0/1 labels with a custom cut-off.

    Parameters
    ----------
    y_pred_prob : iterable of indexable rows
        Only element 0 of each row is inspected.
    threshold : float, default 0.357639
        A row is labelled 0 when its element 0 is strictly greater than
        this value, and 1 otherwise.

    Returns
    -------
    list of int
        One 0/1 label per input row, in input order.
    """
    # NOTE(review): the cut-off is applied to column 0 (presumably P(class 0)
    # from predict_proba). The default equals the share of 1-labels in y_train
    # computed elsewhere in this notebook; if the intent was "predict 1 when
    # P(class 1) exceeds that share", the test should use column 1 — confirm.
    return [0 if row[0] > threshold else 1 for row in y_pred_prob]

In [32]:
new_y_pred = y_pred_elements(y_pred_prob)

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [None]:
# New y_pred threshold

In [30]:
# Share of training rows labelled 1. Rounded to six decimals this is 0.357639,
# the cut-off value used in y_pred_elements.
# NOTE(review): this cell has execution count 30 but sits after cells 31-32,
# which rely on its value; move it above them so Run All reads in order.
y_train.value_counts()[1] / len(y_train)

0.3576388888888889

In [None]:
# Tally the labels produced with the custom threshold, for comparison with
# Counter(y_pred) from the default 0.5 cut-off; a bare Counter() counts nothing.
Counter(new_y_pred)