# Diagnosing the Model

Evaluate the accuracy and health of the logistic regression model by creating a confusion matrix and a classification report that describe the model's performance.

In [5]:
from pathlib import Path
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import pandas as pd

### Create, train, and execute the LR model

In [9]:
# Load the diabetes dataset from the local Resources folder
data = Path('./Resources/diabetes.csv')
df = pd.read_csv(data)

# Features: every column except the "Outcome" label
X = df.drop(columns=["Outcome"])

# Target: the "Outcome" column on its own
y = df["Outcome"]

In [14]:
# Preview the first five target values
y.head(n=5)

0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64

In [13]:
# Preview the first five rows of the feature matrix
X.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [15]:
# Hold out a test subset; stratifying on y keeps the class balance
# the same in the train and test subsets, and the fixed seed makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=1,
)

# Build the logistic regression model and fit it on the training subset
# (fit() returns the estimator itself, so the call can be chained).
classifier = LogisticRegression(random_state=1).fit(X_train, y_train)

# Predict labels for the held-out rows
predictions = classifier.predict(X_test)

# Pair each prediction with its true label; the original row index is
# dropped so the table is numbered 0..n-1.
results = pd.DataFrame({"Prediction": predictions, "Actual": y_test})
results = results.reset_index(drop=True)
results.head()

Unnamed: 0,Prediction,Actual
0,0,0
1,1,1
2,0,0
3,1,1
4,0,0


# Confusion Matrix

In [16]:
# Build the confusion matrix for the test-set predictions.
# Layout: rows are the actual classes, columns are the predicted classes,
# so the top-right cell counts false positives.
confusion_matrix(y_true=y_test, y_pred=predictions)

array([[111,  14],
       [ 36,  31]], dtype=int64)

# Classification Report

In [18]:
# Human-readable class labels, passed to the report as display names
target_names = ["No Diabetes", "Diabetes"]

# Build the per-class precision / recall / f1 summary, then print it so the
# text table keeps its column alignment.
report = classification_report(
    y_test,
    predictions,
    target_names=target_names,
)
print(report)

              precision    recall  f1-score   support

 No Diabetes       0.76      0.89      0.82       125
    Diabetes       0.69      0.46      0.55        67

    accuracy                           0.74       192
   macro avg       0.72      0.68      0.68       192
weighted avg       0.73      0.74      0.72       192

