## Creating Logistic Regression Model for Heart Disease Dataset

In [133]:
# Import the required modules
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler

## Step 1: Read in the dataset 

In [134]:
# Read the Heart_Disease_Prediction.csv file from the Resources folder into a Pandas DataFrame
file_path = "../Resources/Heart_Disease_Prediction.csv"
#file_path = "Heart_Disease_Prediction.csv" - Use with Google Colab
df_heart = pd.read_csv(file_path)
df_heart.head()


Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence


## Step 2: Split the data into X and y and then into testing and training sets.

In [135]:
# Split the data into X (features) and y (target)
# The y variable should focus on the target column
y = df_heart['Heart Disease']

# The X variable should include all features except the target
X = df_heart.drop(columns=['Heart Disease'])


In [136]:
# Split into testing and training sets using train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [137]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Step 3: Fit a logistic regression classifier.

In [138]:
# Declare a logistic regression model.
lr_model = LogisticRegression(random_state=9)

# Fit and save the logistic regression model using the training data
lr_model.fit(X_train_scaled, y_train)

LogisticRegression(random_state=9)

## Step 4: Create the predicted values for the testing and the training data.

In [139]:
# Generate training predictions
training_predictions = lr_model.predict(X_train_scaled)

# Generate testing predictions
testing_predictions = lr_model.predict(X_test_scaled)


## Step 5: Print a confusion matrix for the training data.

In [140]:
# Import the model for sklearn confusion matrix
from sklearn.metrics import confusion_matrix

# Create and save the confusion matrix for the training data
training_matrix = confusion_matrix(y_train, training_predictions)
training_df = pd.DataFrame(
    training_matrix, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Print the confusion matrix for the training data
display(training_df)

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,99,13
Actual 1,12,78


## Step 6: Print the training classification report.

In [141]:
# Create and save the training classification report
training_report = classification_report(y_train, training_predictions)

# Print the training classification report
print(training_report)

              precision    recall  f1-score   support

     Absence       0.89      0.88      0.89       112
    Presence       0.86      0.87      0.86        90

    accuracy                           0.88       202
   macro avg       0.87      0.88      0.87       202
weighted avg       0.88      0.88      0.88       202



## Step 7: Print the testing confusion matrix and classification report.

In [142]:
# Create and save the testing classification report
testing_report = classification_report(y_test, testing_predictions)

# Calculating the confusion matrix
cm = confusion_matrix(y_test, testing_predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, testing_predictions)

# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, testing_predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,33,5
Actual 1,9,21


Accuracy Score : 0.7941176470588235
Classification Report
              precision    recall  f1-score   support

     Absence       0.79      0.87      0.82        38
    Presence       0.81      0.70      0.75        30

    accuracy                           0.79        68
   macro avg       0.80      0.78      0.79        68
weighted avg       0.80      0.79      0.79        68

