In [1]:
# Create the imports for the project!

import numpy as np
import pandas as pd
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
import tensorflow as tf

## Heart Analysis

In [2]:
# Load the heart dataset CSV (the path is relative to the current working directory)
heart_data = pd.read_csv(filepath_or_buffer="../Datasets/heart_2020_cleaned.csv")

In [3]:
# Preview the first five rows of heart_data
heart_data.head(n=5)

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.6,Yes,No,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,No,20.34,No,No,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,No,26.58,Yes,No,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,No,24.21,No,No,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,No,23.71,No,No,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No


In [4]:
# List every column of heart_data together with its dtype

print("Heart Data Columns:")
# `dtypes` is a Series indexed by column name, so items() yields (name, dtype) pairs
for col_name, dtype in heart_data.dtypes.items():
    print(f"\t{col_name} \n\t\tTYPE: {dtype}")

Heart Data Columns:
	HeartDisease 
		TYPE: object
	BMI 
		TYPE: float64
	Smoking 
		TYPE: object
	AlcoholDrinking 
		TYPE: object
	Stroke 
		TYPE: object
	PhysicalHealth 
		TYPE: float64
	MentalHealth 
		TYPE: float64
	DiffWalking 
		TYPE: object
	Sex 
		TYPE: object
	AgeCategory 
		TYPE: object
	Race 
		TYPE: object
	Diabetic 
		TYPE: object
	PhysicalActivity 
		TYPE: object
	GenHealth 
		TYPE: object
	SleepTime 
		TYPE: float64
	Asthma 
		TYPE: object
	KidneyDisease 
		TYPE: object
	SkinCancer 
		TYPE: object


### Dataset Initial Concerns

Initially, I am worried about the large number of object-typed columns; I will have to do some
kind of encoding to quantify these columns numerically!

Let's start with the HeartDisease column. This column has values of "Yes" and "No".
This means we can assign probability values of 0.0 and 1.0 to "No" and "Yes" respectively.

This means we can use these values in probabilistic systems!!!!

In [5]:
# HeartDisease: inspect the raw labels, then encode them numerically
print("Unique Values of columns in Heart Disease")
print(heart_data["HeartDisease"].unique())

# Overwrite each label with its numeric code: "No" -> 0.0, "Yes" -> 1.0
heart_data.loc[heart_data["HeartDisease"].eq("No"), "HeartDisease"] = 0.0
heart_data.loc[heart_data["HeartDisease"].eq("Yes"), "HeartDisease"] = 1.0

# Cast the column to float32 now that its labels have been replaced
heart_data = heart_data.astype({"HeartDisease": np.float32})

# Show the first five rows to check the result
heart_data.head(n=5)

Unique Values of columns in Heart Disease
['No' 'Yes']


Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0.0,16.6,Yes,No,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,0.0,20.34,No,No,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,0.0,26.58,Yes,No,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,0.0,24.21,No,No,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,0.0,23.71,No,No,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No


Now we can do the same thing to the following columns: Smoking, AlcoholDrinking, Stroke, DiffWalking, PhysicalActivity, Asthma, KidneyDisease, and SkinCancer. Sex is also binary, but its values are "Female" and "Male" rather than "Yes" and "No", so it needs its own mapping. The other
columns have values which are not specifically "Yes" or "No", so we need a slightly more advanced procedure to encode them.

In [6]:
# Smoking: inspect the raw labels, then encode them numerically
print("Unique Values of columns in Smoking")
print(heart_data["Smoking"].unique())

# Overwrite each label with its numeric code: "No" -> 0.0, "Yes" -> 1.0
heart_data.loc[heart_data["Smoking"].eq("No"), "Smoking"] = 0.0
heart_data.loc[heart_data["Smoking"].eq("Yes"), "Smoking"] = 1.0

# Cast the column to float32 now that its labels have been replaced
heart_data = heart_data.astype({"Smoking": np.float32})

# Show the first five rows to check the result
heart_data.head(n=5)

Unique Values of columns in Smoking
['Yes' 'No']


Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0.0,16.6,1.0,No,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,0.0,20.34,0.0,No,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,0.0,26.58,1.0,No,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,0.0,24.21,0.0,No,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,0.0,23.71,0.0,No,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No


In [7]:
# AlcoholDrinking: inspect the raw labels, then encode them numerically
print("Unique Values of columns in Alcohol Drinking")
print(heart_data["AlcoholDrinking"].unique())

# Overwrite each label with its numeric code: "No" -> 0.0, "Yes" -> 1.0
heart_data.loc[heart_data["AlcoholDrinking"].eq("No"), "AlcoholDrinking"] = 0.0
heart_data.loc[heart_data["AlcoholDrinking"].eq("Yes"), "AlcoholDrinking"] = 1.0

# Cast the column to float32 now that its labels have been replaced
heart_data = heart_data.astype({"AlcoholDrinking": np.float32})

# Show the first five rows to check the result
heart_data.head(n=5)

Unique Values of columns in Alcohol Drinking
['No' 'Yes']


Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0.0,16.6,1.0,0.0,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,0.0,20.34,0.0,0.0,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,0.0,26.58,1.0,0.0,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,0.0,24.21,0.0,0.0,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,0.0,23.71,0.0,0.0,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No


In [8]:
# Stroke: inspect the raw labels, then encode them numerically
print("Unique Values of columns in Stroke")
print(heart_data["Stroke"].unique())

# Overwrite each label with its numeric code: "No" -> 0.0, "Yes" -> 1.0
heart_data.loc[heart_data["Stroke"].eq("No"), "Stroke"] = 0.0
heart_data.loc[heart_data["Stroke"].eq("Yes"), "Stroke"] = 1.0

# Cast the column to float32 now that its labels have been replaced
heart_data = heart_data.astype({"Stroke": np.float32})

# Show the first five rows to check the result
heart_data.head(n=5)

Unique Values of columns in Stroke
['No' 'Yes']


Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0.0,16.6,1.0,0.0,0.0,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,0.0,20.34,0.0,0.0,1.0,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,0.0,26.58,1.0,0.0,0.0,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,0.0,24.21,0.0,0.0,0.0,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,0.0,23.71,0.0,0.0,0.0,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No


In [10]:
# DiffWalking: inspect the raw labels, then encode them numerically
print("Unique Values of columns in DiffWalking")
print(heart_data["DiffWalking"].unique())

# Overwrite each label with its numeric code: "No" -> 0.0, "Yes" -> 1.0
heart_data.loc[heart_data["DiffWalking"].eq("No"), "DiffWalking"] = 0.0
heart_data.loc[heart_data["DiffWalking"].eq("Yes"), "DiffWalking"] = 1.0

# Cast the column to float32 now that its labels have been replaced
heart_data = heart_data.astype({"DiffWalking": np.float32})

# Show the first five rows to check the result
heart_data.head(n=5)

Unique Values of columns in DiffWalking
['No' 'Yes']


Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0.0,16.6,1.0,0.0,0.0,3.0,30.0,0.0,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,0.0,20.34,0.0,0.0,1.0,0.0,0.0,0.0,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,0.0,26.58,1.0,0.0,0.0,20.0,30.0,0.0,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,0.0,24.21,0.0,0.0,0.0,0.0,0.0,0.0,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,0.0,23.71,0.0,0.0,0.0,28.0,0.0,1.0,Female,40-44,White,No,Yes,Very good,8.0,No,No,No


In [12]:
# PhysicalActivity: inspect the raw labels, then encode them numerically
print("Unique Values of columns in Physical Activity")
print(heart_data["PhysicalActivity"].unique())

# Overwrite each label with its numeric code: "No" -> 0.0, "Yes" -> 1.0
heart_data.loc[heart_data["PhysicalActivity"].eq("No"), "PhysicalActivity"] = 0.0
heart_data.loc[heart_data["PhysicalActivity"].eq("Yes"), "PhysicalActivity"] = 1.0

# Cast the column to float32 now that its labels have been replaced
heart_data = heart_data.astype({"PhysicalActivity": np.float32})

# Show the first five rows to check the result
heart_data.head(n=5)

Unique Values of columns in Physical Activity
['Yes' 'No']


Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0.0,16.6,1.0,0.0,0.0,3.0,30.0,0.0,Female,55-59,White,1,1.0,Very good,5.0,Yes,No,Yes
1,0.0,20.34,0.0,0.0,1.0,0.0,0.0,0.0,Female,80 or older,White,0,1.0,Very good,7.0,No,No,No
2,0.0,26.58,1.0,0.0,0.0,20.0,30.0,0.0,Male,65-69,White,1,1.0,Fair,8.0,Yes,No,No
3,0.0,24.21,0.0,0.0,0.0,0.0,0.0,0.0,Female,75-79,White,0,0.0,Good,6.0,No,No,Yes
4,0.0,23.71,0.0,0.0,0.0,28.0,0.0,1.0,Female,40-44,White,0,1.0,Very good,8.0,No,No,No


In [13]:
# Asthma: inspect the raw labels, then encode them numerically
print("Unique Values of columns in Asthma")
print(heart_data["Asthma"].unique())

# Overwrite each label with its numeric code: "No" -> 0.0, "Yes" -> 1.0
heart_data.loc[heart_data["Asthma"].eq("No"), "Asthma"] = 0.0
heart_data.loc[heart_data["Asthma"].eq("Yes"), "Asthma"] = 1.0

# Cast the column to float32 now that its labels have been replaced
heart_data = heart_data.astype({"Asthma": np.float32})

# Show the first five rows to check the result
heart_data.head(n=5)

Unique Values of columns in Asthma
['Yes' 'No']


Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0.0,16.6,1.0,0.0,0.0,3.0,30.0,0.0,Female,55-59,White,1,1.0,Very good,5.0,1.0,No,Yes
1,0.0,20.34,0.0,0.0,1.0,0.0,0.0,0.0,Female,80 or older,White,0,1.0,Very good,7.0,0.0,No,No
2,0.0,26.58,1.0,0.0,0.0,20.0,30.0,0.0,Male,65-69,White,1,1.0,Fair,8.0,1.0,No,No
3,0.0,24.21,0.0,0.0,0.0,0.0,0.0,0.0,Female,75-79,White,0,0.0,Good,6.0,0.0,No,Yes
4,0.0,23.71,0.0,0.0,0.0,28.0,0.0,1.0,Female,40-44,White,0,1.0,Very good,8.0,0.0,No,No


In [14]:
# KidneyDisease: inspect the raw labels, then encode them numerically
print("Unique Values of columns in KidneyDisease")
print(heart_data["KidneyDisease"].unique())

# Overwrite each label with its numeric code: "No" -> 0.0, "Yes" -> 1.0
heart_data.loc[heart_data["KidneyDisease"].eq("No"), "KidneyDisease"] = 0.0
heart_data.loc[heart_data["KidneyDisease"].eq("Yes"), "KidneyDisease"] = 1.0

# Cast the column to float32 now that its labels have been replaced
heart_data = heart_data.astype({"KidneyDisease": np.float32})

# Show the first five rows to check the result
heart_data.head(n=5)

Unique Values of columns in KidneyDisease
['No' 'Yes']


Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0.0,16.6,1.0,0.0,0.0,3.0,30.0,0.0,Female,55-59,White,1,1.0,Very good,5.0,1.0,0.0,Yes
1,0.0,20.34,0.0,0.0,1.0,0.0,0.0,0.0,Female,80 or older,White,0,1.0,Very good,7.0,0.0,0.0,No
2,0.0,26.58,1.0,0.0,0.0,20.0,30.0,0.0,Male,65-69,White,1,1.0,Fair,8.0,1.0,0.0,No
3,0.0,24.21,0.0,0.0,0.0,0.0,0.0,0.0,Female,75-79,White,0,0.0,Good,6.0,0.0,0.0,Yes
4,0.0,23.71,0.0,0.0,0.0,28.0,0.0,1.0,Female,40-44,White,0,1.0,Very good,8.0,0.0,0.0,No


In [15]:
# SkinCancer: inspect the raw labels, then encode them numerically
print("Unique Values of columns in Skin Cancer")
print(heart_data["SkinCancer"].unique())

# Overwrite each label with its numeric code: "No" -> 0.0, "Yes" -> 1.0
heart_data.loc[heart_data["SkinCancer"].eq("No"), "SkinCancer"] = 0.0
heart_data.loc[heart_data["SkinCancer"].eq("Yes"), "SkinCancer"] = 1.0

# Cast the column to float32 now that its labels have been replaced
heart_data = heart_data.astype({"SkinCancer": np.float32})

# Show the first five rows to check the result
heart_data.head(n=5)

Unique Values of columns in Skin Cancer
['Yes' 'No']


Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0.0,16.6,1.0,0.0,0.0,3.0,30.0,0.0,Female,55-59,White,1,1.0,Very good,5.0,1.0,0.0,1.0
1,0.0,20.34,0.0,0.0,1.0,0.0,0.0,0.0,Female,80 or older,White,0,1.0,Very good,7.0,0.0,0.0,0.0
2,0.0,26.58,1.0,0.0,0.0,20.0,30.0,0.0,Male,65-69,White,1,1.0,Fair,8.0,1.0,0.0,0.0
3,0.0,24.21,0.0,0.0,0.0,0.0,0.0,0.0,Female,75-79,White,0,0.0,Good,6.0,0.0,0.0,1.0
4,0.0,23.71,0.0,0.0,0.0,28.0,0.0,1.0,Female,40-44,White,0,1.0,Very good,8.0,0.0,0.0,0.0


Now we start looking at the 