In [31]:
from tensorflow import keras
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from google.colab import drive
# Mount Google Drive at /content/drive so the shared-drive paths used below resolve.
drive.mount('/content/drive')

# Load the saved Keras model from the team's shared drive; `model` is used by
# the prediction cell further down.
model = keras.models.load_model("/content/drive/Shared drives/ECS171 Group Project Team 11/best_model")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [34]:
def _prompt_number(bounds):
    """Prompt until the user types a finite number that satisfies ``bounds``.

    Args:
        bounds: ``[low]`` for a lower bound only, or ``[low, high]`` for an
            inclusive range.

    Returns:
        The accepted value as a ``float``.
    """
    import math  # local import so the cell does not depend on an extra top-level import

    while True:
        text = input('Enter: ')

        try:
            value = float(text)
        except ValueError:
            print('Invalid input: please enter a number.')
            continue

        # float() accepts 'nan' and 'inf'. NaN makes every comparison below
        # False, so it would otherwise be accepted as a valid answer.
        if not math.isfinite(value):
            print('Invalid input: please enter a number.')
            continue

        if len(bounds) == 1:
            if value < bounds[0]:
                print('Invalid input: please enter a positive number.')
                continue
        elif value < bounds[0] or value > bounds[1]:
            print(f'Invalid input: please enter a number between {bounds[0]} to {bounds[1]}.')
            continue

        return value


def _prompt_choice(categories):
    """Print a numbered menu of ``categories`` and prompt until a valid index is typed.

    Args:
        categories: list of option labels (strings).

    Returns:
        The label the user selected.
    """
    for i, cat in enumerate(categories):
        print(f'{i} : {cat}')

    while True:
        text = input('Enter: ')

        try:
            choice = int(text)
        except ValueError:
            print(f'Invalid input: please enter a number between 0 to {len(categories)-1}.')
            continue

        # Reject negatives explicitly: Python would otherwise treat them as
        # "count from the end" indices.
        if choice < 0 or choice >= len(categories):
            print(f'Invalid input: please enter a number between 0 to {len(categories)-1}.')
            continue

        return categories[choice]


def get_input():
    """Interactively collect one respondent's answers from stdin.

    Each feature name is printed, followed by either a numeric prompt or a
    numbered menu. Invalid answers are re-prompted until a valid one is given.

    Returns:
        A one-row ``pandas.DataFrame`` whose columns are ``HeartDisease``
        (always ``None``) followed by the features in the order of
        ``selections`` below. Numeric features hold floats; categorical
        features hold the selected label string.
    """
    df = {'HeartDisease' : [None]}

    # Numeric entries ([low] or [low, high]) describe bounds; string entries
    # list the allowed category labels.
    selections = {
        'BMI' : [0],
        'PhysicalHealth' : [0,30],
        'MentalHealth' : [0,30],
        'SleepTime' : [0],
        'Smoking': ['No', 'Yes'],
        'AlcoholDrinking': ['No', 'Yes'],
        'Stroke': ['No', 'Yes'],
        'DiffWalking': ['No', 'Yes'],
        'Sex' : ['Female', 'Male'],
        'PhysicalActivity': ['No', 'Yes'],
        'Asthma': ['No', 'Yes'],
        'KidneyDisease': ['No', 'Yes'],
        'SkinCancer': ['No', 'Yes'],
        'AgeCategory': ['55-59', '80 or older', '65-69', '75-79', '40-44', '70-74', '60-64', '50-54', '45-49', '18-24', '35-39', '30-34', '25-29'],
        'Race': ['White', 'Black', 'Asian', 'American Indian/Alaskan Native', 'Other', 'Hispanic'],
        'Diabetic' : ['Yes', 'No', 'No, borderline diabetes', 'Yes (during pregnancy)'],
        'GenHealth' : ['Very good', 'Fair', 'Good', 'Poor', 'Excellent'],
    }

    for selection, categories in selections.items():
        print(selection)

        if isinstance(categories[0], str):
            df[selection] = [_prompt_choice(categories)]
        else:
            df[selection] = [_prompt_number(categories)]

    return pd.DataFrame(df)

def process_input(df, scaler=None):
    """Encode raw questionnaire answers into the numeric feature frame fed to the model.

    Steps: Yes/No columns become 1/0, ``Sex`` becomes a ``Female`` flag
    (Female=1, Male=0), the four numeric columns are passed through
    ``scaler.transform``, and the multi-level categorical columns are expanded
    into ``<Feature>__<Level>`` indicator columns.

    The input frame is not modified. Columns are selected by name, so the
    function works for any number of rows and any index.

    Args:
        df: DataFrame with the columns produced by ``get_input``. A
            ``HeartDisease`` column, if present, is ignored.
        scaler: optional object with a ``transform`` method, already fitted on
            the columns ``BMI, PhysicalHealth, MentalHealth, SleepTime`` in
            that order. When omitted, a ``MinMaxScaler`` is fitted on those
            columns of the project's CSV, which is re-read on every call.

    Returns:
        A new ``float32`` DataFrame with the same index as ``df`` and a fixed
        column order: numeric features, binary flags, then indicator columns.

    Raises:
        KeyError: if a required input column is missing.
        ValueError: if a binary column holds a value that cannot be converted
            to a number after Yes/No encoding.
    """
    numeric_cols = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']
    yes_no_cols = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking',
                   'PhysicalActivity', 'Asthma', 'KidneyDisease', 'SkinCancer']
    base_cols = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime',
                 'Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Female',
                 'PhysicalActivity', 'Asthma', 'KidneyDisease', 'SkinCancer']

    # Raw labels that are spelled differently in the indicator column names.
    label_fixes = {
        'Diabetic': {
            'No, borderline diabetes': 'No_borderline_diabetes',
            'Yes (during pregnancy)': 'Yes_during_pregnancy'
        },
        'Race': {
            'American Indian/Alaskan Native': 'American_Indian_or_Alaskan_Native'
        },
        'GenHealth': {
            'Very good': 'Very_good'
        }
    }

    # Indicator levels per feature, in the order the output columns are emitted.
    levels = {
        'AgeCategory': ['18-24', '25-29', '30-34', '35-39', '40-44', '45-49',
                        '50-54', '55-59', '60-64', '65-69', '70-74', '75-79',
                        '80 or older'],
        'Race': ['American_Indian_or_Alaskan_Native', 'Asian', 'Black',
                 'Hispanic', 'Other', 'White'],
        'Diabetic': ['No', 'No_borderline_diabetes', 'Yes', 'Yes_during_pregnancy'],
        'GenHealth': ['Excellent', 'Fair', 'Good', 'Poor', 'Very_good'],
    }

    # Work on a copy so the caller's frame is left untouched.
    out = df.copy()

    # Binary answers: 'No' -> 0, 'Yes' -> 1; anything else is passed through.
    yes_no = {'No': 0, 'Yes': 1}
    for col in yes_no_cols:
        out[col] = out[col].map(lambda v: yes_no.get(v, v))

    # Rename Sex column to Female and encode Female=1, Male=0.
    out = out.rename({'Sex': 'Female'}, axis=1)
    sex_codes = {'Female': 1, 'Male': 0}
    out['Female'] = out['Female'].map(lambda v: sex_codes.get(v, v))

    for feature, fixes in label_fixes.items():
        out[feature] = out[feature].map(lambda v, fixes=fixes: fixes.get(v, v))

    if scaler is None:
        df_ds = pd.read_csv("/content/drive/Shared drives/ECS171 Group Project Team 11/heart_2020_cleaned.csv")
        scaler = MinMaxScaler()
        scaler.fit(df_ds[numeric_cols])
    out[numeric_cols] = scaler.transform(out[numeric_cols])

    # Expand each categorical feature into one 0/1 column per level.
    indicator_cols = []
    for feature, feature_levels in levels.items():
        for level in feature_levels:
            name = f'{feature}__{level}'
            out[name] = (out[feature] == level).astype(int)
            indicator_cols.append(name)

    # Fixed column order and a single numeric dtype, so the model never
    # receives object-typed columns.
    return out[base_cols + indicator_cols].astype('float32')

In [37]:
# Collect one respondent's answers interactively, then encode them into the
# numeric feature frame the model consumes. Note `df` is rebound from the raw
# answers to the encoded features.
df = get_input()
df = process_input(df)
# The prediction is indexed [0][0]: first row, first output value, which this
# cell reports as the heart-disease probability.
print('Probability of Heart Disease:', model.predict(df)[0][0])

BMI
Enter: 25
PhysicalHealth
Enter: 0
MentalHealth
Enter: 0
SleepTime
Enter: 6
Smoking
0 : No
1 : Yes
Enter: 0
AlcoholDrinking
0 : No
1 : Yes
Enter: 0
Stroke
0 : No
1 : Yes
Enter: 0
DiffWalking
0 : No
1 : Yes
Enter: 0
Sex
0 : Female
1 : Male
Enter: 0
PhysicalActivity
0 : No
1 : Yes
Enter: 1
Asthma
0 : No
1 : Yes
Enter: 0
KidneyDisease
0 : No
1 : Yes
Enter: 0
SkinCancer
0 : No
1 : Yes
Enter: 0
AgeCategory
0 : 55-59
1 : 80 or older
2 : 65-69
3 : 75-79
4 : 40-44
5 : 70-74
6 : 60-64
7 : 50-54
8 : 45-49
9 : 18-24
10 : 35-39
11 : 30-34
12 : 25-29
Enter: 9
Race
0 : White
1 : Black
2 : Asian
3 : American Indian/Alaskan Native
4 : Other
5 : Hispanic
Enter: 0
Diabetic
0 : Yes
1 : No
2 : No, borderline diabetes
3 : Yes (during pregnancy)
Enter: 1
GenHealth
0 : Very good
1 : Fair
2 : Good
3 : Poor
4 : Excellent
Enter: 4
Probability of Heart Disease: 0.0012485683
