In [137]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import sklearn as skl
import tensorflow as tf
from pathlib import Path
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func

sql temporarily commented out until database is updated

In [138]:
#import data

# Create Engine
#engine = create_engine("sqlite:///lifestyle_sleep_data.sqlite")
# reflect an existing database into a new model
#Base = automap_base()
# reflect the tables
#Base.prepare(autoload_with=engine)

# Save reference to the table
#Sleep = Base.classes.lifestyle_sleep_data


In [139]:
# Create our session (link) from Python to the DB
#session = Session(engine)
#conn = engine.connect()

In [140]:
# Query All Records in the the Database
#sleep_df = pd.read_sql("SELECT * FROM lifestyle_sleep_data", conn)
#sleep_df=sleep_df.drop(columns = "person_id")
#sleep_df.head()

In [141]:
#temporary direct import of data while waiting for sql database update
sleep_df=pd.read_csv("../Data/Sleep_health_mk1.csv")
#handle lower case/upper case descrepancy
sleep_df.columns=sleep_df.columns.str.strip().str.lower()
#convert spaces to underscore
sleep_df.columns = sleep_df.columns.str.replace(' ', '_')
#remove unnamed column:
sleep_df.drop(columns="unnamed:_0",inplace=True)
#restore caps to BMI
sleep_df=sleep_df.rename(columns={"bmi_category":"BMI_category"})
sleep_df.head()

Unnamed: 0,gender,age,occupation,sleep_duration,quality_of_sleep,physical_activity_level,stress_level,BMI_category,heart_rate,daily_steps,sleep_disorder,systolic_blood_pressure,diastolic_blood_pressure
0,Male,27,Software Engineer,6.1,6,42,6,Overweight,77,4200,,126,83
1,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,,125,80
2,Male,28,Doctor,6.2,6,60,8,Normal,75,10000,,125,80
3,Male,28,Sales Representative,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90
4,Male,28,Sales Representative,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90


Data Preprocessing:

In [142]:
# Verify the categories of the "occupation" column
occupations=sleep_df['occupation'].value_counts()
occupations

Nurse                   73
Doctor                  71
Engineer                63
Lawyer                  47
Teacher                 40
Accountant              37
Salesperson             32
Software Engineer        4
Scientist                4
Sales Representative     2
Manager                  1
Name: occupation, dtype: int64

In [143]:
#bin occupations with fewer than 30 subjects
#determine which occupations qualify and add to a list
occupations_replace=[]
for occ in occupations.items():
    if occ[1]<30:
        occupations_replace.append(occ[0])
#replace said occupations with "Other"
for occ in occupations_replace:
    sleep_df['occupation']=sleep_df['occupation'].replace(occ,"Other")
#verify successful binning
sleep_df['occupation'].value_counts()

Nurse          73
Doctor         71
Engineer       63
Lawyer         47
Teacher        40
Accountant     37
Salesperson    32
Other          11
Name: occupation, dtype: int64

In [144]:
# Transform the occupation column using get_dummies
occupation_dummies = pd.get_dummies(sleep_df['occupation'])

# Concatenate the sleep_df and the occupation_dummies DataFrames
sleep_df = pd.concat([sleep_df, occupation_dummies], axis=1)

# Drop the original occupation column
sleep_df =sleep_df.drop(columns=["occupation"])

# Display the DataFrame
sleep_df.head()

Unnamed: 0,gender,age,sleep_duration,quality_of_sleep,physical_activity_level,stress_level,BMI_category,heart_rate,daily_steps,sleep_disorder,systolic_blood_pressure,diastolic_blood_pressure,Accountant,Doctor,Engineer,Lawyer,Nurse,Other,Salesperson,Teacher
0,Male,27,6.1,6,42,6,Overweight,77,4200,,126,83,0,0,0,0,0,1,0,0
1,Male,28,6.2,6,60,8,Normal,75,10000,,125,80,0,1,0,0,0,0,0,0
2,Male,28,6.2,6,60,8,Normal,75,10000,,125,80,0,1,0,0,0,0,0,0
3,Male,28,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90,0,0,0,0,0,1,0,0
4,Male,28,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90,0,0,0,0,0,1,0,0


In [145]:
# Clean-up the "BMI-category"
sleep_df["BMI_category"] = sleep_df["BMI_category"].replace({'Normal': 'Normal Weight', 'Obese': 'Overweight'})
sleep_df['BMI_category'].value_counts()

Normal Weight    216
Overweight       158
Name: BMI_category, dtype: int64

In [146]:
# Transform the "BMI_category" column using get_dummies
bmi_dummies = pd.get_dummies(sleep_df['BMI_category'])

# Concatenate the sleep_df and the bmi_dummies DataFrames
sleep_df = pd.concat([sleep_df, bmi_dummies], axis=1)

# Drop the original "BMI_category"  column
sleep_df =sleep_df.drop(columns=["BMI_category"])

# Display the DataFrame
sleep_df.head()

Unnamed: 0,gender,age,sleep_duration,quality_of_sleep,physical_activity_level,stress_level,heart_rate,daily_steps,sleep_disorder,systolic_blood_pressure,...,Accountant,Doctor,Engineer,Lawyer,Nurse,Other,Salesperson,Teacher,Normal Weight,Overweight
0,Male,27,6.1,6,42,6,77,4200,,126,...,0,0,0,0,0,1,0,0,0,1
1,Male,28,6.2,6,60,8,75,10000,,125,...,0,1,0,0,0,0,0,0,1,0
2,Male,28,6.2,6,60,8,75,10000,,125,...,0,1,0,0,0,0,0,0,1,0
3,Male,28,5.9,4,30,8,85,3000,Sleep Apnea,140,...,0,0,0,0,0,1,0,0,0,1
4,Male,28,5.9,4,30,8,85,3000,Sleep Apnea,140,...,0,0,0,0,0,1,0,0,0,1


In [147]:
# Verify the values of the "gender" column
sleep_df['gender'].value_counts()

Male      189
Female    185
Name: gender, dtype: int64

In [148]:
# Transform the "gender" column using get_dummies
gender_dummies = pd.get_dummies(sleep_df['gender'])

# Concatenate the sleep_df and the gender_dummies DataFrames
sleep_df = pd.concat([sleep_df, gender_dummies], axis=1)

# Drop the original "gender" column
sleep_df =sleep_df.drop(columns=["gender"])

# Display the DataFrame
sleep_df.head()

Unnamed: 0,age,sleep_duration,quality_of_sleep,physical_activity_level,stress_level,heart_rate,daily_steps,sleep_disorder,systolic_blood_pressure,diastolic_blood_pressure,...,Engineer,Lawyer,Nurse,Other,Salesperson,Teacher,Normal Weight,Overweight,Female,Male
0,27,6.1,6,42,6,77,4200,,126,83,...,0,0,0,1,0,0,0,1,0,1
1,28,6.2,6,60,8,75,10000,,125,80,...,0,0,0,0,0,0,1,0,0,1
2,28,6.2,6,60,8,75,10000,,125,80,...,0,0,0,0,0,0,1,0,0,1
3,28,5.9,4,30,8,85,3000,Sleep Apnea,140,90,...,0,0,0,1,0,0,0,1,0,1
4,28,5.9,4,30,8,85,3000,Sleep Apnea,140,90,...,0,0,0,1,0,0,0,1,0,1


In [149]:
# Verify the values of the "sleep_disorder" column
sleep_df['sleep_disorder'].value_counts()

None           219
Sleep Apnea     78
Insomnia        77
Name: sleep_disorder, dtype: int64

In [150]:
#diverge sheets with one preserving Sleep Apnea and Insomnia, vs combining into a single bin
sleep_df2=sleep_df.copy()

In [151]:
# Clean-up the "sleep_disorder" column
sleep_df["sleep_disorder"] = sleep_df["sleep_disorder"].replace({'Sleep Apnea': 'Sleep Disorder', 'Insomnia': 'Sleep Disorder'})
# Verify the values of the "sleep_disorder" column
sleep_df['sleep_disorder'].value_counts()

None              219
Sleep Disorder    155
Name: sleep_disorder, dtype: int64

In [152]:
# Encoding the sleep_disorder column using a custom function
def encode_disorder(disorder):
    """
    This function encodes sleep disorder status by setting sleep apnea as 1 and no as 0.
    """
    if disorder == "Sleep Disorder":
        return 1
    else:
        return 0

# Call the encode_marriage function on the marriage column
sleep_df["sleep_disorder"] = sleep_df["sleep_disorder"].apply(encode_disorder)

# Review the DataFrame 
sleep_df.head()

Unnamed: 0,age,sleep_duration,quality_of_sleep,physical_activity_level,stress_level,heart_rate,daily_steps,sleep_disorder,systolic_blood_pressure,diastolic_blood_pressure,...,Engineer,Lawyer,Nurse,Other,Salesperson,Teacher,Normal Weight,Overweight,Female,Male
0,27,6.1,6,42,6,77,4200,0,126,83,...,0,0,0,1,0,0,0,1,0,1
1,28,6.2,6,60,8,75,10000,0,125,80,...,0,0,0,0,0,0,1,0,0,1
2,28,6.2,6,60,8,75,10000,0,125,80,...,0,0,0,0,0,0,1,0,0,1
3,28,5.9,4,30,8,85,3000,1,140,90,...,0,0,0,1,0,0,0,1,0,1
4,28,5.9,4,30,8,85,3000,1,140,90,...,0,0,0,1,0,0,0,1,0,1


In [153]:
# Transform the "sleep_disorder" column using get_dummies
disorder_dummies = pd.get_dummies(sleep_df2['sleep_disorder'])

# Identify target columns for later
target_col=list(disorder_dummies.columns)

# Concatenate the sleep_df and the disorder_dummies DataFrames
sleep_df2 = pd.concat([sleep_df2, disorder_dummies], axis=1)

# Drop the original "sleep_disorder" column
sleep_df2 = sleep_df2.drop(columns=["sleep_disorder"])

# Display the DataFrame
sleep_df2.head()

Unnamed: 0,age,sleep_duration,quality_of_sleep,physical_activity_level,stress_level,heart_rate,daily_steps,systolic_blood_pressure,diastolic_blood_pressure,Accountant,...,Other,Salesperson,Teacher,Normal Weight,Overweight,Female,Male,Insomnia,None,Sleep Apnea
0,27,6.1,6,42,6,77,4200,126,83,0,...,1,0,0,0,1,0,1,0,1,0
1,28,6.2,6,60,8,75,10000,125,80,0,...,0,0,0,1,0,0,1,0,1,0
2,28,6.2,6,60,8,75,10000,125,80,0,...,0,0,0,1,0,0,1,0,1,0
3,28,5.9,4,30,8,85,3000,140,90,0,...,1,0,0,0,1,0,1,0,0,1
4,28,5.9,4,30,8,85,3000,140,90,0,...,1,0,0,0,1,0,1,0,0,1


In [154]:
target_col

['Insomnia', 'None', 'Sleep Apnea']

Neural Network Time

In [125]:
# Seperate the features, X,  from the target variable, y
y = sleep_df['sleep_disorder']
X = sleep_df.drop(columns='sleep_disorder')

In [126]:
#split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [127]:
X_train.head()

Unnamed: 0,age,sleep_duration,quality_of_sleep,physical_activity_level,stress_level,heart_rate,daily_steps,systolic_blood_pressure,diastolic_blood_pressure,Accountant,...,Engineer,Lawyer,Nurse,Other,Salesperson,Teacher,Normal Weight,Overweight,Female,Male
59,32,7.7,7,75,6,70,8000,120,80,0,...,0,0,0,0,0,0,1,0,0,1
12,29,6.1,6,30,8,70,8000,120,80,0,...,0,0,0,0,0,0,1,0,0,1
289,50,6.1,6,90,8,75,10000,140,95,0,...,0,0,1,0,0,0,0,1,1,0
201,43,7.8,8,90,5,70,8000,130,85,0,...,1,0,0,0,0,0,1,0,0,1
91,35,7.3,8,60,4,65,5000,125,80,0,...,1,0,0,0,0,0,1,0,0,1


In [128]:
#Scale X
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)



In [129]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train.columns)
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 8)                 176       
                                                                 
 dense_1 (Dense)             (None, 5)                 45        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
Total params: 227 (908.00 Byte)
Trainable params: 227 (908.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [130]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

In [131]:
# Train the model
fit_model = nn.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [132]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

3/3 - 0s - loss: 0.3582 - accuracy: 0.9149 - 191ms/epoch - 64ms/step
Loss: 0.3582134246826172, Accuracy: 0.914893627166748


In [133]:
# Export our model to HDF5 file
nn.save_weights('sleep_model.hdf5')

Try again without the sleep disorders binned

In [155]:
# Seperate the features, X,  from the target variable, y
y2 = sleep_df2[target_col]
X2 = sleep_df2.drop(columns=target_col)

In [156]:
#split dataset
X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, random_state=1)

In [157]:
#Scale X
scaler2 = StandardScaler()

# Fit the StandardScaler
X_scaler2 = scaler2.fit(X_train2)

# Scale the data
X_train_scaled2 = X_scaler2.transform(X_train2)
X_test_scaled2 = X_scaler2.transform(X_test2)

In [161]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features2 = len(X_train2.columns)
hidden_nodes2_layer1 =  8
hidden_nodes2_layer2 = 5

nn2 = tf.keras.models.Sequential()

# First hidden layer
nn2.add(
    tf.keras.layers.Dense(units=hidden_nodes2_layer1, input_dim=number_input_features2, activation="relu")
)

# Second hidden layer
nn2.add(tf.keras.layers.Dense(units=hidden_nodes2_layer2, activation="relu"))

# Output layer
nn2.add(tf.keras.layers.Dense(units=3, activation="sigmoid"))

# Check the structure of the model
nn2.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 8)                 176       
                                                                 
 dense_7 (Dense)             (None, 5)                 45        
                                                                 
 dense_8 (Dense)             (None, 3)                 18        
                                                                 
Total params: 239 (956.00 Byte)
Trainable params: 239 (956.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [162]:
# Compile the model
nn2.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

In [163]:
# Train the model
fit_model2 = nn2.fit(X_train_scaled2,y_train2,epochs=100)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

In [164]:
# Evaluate the model using the test data
model_loss2, model_accuracy2 = nn2.evaluate(X_test_scaled2,y_test2,verbose=2)
print(f"Loss: {model_loss2}, Accuracy: {model_accuracy2}")

3/3 - 0s - loss: 0.2911 - accuracy: 0.8830 - 138ms/epoch - 46ms/step
Loss: 0.2910829484462738, Accuracy: 0.8829787373542786


In [165]:
# Export our model to HDF5 file
nn2.save_weights('sleep_model2.hdf5')