## Preprocessing

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import pandas as pd
from collections import Counter

# Load the processed obesity dataset. A forward slash is used as the path
# separator: it works on Windows, macOS and Linux alike, and avoids the
# invalid "\p" escape sequence the backslash form relied on.
health_df = pd.read_csv("data/processed_obesity_data_NO_WEIGHT.csv")

# Rebuild a clean 0..n-1 index and discard the 'Unnamed: 0' column carried in the CSV
health_df = health_df.reset_index(drop=True).drop(columns=['Unnamed: 0'])

In [2]:
# Preview the first five rows of the loaded frame
health_df.head(n=5)

Unnamed: 0,Age,Overweight_Family_History,High_Caloric_Food_Frequency,Mmain_Meals_per_Day,Smoking_Status,Daily_Water_Intake (L),Monitor_Calorie_Intake,Physical_Activity,Screentime (hrs),Gender_Female,...,Alcohol_Frequency_Always,Alcohol_Frequency_Frequently,Alcohol_Frequency_Sometimes,Alcohol_Frequency_no,Mode_of_Transportaion_Automobile,Mode_of_Transportaion_Bike,Mode_of_Transportaion_Motorbike,Mode_of_Transportaion_Public_Transportation,Mode_of_Transportaion_Walking,Obesity_Level
0,21.0,1,0,0.0,0,2.0,0,0.0,1.0,1,...,0,0,0,1,0,0,0,1,0,1
1,21.0,1,0,0.0,1,3.0,1,3.0,0.0,1,...,0,0,1,0,0,0,0,1,0,1
2,23.0,1,0,0.0,0,2.0,0,2.0,1.0,0,...,0,1,0,0,0,0,0,1,0,1
3,27.0,0,0,0.0,0,2.0,0,2.0,0.0,0,...,0,1,0,0,0,0,0,0,1,2
4,22.0,0,0,0.0,0,2.0,0,0.0,0.0,0,...,0,0,1,0,0,0,0,1,0,2


In [3]:
# Report how many columns the frame has, then list their names below a divider
divider = '========================================'

print(len(health_df.columns))
print(divider, divider, sep='\n')
print(health_df.columns)

28
Index(['Age', 'Overweight_Family_History', 'High_Caloric_Food_Frequency',
       'Mmain_Meals_per_Day', 'Smoking_Status', 'Daily_Water_Intake (L)',
       'Monitor_Calorie_Intake', 'Physical_Activity', 'Screentime (hrs)',
       'Gender_Female', 'Gender_Male', 'Meal_Vegetable_Intake_Always',
       'Meal_Vegetable_Intake_Frequently', 'Meal_Vegetable_Intake_Sometimes',
       'Food_Between_Meals_Always', 'Food_Between_Meals_Frequently',
       'Food_Between_Meals_Sometimes', 'Food_Between_Meals_no',
       'Alcohol_Frequency_Always', 'Alcohol_Frequency_Frequently',
       'Alcohol_Frequency_Sometimes', 'Alcohol_Frequency_no',
       'Mode_of_Transportaion_Automobile', 'Mode_of_Transportaion_Bike',
       'Mode_of_Transportaion_Motorbike',
       'Mode_of_Transportaion_Public_Transportation',
       'Mode_of_Transportaion_Walking', 'Obesity_Level'],
      dtype='object')


In [4]:
# Keep only the rows whose Obesity_Level is 2 or higher, as an independent copy
level_at_least_two = health_df['Obesity_Level'].ge(2)
overweight_or_obese_df = health_df[level_at_least_two].copy()

In [5]:
# Preview the first five rows of the filtered frame
overweight_or_obese_df.head(n=5)

Unnamed: 0,Age,Overweight_Family_History,High_Caloric_Food_Frequency,Mmain_Meals_per_Day,Smoking_Status,Daily_Water_Intake (L),Monitor_Calorie_Intake,Physical_Activity,Screentime (hrs),Gender_Female,...,Alcohol_Frequency_Always,Alcohol_Frequency_Frequently,Alcohol_Frequency_Sometimes,Alcohol_Frequency_no,Mode_of_Transportaion_Automobile,Mode_of_Transportaion_Bike,Mode_of_Transportaion_Motorbike,Mode_of_Transportaion_Public_Transportation,Mode_of_Transportaion_Walking,Obesity_Level
3,27.0,0,0,0.0,0,2.0,0,2.0,0.0,0,...,0,1,0,0,0,0,0,0,1,2
4,22.0,0,0,0.0,0,2.0,0,0.0,0.0,0,...,0,0,1,0,0,0,0,1,0,2
10,26.0,1,1,0.0,0,3.0,0,2.0,2.0,0,...,0,0,1,0,0,0,0,1,0,3
11,21.0,1,1,0.0,0,2.0,1,2.0,1.0,1,...,0,0,1,0,0,0,0,1,0,2
13,41.0,0,1,0.0,0,2.0,0,2.0,1.0,0,...,0,1,0,0,1,0,0,0,0,3


In [6]:
# Recode the target to a binary label: level 2 -> 0, level 3 -> 1.
# The result is assigned back to the column explicitly. Calling
# .replace(..., inplace=True) on a column selection acts on an intermediate
# Series, which triggers a FutureWarning in recent pandas and leaves the
# DataFrame unchanged once Copy-on-Write is enabled.
overweight_or_obese_df['Obesity_Level'] = (
    overweight_or_obese_df['Obesity_Level'].replace({2: 0, 3: 1})
)

In [7]:
# Tally how many rows carry each Obesity_Level value
obesity_level_counts = Counter(overweight_or_obese_df['Obesity_Level'])
print(obesity_level_counts)

Counter({1: 972, 0: 580})


In [8]:
# Separate the frame into a feature matrix (X) and a target vector (y).
# The target itself plus two further columns are left out of the features.
excluded_columns = ['Obesity_Level', 'Overweight_Family_History', 'Age']
X = overweight_or_obese_df.drop(columns=excluded_columns).values
y = overweight_or_obese_df['Obesity_Level'].values

In [9]:
# Partition features and target into training and testing subsets
# (random_state pins the shuffle so the split is repeatable)
split_arrays = train_test_split(X, y, random_state=31)
X_train, X_test, y_train, y_test = split_arrays

In [10]:
# Create a StandardScaler instance and fit it on the training split only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaling to both the training and the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

## Compile, Train and Evaluate the Model

In [11]:
# Seed TensorFlow's global random generator so that weight initialisation is
# repeatable on a fresh kernel; without a seed every run reports different
# loss/accuracy figures. The value mirrors the random_state used for the split.
# NOTE(review): this is expected to also pin the batch shuffling done by fit() -- confirm.
tf.random.set_seed(31)

# Number of input features, taken from the scaled training matrix
input_features = X_train_scaled.shape[1]

nn = tf.keras.models.Sequential()

# Declare the input shape explicitly instead of passing the legacy
# `input_dim` argument to the first Dense layer
nn.add(tf.keras.Input(shape=(input_features,)))

# First hidden layer
nn.add(tf.keras.layers.Dense(units=28, activation='relu'))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=20, activation='relu'))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=20, activation='relu'))

# Output layer: a single sigmoid unit for the binary target
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 28)                728       
                                                                 
 dense_1 (Dense)             (None, 20)                580       
                                                                 
 dense_2 (Dense)             (None, 20)                420       
                                                                 
 dense_3 (Dense)             (None, 1)                 21        
                                                                 
Total params: 1,749
Trainable params: 1,749
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the tracked metric
nn.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [13]:
# Fit the network on the scaled training split for 80 epochs and keep the returned history
fit_model = nn.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=80,
)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


In [14]:
# Score the trained network on the held-out test split and report both metrics
test_metrics = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

13/13 - 0s - loss: 0.5568 - accuracy: 0.8015 - 395ms/epoch - 30ms/step
Loss: 0.5567880868911743, Accuracy: 0.8015463948249817
