<a href="https://colab.research.google.com/github/Venura-Shiromal/Weight-Classifier/blob/main/CatBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing Modules

In [41]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input
from sklearn.model_selection import train_test_split

# Input

## Loading Training Data

In [2]:
df = pd.read_csv("data/train.csv")

In [22]:
df.head()

Unnamed: 0,Age_Years,High_Calorie_Food,Vegetable_Intake,Meal_Frequency,Water_Intake,Screen_Time_Hours,Family_Risk,Activity_Level_Score,Gender,Family_History,...,Commute_Mode_Bike,Commute_Mode_Motorbike,Commute_Mode_Public_Transportation,Commute_Mode_Walking,Leisure Time Activity_Gaming,Leisure Time Activity_Music,Leisure Time Activity_Painting,Leisure Time Activity_Reading,Leisure Time Activity_Sport,Leisure Time Activity_Sports
0,26.0,1,3.0,3.0,2.679137,0.479348,1.0,0.479348,0,1,...,False,False,True,False,False,False,False,True,False,False
1,25.483381,0,2.0,1.0,1.0,0.0,1.0,0.740633,0,1,...,False,False,True,False,False,False,False,True,False,False
2,26.0,1,3.0,3.0,2.57721,0.402075,1.0,0.402075,0,1,...,False,False,True,False,False,False,False,True,False,False
3,21.715198,1,3.031308,3.322455,1.983531,-0.005858,0.987933,0.360441,1,1,...,False,False,True,False,False,False,False,True,False,False
4,17.511767,1,1.964873,1.052297,2.025586,0.981999,1.004136,1.162488,0,1,...,False,False,True,False,True,False,False,False,False,False


# Preprocessing

### Combined Fields

In [4]:
df["BMI"] = df["Weight_Kg"]/(df["Height_cm"]/100)**2

### Removing unwanted fields

In [5]:
drop_cols = [
    "PersonID", 
    "Weight_Kg", 
    "Height_cm" 
]

In [6]:
df = df.drop(drop_cols, axis=1)

### Removing missing cells

In [7]:
df.isna().sum()[df.isna().sum() > 0]

Gender                       30
Alcohol_Consumption          37
Physical_Activity_Level    1498
dtype: int64

In [8]:
df = df.drop(["Physical_Activity_Level"], axis=1)

In [9]:
missing_cols = ["Gender", "Alcohol_Consumption"]
df = df.dropna(subset=missing_cols)

### Mapping

In [10]:
map_YN = {
    "Yes" : 1,
    "yes" : 1,
    "yess" : 1,
    "No" : 0,
    "no" : 0
}

map_Gender = {
    "Male": 1,
    "Female": 0
}

map_Frq = {
    "no" : 0,
    "Never" : 0,
    "Sometimes" : 1,
    "Occasionally" : 1,
    "Frequently" : 2,
    "Often" : 2,
    "Always" : 3
}

map_Cat = {
    'Insufficient_Weight': 0,
    'Normal_Weight': 1,
    'Overweight_Level_I': 2,
    'Overweight_Level_II': 3,
    'Obesity_Type_I': 4,
    'Obesity_Type_II': 5,
    'Obesity_Type_III': 6
}

In [11]:
df["Gender"] = df["Gender"].map(map_Gender)
df["High_Calorie_Food"] = df["High_Calorie_Food"].map(map_YN)
df["Family_History"] = df["Family_History"].map(map_YN)
df["Smoking_Habit"] = df["Smoking_Habit"].map(map_YN)
df["Snack_Frequency"] = df["Snack_Frequency"].map(map_Frq)
df["Alcohol_Consumption"] = df["Alcohol_Consumption"].map(map_Frq)
df["Weight_Category"] = df["Weight_Category"].map(map_Cat)

### One Hot Encoding

In [12]:
df = pd.get_dummies(df, columns=['Commute_Mode','Leisure Time Activity'])

## Defining X,Y

In [13]:
x = df.drop(["Weight_Category"], axis=1)
y = df["Weight_Category"]

## Data Splitting

In [14]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)

# Training

In [15]:
model = Sequential([
        Input(shape=[25]),
        Dense(25, activation="relu", name="Layer_In"),
        Dense(15, activation="relu", name="Layer_H1"),
        Dense(7, activation="softmax", name="Layer_Out")
    ])

In [16]:
model.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy']
)

In [17]:
model.summary()

In [18]:
model.fit(x_train, y_train, epochs=20, batch_size=32, validation_data=(x_test, y_test))

Epoch 1/20
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.1602 - loss: 2.4842 - val_accuracy: 0.2902 - val_loss: 1.9069
Epoch 2/20
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2918 - loss: 1.8410 - val_accuracy: 0.2720 - val_loss: 1.7997
Epoch 3/20
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.2769 - loss: 1.7738 - val_accuracy: 0.3212 - val_loss: 1.7521
Epoch 4/20
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.2918 - loss: 1.7383 - val_accuracy: 0.3705 - val_loss: 1.7324
Epoch 5/20
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3178 - loss: 1.7167 - val_accuracy: 0.3420 - val_loss: 1.7061
Epoch 6/20
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3385 - loss: 1.6977 - val_accuracy: 0.3394 - val_loss: 1.6916
Epoch 7/20
[1m49/49[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1e3f7efd010>

In [27]:
y_pred = model.predict(x_test)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


# Evaluation

In [20]:
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)

print(f"Final Validation Accuracy: {accuracy * 100:.2f}%")
print(f"Final Validation Loss: {loss:.4f}")

Final Validation Accuracy: 52.59%
Final Validation Loss: 1.4118
