<a href="https://colab.research.google.com/github/Venura-Shiromal/Weight-Classifier/blob/main/Sequential.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing Modules

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight

# Input

## Loading Training Data

In [3]:
df = pd.read_csv("data/train.csv")

In [4]:
df.head()

Unnamed: 0,PersonID,Age_Years,Weight_Kg,High_Calorie_Food,Vegetable_Intake,Meal_Frequency,Water_Intake,Screen_Time_Hours,Family_Risk,Activity_Level_Score,Gender,Family_History,Snack_Frequency,Smoking_Habit,Alcohol_Consumption,Commute_Mode,Weight_Category,Height_cm,Physical_Activity_Level,Leisure Time Activity
0,P1810,26.0,109.959714,yes,3.0,3.0,2.679137,0.479348,1.0,0.479348,Female,yes,Occasionally,no,Sometimes,Public_Transportation,Obesity_Type_III,162.2771,,Reading
1,P1021,25.483381,64.848627,no,2.0,1.0,1.0,0.0,1.0,0.740633,Female,yes,Occasionally,no,no,Public_Transportation,Overweight_Level_II,156.5288,,Reading
2,P2036,26.0,104.947703,yes,3.0,3.0,2.57721,0.402075,1.0,0.402075,Female,yes,Occasionally,no,Sometimes,Public_Transportation,Obesity_Type_III,162.1167,,Reading
3,P2201,21.715198,107.868047,Yes,3.031308,3.322455,1.983531,-0.005858,0.987933,0.360441,Male,yes,Occasionally,no,no,Public_Transportation,Normal_Weight,167.748287,,Reading
4,P2649,17.511767,121.460361,yes,1.964873,1.052297,2.025586,0.981999,1.004136,1.162488,Female,yes,Occasionally,no,no,Public_Transportation,Normal_Weight,170.956194,,Gaming


# Preprocessing

### Combined Fields

In [5]:
df["BMI"] = df["Weight_Kg"]/(df["Height_cm"]/100)**2

### Removing unwanted fields

In [6]:
drop_cols = [
    "PersonID",
    "Weight_Kg",
    "Height_cm"
]

In [7]:
df = df.drop(drop_cols, axis=1)

### Removing missing cells

In [8]:
df.isna().sum()[df.isna().sum() > 0]

Unnamed: 0,0
Gender,30
Alcohol_Consumption,37
Physical_Activity_Level,1498


In [9]:
df = df.drop(["Physical_Activity_Level"], axis=1)

In [10]:
missing_cols = ["Gender", "Alcohol_Consumption"]
df = df.dropna(subset=missing_cols)

### Mapping

In [11]:
map_YN = {
    "Yes" : 1,
    "yes" : 1,
    "yess" : 1,
    "No" : 0,
    "no" : 0
}

map_Gender = {
    "Male": 1,
    "Female": 0
}

map_Frq = {
    "no" : 0,
    "Never" : 0,
    "Sometimes" : 1,
    "Occasionally" : 1,
    "Frequently" : 2,
    "Often" : 2,
    "Always" : 3
}

map_Cat = {
    'Insufficient_Weight': 0,
    'Normal_Weight': 1,
    'Overweight_Level_I': 2,
    'Overweight_Level_II': 3,
    'Obesity_Type_I': 4,
    'Obesity_Type_II': 5,
    'Obesity_Type_III': 6
}

In [12]:
df["Gender"] = df["Gender"].map(map_Gender)
df["High_Calorie_Food"] = df["High_Calorie_Food"].map(map_YN)
df["Family_History"] = df["Family_History"].map(map_YN)
df["Smoking_Habit"] = df["Smoking_Habit"].map(map_YN)
df["Snack_Frequency"] = df["Snack_Frequency"].map(map_Frq)
df["Alcohol_Consumption"] = df["Alcohol_Consumption"].map(map_Frq)
df["Weight_Category"] = df["Weight_Category"].map(map_Cat)

### One Hot Encoding

In [13]:
df = pd.get_dummies(df, columns=['Commute_Mode','Leisure Time Activity'])

## Defining X,Y

In [14]:
x = df.drop(["Weight_Category"], axis=1)
y = df["Weight_Category"]

## Data Splitting

In [15]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)

## Feature Scaling

In [16]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

## Computing Class Weights

In [17]:
weights = compute_class_weight("balanced", classes=np.unique(y_train), y=y_train)
class_weights = dict(zip(np.unique(y_train), weights))
print("Class weights:", class_weights)

Class weights: {np.int64(0): np.float64(1.223809523809524), np.int64(1): np.float64(1.0851513019000705), np.int64(2): np.float64(1.0151415404871627), np.int64(3): np.float64(1.0245847176079734), np.int64(4): np.float64(0.8407851690294439), np.int64(5): np.float64(1.00587084148728), np.int64(6): np.float64(0.8954703832752613)}


# Training

In [18]:
model = Sequential([
        Input(shape=[25]),
        Dense(64, activation="relu", name="Layer_In"),
        Dropout(0.2),
        Dense(32, activation="relu", name="Layer_H1"),
        Dropout(0.2),
        Dense(16, activation="relu", name="Layer_H2"),
        Dense(7, activation="softmax", name="Layer_Out")
    ])

In [20]:
model.compile(
    optimizer='adam',
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

In [21]:
model.summary()

## Early Stopping Setup

In [22]:
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    verbose=1
)

In [23]:
model.fit(x_train, y_train,
          epochs=50,
          batch_size=16,
          validation_data=(x_test, y_test),
          class_weight=class_weights,
          callbacks=[early_stopping],
          verbose=2)

Epoch 1/50


  output, from_logits = _get_logits(


97/97 - 2s - 20ms/step - accuracy: 0.2244 - loss: 1.9046 - val_accuracy: 0.3316 - val_loss: 1.7681
Epoch 2/50
97/97 - 0s - 3ms/step - accuracy: 0.3281 - loss: 1.7138 - val_accuracy: 0.4508 - val_loss: 1.5114
Epoch 3/50
97/97 - 0s - 3ms/step - accuracy: 0.4125 - loss: 1.5576 - val_accuracy: 0.4689 - val_loss: 1.3955
Epoch 4/50
97/97 - 0s - 3ms/step - accuracy: 0.4293 - loss: 1.4908 - val_accuracy: 0.5104 - val_loss: 1.3478
Epoch 5/50
97/97 - 0s - 3ms/step - accuracy: 0.4611 - loss: 1.4285 - val_accuracy: 0.5155 - val_loss: 1.3170
Epoch 6/50
97/97 - 0s - 3ms/step - accuracy: 0.4643 - loss: 1.4278 - val_accuracy: 0.5285 - val_loss: 1.2989
Epoch 7/50
97/97 - 0s - 3ms/step - accuracy: 0.4929 - loss: 1.3827 - val_accuracy: 0.5440 - val_loss: 1.2769
Epoch 8/50
97/97 - 0s - 3ms/step - accuracy: 0.5143 - loss: 1.3535 - val_accuracy: 0.5699 - val_loss: 1.2627
Epoch 9/50
97/97 - 0s - 3ms/step - accuracy: 0.5175 - loss: 1.3515 - val_accuracy: 0.5648 - val_loss: 1.2447
Epoch 10/50
97/97 - 0s - 3ms/

<keras.src.callbacks.history.History at 0x7d642d05a9c0>

# Evaluation

In [24]:
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)

print(f"Final Validation Accuracy: {accuracy * 100:.2f}%")
print(f"Final Validation Loss: {loss:.4f}")

Final Validation Accuracy: 71.24%
Final Validation Loss: 1.0177
