In [18]:
import pandas as pd
import sqlite3
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [19]:
# Read the obesity CSV (path is relative to the notebook's working directory).
file_path = "ObesityDataSet_raw_and_data_sinthetic.csv"
data = pd.read_csv(filepath_or_buffer=file_path)
# Preview the first five rows as a sanity check on the parsed columns.
data.head(n=5)

Unnamed: 0,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,Female,21.0,1.62,64.0,yes,no,2.0,3.0,Sometimes,no,2.0,no,0.0,1.0,no,Public_Transportation,Normal_Weight
1,Female,21.0,1.52,56.0,yes,no,3.0,3.0,Sometimes,yes,3.0,yes,3.0,0.0,Sometimes,Public_Transportation,Normal_Weight
2,Male,23.0,1.8,77.0,yes,no,2.0,3.0,Sometimes,no,2.0,no,2.0,1.0,Frequently,Public_Transportation,Normal_Weight
3,Male,27.0,1.8,87.0,no,no,3.0,3.0,Sometimes,no,2.0,no,2.0,0.0,Frequently,Walking,Overweight_Level_I
4,Male,22.0,1.78,89.8,no,no,2.0,1.0,Sometimes,no,2.0,no,0.0,0.0,Sometimes,Public_Transportation,Overweight_Level_II


In [20]:
# Body-mass index: weight divided by the square of height.
# NOTE(review): the preview rows suggest Weight is in kg and Height in metres — confirm.
data['BMI'] = data['Weight'].div(data['Height'].pow(2))

# Classify Obesity based on BMI
def classify_bmi(bmi):
    """Map a single BMI value to one of the notebook's four weight-class labels.

    Parameters
    ----------
    bmi : float
        Body-mass index for one row.

    Returns
    -------
    str or None
        'Underweight' for bmi < 18.5, 'Normal Weight' for 18.5 <= bmi < 25,
        'Overweight_Level_I' for 25 <= bmi < 30, 'Overweight_Level_II' for
        bmi >= 30, and None when bmi is missing (None or NaN).
    """
    # Every comparison against NaN is False, so without this guard a missing
    # BMI would fall through to the last branch and be labelled as overweight.
    if pd.isna(bmi):
        return None
    # Thresholds are checked in ascending order, so each test only needs the
    # upper bound of its band.
    if bmi < 18.5:
        return 'Underweight'
    if bmi < 25:
        return 'Normal Weight'
    if bmi < 30:
        return 'Overweight_Level_I'
    # NOTE(review): everything from 30 upward shares this single label, and
    # 'Normal Weight' uses a space where the NObeyesdad column uses
    # 'Normal_Weight' — confirm both are intended before comparing the columns.
    return 'Overweight_Level_II'

# Label every row by running its BMI through classify_bmi, one value at a time.
data['Obesity_Class'] = data['BMI'].map(classify_bmi)

In [21]:
# String-valued columns to expand into indicator columns.
categorical_columns = [
    'Gender',
    'family_history_with_overweight',
    'FAVC',
    'CAEC',
    'SMOKE',
    'SCC',
    'CALC',
    'MTRANS',
]

# drop_first=True omits the first level of each column, so a k-level column
# becomes k-1 indicators (e.g. Gender is carried by Gender_Male alone).
# NOTE: this rebinds `data`; re-running the cell without reloading the CSV
# will not find the original categorical columns.
data = pd.get_dummies(data=data, columns=categorical_columns, drop_first=True)

In [22]:
# Open (or create) the on-disk SQLite file that backs the SQL cells below.
conn = sqlite3.connect('obesity_data.db')
cursor = conn.cursor()  # not used in this cell; kept for any cell that expects it

# Write the encoded frame as table `obesity_data`, overwriting any earlier
# copy and leaving the DataFrame index out of the table.
data.to_sql(name='obesity_data', con=conn, if_exists='replace', index=False)

# Read five rows back to confirm the table exists and is populated.
print(pd.read_sql(sql="SELECT * FROM obesity_data LIMIT 5;", con=conn))

    Age  Height  Weight  FCVC  NCP  CH2O  FAF  TUE           NObeyesdad  \
0  21.0    1.62    64.0   2.0  3.0   2.0  0.0  1.0        Normal_Weight   
1  21.0    1.52    56.0   3.0  3.0   3.0  3.0  0.0        Normal_Weight   
2  23.0    1.80    77.0   2.0  3.0   2.0  2.0  1.0        Normal_Weight   
3  27.0    1.80    87.0   3.0  3.0   2.0  2.0  0.0   Overweight_Level_I   
4  22.0    1.78    89.8   2.0  1.0   2.0  0.0  0.0  Overweight_Level_II   

         BMI  ... CAEC_no  SMOKE_yes  SCC_yes  CALC_Frequently  \
0  24.386526  ...       0          0        0                0   
1  24.238227  ...       0          1        1                0   
2  23.765432  ...       0          0        0                1   
3  26.851852  ...       0          0        0                1   
4  28.342381  ...       0          0        0                0   

   CALC_Sometimes  CALC_no  MTRANS_Bike  MTRANS_Motorbike  \
0               0        1            0                 0   
1               1        0    

In [25]:
# Select gender, age, height, weight and class for every row whose BMI-derived
# Obesity_Class is 'Overweight_Level_I'.
# Gender was one-hot encoded with drop_first=True, so the query rebuilds a
# readable label from the Gender_Male indicator: 1 becomes 'Male' and any
# other value is reported as 'Female'.
query = """
SELECT 
    CASE WHEN Gender_Male = 1 THEN 'Male' ELSE 'Female' END AS Gender, 
    Age, 
    Height, 
    Weight, 
    Obesity_Class 
FROM obesity_data 
WHERE Obesity_Class = 'Overweight_Level_I';
"""
# Run the query over the connection opened earlier in the notebook and load
# the rows into a DataFrame.
result = pd.read_sql(query, conn)
# print() renders the frame as plain text; pandas elides the middle rows of a
# long frame in this representation.
print(result)


     Gender        Age    Height      Weight       Obesity_Class
0      Male  27.000000  1.800000   87.000000  Overweight_Level_I
1      Male  22.000000  1.780000   89.800000  Overweight_Level_I
2    Female  21.000000  1.720000   80.000000  Overweight_Level_I
3      Male  27.000000  1.930000  102.000000  Overweight_Level_I
4    Female  30.000000  1.710000   82.000000  Overweight_Level_I
..      ...        ...       ...         ...                 ...
561    Male  23.807181  1.729177   82.527240  Overweight_Level_I
562    Male  39.585811  1.719153   86.464843  Overweight_Level_I
563  Female  45.821267  1.687326   80.413997  Overweight_Level_I
564  Female  23.000000  1.665199   83.151150  Overweight_Level_I
565  Female  22.899740  1.661715   82.595793  Overweight_Level_I

[566 rows x 5 columns]
