In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

In [None]:
# Location of the source workbook; edit this to point at your copy of the Excel file.
file_path = 'ObesityDataSet_raw_and_data_sinthetic 3.xlsx'
# Read the workbook into a DataFrame (pandas reads the first sheet by default).
data = pd.read_excel(io=file_path)

In [None]:
# Integer-encode every object-dtype column of `data` in place, keeping each
# fitted encoder (keyed by column name) so codes can be mapped back to labels.
label_encoders = {}
for column in data.select_dtypes(include='object').columns:
    le = LabelEncoder().fit(data[column])
    data[column] = le.transform(data[column])
    label_encoders[column] = le

In [None]:
# The target is the 'NObeyesdad' column; the features are all remaining columns.
y = data['NObeyesdad']
X = data.drop(columns='NObeyesdad')

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Learn the per-feature mean/std from the training split only, then apply that
# same transform to both splits so no test-set statistics enter the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Random forest with library-default hyperparameters; the seed is fixed so
# repeated runs build the same forest.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X=X_train_scaled, y=y_train)

In [None]:
# Predict on the held-out split and score the predictions against the true labels.
y_pred = rf_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)

In [None]:
# Report overall accuracy, then the per-class precision/recall/F1 table
# (the heading and the table are emitted on separate lines).
print("Accuracy:", accuracy)
print("Classification Report:", classification_rep, sep="\n")

Accuracy: 0.8416075650118203
Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        56
           1       0.67      0.82      0.74        62
           2       0.83      0.83      0.83        78
           3       0.86      0.95      0.90        58
           4       1.00      1.00      1.00        63
           5       0.76      0.70      0.73        56
           6       0.89      0.62      0.73        50

    accuracy                           0.84       423
   macro avg       0.85      0.84      0.84       423
weighted avg       0.85      0.84      0.84       423



In [None]:
# Per-class averages: one row per 'NObeyesdad' value, holding the mean of every
# other column.
# NOTE(review): object columns were label-encoded in place earlier in the notebook,
# so their integer codes are averaged here as well — confirm that is intended.
obesity_grouped = data.groupby(by='NObeyesdad').mean()

In [None]:
# Move the 'NObeyesdad' group keys out of the index into a regular column.
# Guarded and non-in-place so that re-running this cell is a no-op: an
# unconditional in-place reset on the already-reset frame would insert a
# stray 'index' column (the previous RangeIndex) on every extra run.
if 'NObeyesdad' in obesity_grouped.index.names:
    obesity_grouped = obesity_grouped.reset_index()

In [None]:
# Show the per-class means as plain text
print(obesity_grouped)

# Optionally, also write the table to an Excel file, omitting the row index
obesity_grouped.to_excel(excel_writer='result.xlsx', index=False)

   NObeyesdad    Gender        Age  family_history_with_overweight      FAVC  \
0           0  0.363971  19.783237                        0.463235  0.812500   
1           1  0.508711  21.738676                        0.540070  0.724739   
2           2  0.555556  25.884941                        0.980057  0.968661   
3           3  0.993266  28.233785                        0.996633  0.976431   
4           4  0.003086  23.495554                        1.000000  0.996914   
5           5  0.500000  23.417674                        0.720690  0.924138   
6           6  0.644828  26.996981                        0.937931  0.744828   

       FCVC       NCP      CAEC     SMOKE      CH2O       SCC       FAF  \
0  2.480788  2.914403  1.551471  0.003676  1.871281  0.080882  1.250131   
1  2.334495  2.738676  1.501742  0.045296  1.850174  0.104530  1.247387   
2  2.186050  2.431862  1.951567  0.017094  2.112218  0.005698  0.986748   
3  2.391284  2.744555  1.986532  0.050505  1.877658  0.0033

In [None]:
# Summarise selected columns per 'NObeyesdad' value: min/max/mean of Age and the
# mean of each of the other listed columns.
# Aggregations are named by string instead of passing the builtins `min`/`max`
# and `np.mean`: pandas >= 2.1 emits a FutureWarning for those callables and will
# later call them directly rather than dispatching to its own groupby
# implementations. The resulting column labels ('min', 'max', 'mean') are unchanged.
pivot = pd.pivot_table(
    data,
    values=['Age', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE'],
    index=['NObeyesdad'],
    aggfunc={
        'Age': ['min', 'max', 'mean'],
        'FCVC': 'mean',
        'NCP': 'mean',
        'CH2O': 'mean',
        'FAF': 'mean',
        'TUE': 'mean',
    },
)

In [None]:
# Turn the 'NObeyesdad' index back into a regular column.
# Guarded and non-in-place so that re-running this cell is a no-op: an
# unconditional in-place reset on the already-reset table would insert a
# stray 'index' column on every extra run.
if 'NObeyesdad' in pivot.index.names:
    pivot = pivot.reset_index()

In [None]:
# Display the pivot table as plain text (its columns are two-level:
# attribute name on top, aggregation name underneath)
print(pivot)

  NObeyesdad   Age                       CH2O       FAF      FCVC       NCP  \
               max       mean   min      mean      mean      mean      mean   
0          0  39.0  19.783237  16.0  1.871281  1.250131  2.480788  2.914403   
1          1  61.0  21.738676  14.0  1.850174  1.247387  2.334495  2.738676   
2          2  52.0  25.884941  15.0  2.112218  0.986748  2.186050  2.431862   
3          3  41.0  28.233785  20.0  1.877658  0.971857  2.391284  2.744555   
4          4  26.0  23.495554  18.0  2.208493  0.664817  3.000000  3.000000   
5          5  55.0  23.417674  16.0  2.058725  1.056796  2.264631  2.504218   
6          6  56.0  26.996981  17.0  2.025133  0.958072  2.260578  2.495529   

        TUE  
       mean  
0  0.839459  
1  0.675958  
2  0.676743  
3  0.515186  
4  0.604623  
5  0.612992  
6  0.697275  
