In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score

In [None]:
dataset = pd.read_csv("obesity.csv")
print("Classes:", dataset['NObeyesdad'].unique())
print(dataset.shape)

Classes: ['Normal_Weight' 'Overweight_Level_I' 'Overweight_Level_II'
 'Obesity_Type_I' 'Insufficient_Weight' 'Obesity_Type_II'
 'Obesity_Type_III']
(2111, 17)


In [None]:
X = dataset.drop('NObeyesdad', axis = 1)

y = dataset['NObeyesdad']
categories = [['Normal_Weight', 'Overweight_Level_I', 'Overweight_Level_II',
 'Obesity_Type_I', 'Insufficient_Weight', 'Obesity_Type_II',
 'Obesity_Type_III']]
encoder = OneHotEncoder(categories=categories, sparse_output=False)
y = encoder.fit_transform(y.values.reshape(-1,1))

Encode each of the categorical variables

In [None]:
cat = [['no', 'Sometimes', 'Frequently', 'Always']]
encoder = OrdinalEncoder(categories=cat)
X['CAEC'] = encoder.fit_transform(dataset[['CAEC']])
X['CALC'] = encoder.fit_transform(dataset[['CALC']])


In [None]:
c = [['Automobile', 'Motorbike', 'Bike', 'Public_Transportation', 'Walking']]
ord_encoder = OrdinalEncoder(categories=c)
X['MTRANS'] = ord_encoder.fit_transform(dataset[['MTRANS']])
print(X['MTRANS'])


0       3.0
1       3.0
2       3.0
3       4.0
4       3.0
       ... 
2106    3.0
2107    3.0
2108    3.0
2109    3.0
2110    3.0
Name: MTRANS, Length: 2111, dtype: float64


In [None]:
label_encoder = LabelEncoder()
X['family_history_with_overweight'] = label_encoder.fit_transform(dataset['family_history_with_overweight'])
X['Gender'] = label_encoder.fit_transform(dataset['Gender'])
X['FAVC'] = label_encoder.fit_transform(dataset['FAVC'])
X['SMOKE'] = label_encoder.fit_transform(dataset['SMOKE'])
X['SCC'] = label_encoder.fit_transform(dataset['SCC'])

In [None]:
scaler = MinMaxScaler(feature_range=(0, 1))
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data = X_rescaled, columns = X.columns)

In [None]:
data_train, data_test, class_train, class_test = train_test_split(X, y, test_size=0.2)
mlp = MLPClassifier(solver = 'sgd', random_state = 42, activation = 'logistic', learning_rate_init = 0.4, batch_size = 100, hidden_layer_sizes = (17, 20), max_iter = 600)

In [None]:
mlp.fit(data_train, class_train)


In [None]:
pred = mlp.predict(data_test)

In [None]:
print("Accuracy : ", accuracy_score(class_test, pred))
print("Mean Square Error : ", mean_squared_error(class_test, pred))

Accuracy :  0.9479905437352246
Mean Square Error :  0.012833502195204323


In [None]:
print(multilabel_confusion_matrix(class_test, pred))

print("Classification Report : ")
print(classification_report(class_test, pred))

[[[367   3]
  [  3  50]]

 [[353   3]
  [  8  59]]

 [[361   3]
  [  5  54]]

 [[362   3]
  [  2  56]]

 [[365   2]
  [  0  56]]

 [[359   3]
  [  2  59]]

 [[354   0]
  [  1  68]]]
Classification Report : 
              precision    recall  f1-score   support

           0       0.94      0.94      0.94        53
           1       0.95      0.88      0.91        67
           2       0.95      0.92      0.93        59
           3       0.95      0.97      0.96        58
           4       0.97      1.00      0.98        56
           5       0.95      0.97      0.96        61
           6       1.00      0.99      0.99        69

   micro avg       0.96      0.95      0.95       423
   macro avg       0.96      0.95      0.95       423
weighted avg       0.96      0.95      0.95       423
 samples avg       0.95      0.95      0.95       423



  _warn_prf(average, modifier, msg_start, len(result))
