<a href="https://colab.research.google.com/github/joaocarvoli/Machine-Learning/blob/main/Exercises/10_problem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# `10 - Problem`
## In this problem our goal is to apply advanced tree-based models, boosting techniques and make a introduction to the artificial neural networks 

### The dataset that we'll use is [Pima Indians Diabetes Database](https://data.world/data-society/pima-indians-diabetes-database).

In [13]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import f1_score

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import xgboost as xgb
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier

import warnings
warnings.filterwarnings('ignore')

## `Data import`

In [14]:
filename = '/content/drive/MyDrive/data-sets/diabetes/diabetes.csv'
df = pd.read_csv(filename)
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


# 2 - Preprocessing:

In [15]:
df.shape

(768, 9)

In [16]:
df.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [17]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [18]:
X

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63
764,2,122,70,27,0,36.8,0.340,27
765,5,121,72,23,112,26.2,0.245,30
766,1,126,60,0,0,30.1,0.349,47


In [19]:
y

0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Outcome, Length: 768, dtype: int64

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=32, stratify = y)

In [21]:
X_train

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
69,4,146,85,27,100,28.9,0.189,27
265,5,96,74,18,67,33.6,0.997,43
439,6,107,88,0,0,36.8,0.727,31
411,1,112,72,30,176,34.4,0.528,25
37,9,102,76,37,0,32.9,0.665,46
...,...,...,...,...,...,...,...,...
484,0,145,0,0,0,44.2,0.630,31
495,6,166,74,0,0,26.6,0.304,66
159,17,163,72,41,114,40.9,0.817,47
300,0,167,0,0,0,32.3,0.839,30


In [22]:
y_train

69     0
265    0
439    0
411    0
37     1
      ..
484    1
495    0
159    1
300    1
451    1
Name: Outcome, Length: 537, dtype: int64

# 3 - Algorithms:

## `Random Forest`

In [23]:
parameters = {
  'max_depth':[10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
  'criterion':['gini','entropy'],
  'max_features':['auto', 'sqrt', 'log2'],
  'n_jobs':[-1],
  'random_state': [42],
  'class_weight': ['balanced', 'balanced_subsample'],
  'min_samples_split':[0.125,0.25,0.5,1,2,3,4,6,10]
}

grid_random = GridSearchCV(estimator = RandomForestClassifier(), param_grid = parameters, scoring = 'f1', cv = 5)
grid_random.fit(X_train,y_train)
pd.DataFrame(grid_random.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_class_weight,param_criterion,param_max_depth,param_max_features,param_min_samples_split,param_n_jobs,param_random_state,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.408763,0.351844,0.104113,0.000410,balanced,gini,10,auto,0.125,-1,42,"{'class_weight': 'balanced', 'criterion': 'gin...",0.680851,0.769231,0.617284,0.541176,0.634146,0.648538,0.075261,220
1,0.223552,0.007584,0.103720,0.000073,balanced,gini,10,auto,0.25,-1,42,"{'class_weight': 'balanced', 'criterion': 'gin...",0.666667,0.716049,0.607595,0.581395,0.651163,0.644574,0.046902,297
2,0.217967,0.010426,0.104320,0.000934,balanced,gini,10,auto,0.5,-1,42,"{'class_weight': 'balanced', 'criterion': 'gin...",0.711111,0.700000,0.613333,0.611765,0.666667,0.660575,0.041856,67
3,0.686703,0.301620,0.000000,0.000000,balanced,gini,10,auto,1,-1,42,"{'class_weight': 'balanced', 'criterion': 'gin...",,,,,,,,1090
4,0.405704,0.340618,0.103907,0.000392,balanced,gini,10,auto,2,-1,42,"{'class_weight': 'balanced', 'criterion': 'gin...",0.632911,0.757576,0.507463,0.529412,0.477612,0.580995,0.102598,850
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1183,0.518723,0.421881,0.103936,0.000426,balanced_subsample,entropy,,log2,2,-1,42,"{'class_weight': 'balanced_subsample', 'criter...",0.697674,0.746269,0.567164,0.451613,0.466667,0.585877,0.119006,798
1184,0.309988,0.006386,0.104015,0.000702,balanced_subsample,entropy,,log2,3,-1,42,"{'class_weight': 'balanced_subsample', 'criter...",0.643678,0.688525,0.500000,0.539683,0.476190,0.569615,0.082577,919
1185,0.305913,0.010042,0.103624,0.000074,balanced_subsample,entropy,,log2,4,-1,42,"{'class_weight': 'balanced_subsample', 'criter...",0.681319,0.738462,0.571429,0.507463,0.537313,0.607197,0.088130,674
1186,0.304508,0.007228,0.103719,0.000169,balanced_subsample,entropy,,log2,6,-1,42,"{'class_weight': 'balanced_subsample', 'criter...",0.666667,0.727273,0.600000,0.611111,0.492308,0.619472,0.078062,555


In [24]:
grid_random.best_params_

{'class_weight': 'balanced_subsample',
 'criterion': 'entropy',
 'max_depth': 20,
 'max_features': 'auto',
 'min_samples_split': 0.125,
 'n_jobs': -1,
 'random_state': 42}

In [25]:
grid_random.best_score_

0.6782310542198688

### _Aplying the best params in model_

In [26]:
model_random_forest = RandomForestClassifier(
 class_weight = 'balanced_subsample',
 criterion = 'entropy',
 max_depth = 20,
 max_features = 'auto',
 min_samples_split = 0.125,
 n_jobs = -1,
 random_state = 42
)
model_random_forest.fit(X_train, y_train)
random_pred_train = model_random_forest.predict(X_train)
random_pred_test = model_random_forest.predict(X_test)
print(f'The f1_score of train is {np.round(f1_score(y_train,random_pred_train)*100,2)}% and the f1_score of test is {np.round(f1_score(y_test,random_pred_test)*100,2)}%')

The f1_score of train is 76.37% and the f1_score of test is 70.0%


In [27]:
results = {'algorithm':[], 'train score':[], 'test score':[]}
results['algorithm'].append('Random Forest')
results['train score'].append(np.round(f1_score(y_train,random_pred_train)*100,2))
results['test score'].append(np.round(f1_score(y_test,random_pred_test)*100,2))

#### _The cross validation mades the result works worse_.

## `Gradient Bosting:`

In [28]:
parameters_grad = {
  'loss':['deviance'],
  'random_state': [42],
  'max_features':['auto', 'sqrt', 'log2'],
  'learning_rate':[0.001,0.01,0.1,1],
  'max_depth':[10, 20, 30, 40, 80, 90, 100, None],
  'max_features':['auto', 'sqrt', 'log2'],
  'random_state': [42],
  'min_samples_split':[0.125,0.25,0.5,1,2,3,4,6,10],
  'loss' :['deviance', 'exponential']
}

grid_gradB = GridSearchCV(estimator = GradientBoostingClassifier(), param_grid = parameters_grad, scoring = 'f1', cv = 5)
grid_gradB.fit(X_train,y_train)

GridSearchCV(cv=5, error_score=nan,
             estimator=GradientBoostingClassifier(ccp_alpha=0.0,
                                                  criterion='friedman_mse',
                                                  init=None, learning_rate=0.1,
                                                  loss='deviance', max_depth=3,
                                                  max_features=None,
                                                  max_leaf_nodes=None,
                                                  min_impurity_decrease=0.0,
                                                  min_impurity_split=None,
                                                  min_samples_leaf=1,
                                                  min_samples_split=2,
                                                  min_weight_fraction_leaf=0.0,
                                                  n_estimators=100,
                                                  n_iter_no_c...
                 

In [29]:
pd.DataFrame(grid_gradB.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_loss,param_max_depth,param_max_features,param_min_samples_split,param_random_state,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.205204,0.006882,0.002545,0.000135,0.001,deviance,10,auto,0.125,42,"{'learning_rate': 0.001, 'loss': 'deviance', '...",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1153
1,0.140603,0.009716,0.002391,0.000042,0.001,deviance,10,auto,0.25,42,"{'learning_rate': 0.001, 'loss': 'deviance', '...",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1153
2,0.085283,0.004835,0.002613,0.000663,0.001,deviance,10,auto,0.5,42,"{'learning_rate': 0.001, 'loss': 'deviance', '...",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1153
3,0.003849,0.000661,0.000000,0.000000,0.001,deviance,10,auto,1,42,"{'learning_rate': 0.001, 'loss': 'deviance', '...",,,,,,,,1601
4,0.391552,0.034271,0.002710,0.000103,0.001,deviance,10,auto,2,42,"{'learning_rate': 0.001, 'loss': 'deviance', '...",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1153
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1723,0.065572,0.001323,0.002173,0.000040,1,exponential,,log2,2,42,"{'learning_rate': 1, 'loss': 'exponential', 'm...",0.635294,0.566667,0.567568,0.520548,0.584615,0.574938,0.036926,331
1724,0.076487,0.001108,0.002196,0.000014,1,exponential,,log2,3,42,"{'learning_rate': 1, 'loss': 'exponential', 'm...",0.615385,0.586207,0.575342,0.486486,0.602740,0.573232,0.045487,354
1725,0.082135,0.002242,0.002174,0.000013,1,exponential,,log2,4,42,"{'learning_rate': 1, 'loss': 'exponential', 'm...",0.597701,0.638889,0.602740,0.563380,0.537313,0.588005,0.034861,192
1726,0.083881,0.001596,0.002212,0.000033,1,exponential,,log2,6,42,"{'learning_rate': 1, 'loss': 'exponential', 'm...",0.543210,0.666667,0.515152,0.588235,0.563380,0.575329,0.051585,313


In [30]:
grid_gradB.best_params_

{'learning_rate': 1,
 'loss': 'deviance',
 'max_depth': 10,
 'max_features': 'sqrt',
 'min_samples_split': 10,
 'random_state': 42}

In [31]:
grid_gradB.best_score_

0.6286738678386813

### _Aplying the best params in model_

In [32]:
model_gradient = GradientBoostingClassifier(
  learning_rate = 1,
  loss = 'deviance',
  max_depth = 10,
  max_features = 'sqrt',
  min_samples_split = 10,
  random_state = 42
)

model_gradient.fit(X_train, y_train)
model_gradient_train = model_gradient.predict(X_train)
model_gradient_test = model_gradient.predict(X_test)
print(f'The f1_score of train is {np.round(f1_score(y_train,model_gradient_train)*100)}% and the f1_score of test is {np.round(f1_score(y_test,model_gradient_test)*100,2)}%')

The f1_score of train is 100.0% and the f1_score of test is 62.94%


In [33]:
results['algorithm'].append('Gradient Bosting')
results['train score'].append(np.round(f1_score(y_train,model_gradient_train)*100))
results['test score'].append(np.round(f1_score(y_test,model_gradient_test)*100,2))

# Now We'll apply Gradient Bosting versions that was optimazed like: XGBoost ,LightGBM and CatBoost

## `XGBoosting`

In [34]:
parameters_grad = {
  'booster':['gbtree','gblinear','dart'],
  'eta':[0.001,0.01,0.1,1],
  'max_depth':[10, 20, 30, 40, 80, 90, 100, None],
  'max_features':['auto', 'sqrt', 'log2'],
  'random_state': [42],
  'min_samples_split':[0.125,0.25,0.5,1,2,3,4,6,10]
}

grid_xgb = GridSearchCV(estimator =xgb.XGBClassifier(), 
                          param_grid = parameters_grad, scoring = 'f1', cv = 5)
grid_xgb.fit(X_train,y_train)
pd.DataFrame(grid_xgb.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_booster,param_eta,param_max_depth,param_max_features,param_min_samples_split,param_random_state,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.090335,0.008670,0.003063,0.000023,gbtree,0.001,10,auto,0.125,42,"{'booster': 'gbtree', 'eta': 0.001, 'max_depth...",0.606742,0.65625,0.575342,0.523077,0.5,0.572282,0.05638,1297
1,0.083783,0.001129,0.003041,0.000043,gbtree,0.001,10,auto,0.25,42,"{'booster': 'gbtree', 'eta': 0.001, 'max_depth...",0.606742,0.65625,0.575342,0.523077,0.5,0.572282,0.05638,1297
2,0.084087,0.001472,0.003022,0.000027,gbtree,0.001,10,auto,0.5,42,"{'booster': 'gbtree', 'eta': 0.001, 'max_depth...",0.606742,0.65625,0.575342,0.523077,0.5,0.572282,0.05638,1297
3,0.086125,0.003018,0.003065,0.000087,gbtree,0.001,10,auto,1,42,"{'booster': 'gbtree', 'eta': 0.001, 'max_depth...",0.606742,0.65625,0.575342,0.523077,0.5,0.572282,0.05638,1297
4,0.083805,0.001047,0.003113,0.000140,gbtree,0.001,10,auto,2,42,"{'booster': 'gbtree', 'eta': 0.001, 'max_depth...",0.606742,0.65625,0.575342,0.523077,0.5,0.572282,0.05638,1297
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2587,0.002980,0.000305,0.000000,0.000000,dart,1,,log2,2,42,"{'booster': 'dart', 'eta': 1, 'max_depth': Non...",,,,,,,,2452
2588,0.002879,0.000034,0.000000,0.000000,dart,1,,log2,3,42,"{'booster': 'dart', 'eta': 1, 'max_depth': Non...",,,,,,,,2453
2589,0.002996,0.000183,0.000000,0.000000,dart,1,,log2,4,42,"{'booster': 'dart', 'eta': 1, 'max_depth': Non...",,,,,,,,2454
2590,0.002984,0.000095,0.000000,0.000000,dart,1,,log2,6,42,"{'booster': 'dart', 'eta': 1, 'max_depth': Non...",,,,,,,,2499


In [None]:
grid_xgb.best_params_

In [None]:
grid_xgb.best_score_

### _Aplying the best params in model_

In [None]:
model_xgb = xgb.XGBClassifier(
 booster = 'gbtree',
 eta = 0.001,
 max_depth = 20,
 max_features = 'auto',
 min_samples_split = 0.125,
 random_state = 42
)

model_xgb.fit(X_train,y_train)
model_xgb_train = model_xgb.predict(X_train)
model_xgb_test = model_xgb.predict(X_test)
print(f'The f1_score of train is {np.round(f1_score(y_train,model_xgb_train)*100)}% and the f1_score of test is {np.round(f1_score(y_test,model_xgb_test)*100,2)}%')

In [None]:
results['algorithm'].append('XGBoost')
results['train score'].append(np.round(f1_score(y_train,model_xgb_train)*100))
results['test score'].append(np.round(f1_score(y_test,model_xgb_test)*100,2))

## `LightGBM` 

In [None]:
parameters_light = {
  'booster':['gbtree','gblinear','dart'],
  'eta':[0.001,0.01,0.1,1],
  'max_depth':[10, 20, 30, 40, 80, 90, 100, None],
  'max_features':['auto', 'sqrt', 'log2'],
  'random_state': [42],
  'min_samples_split':[0.125,0.25,0.5,1,2,3,4,6,10]
}

grid_light = GridSearchCV(estimator = LGBMClassifier(), 
                          param_grid = parameters_light, scoring = 'f1')
grid_light.fit(X_train,y_train)
pd.DataFrame(grid_light.cv_results_)

In [None]:
grid_light.best_params_

In [41]:
grid_light.best_score_

0.5557423139520326

### _Aplying the best params in model_

In [None]:
model_light = LGBMClassifier(
  booster = 'gbtree',
  eta = 0.001,
  max_depth = 20,
  max_features = 'auto',
  min_samples_split = 0.125,
  random_state = 42
)

model_light.fit(X_train,y_train)
model_light_train = model_light.predict(X_train)
model_light_test = model_light.predict(X_test)
print(f'The f1_score of train is {np.round(f1_score(y_train,model_light_train)*100)}% and the f1_score of test is {np.round(f1_score(y_test,model_light_test)*100,2)}%')

In [None]:
results['algorithm'].append('LGBM')
results['train score'].append(np.round(f1_score(y_train,model_light_train)*100))
results['test score'].append(np.round(f1_score(y_test,model_light_test)*100,2))

## `Catboost`

In [None]:
parameters_cat = {
  'learning_rate':[0.001,0.01,0.1,1],
  'max_depth':[10, 20, 30, 40, 80, 90, 100],
  'random_seed': [42]
}

grid_cat = GridSearchCV(estimator = CatBoostClassifier(), 
                          param_grid = parameters_cat, scoring = 'f1')
grid_cat.fit(X_train,y_train)
pd.DataFrame(grid_cat.cv_results_)

In [None]:
grid_cat.best_params_

In [None]:
grid_cat.best_score_

### _Aplying the best params in model_

In [47]:
model_cat = CatBoostClassifier(
  learning_rate = 0.01,
  max_depth = 10,
  random_seed = 42
)

model_cat.fit(X_train,y_train)
model_cat_train = model_cat.predict(X_train)
model_cat_test = model_cat.predict(X_test)

0:	learn: 0.6856822	total: 15.5ms	remaining: 15.5s
1:	learn: 0.6800627	total: 32.2ms	remaining: 16.1s
2:	learn: 0.6741949	total: 45.6ms	remaining: 15.2s
3:	learn: 0.6678767	total: 57.3ms	remaining: 14.3s
4:	learn: 0.6616683	total: 69.6ms	remaining: 13.9s
5:	learn: 0.6563815	total: 81.4ms	remaining: 13.5s
6:	learn: 0.6505787	total: 93.2ms	remaining: 13.2s
7:	learn: 0.6450495	total: 105ms	remaining: 13s
8:	learn: 0.6395385	total: 117ms	remaining: 12.8s
9:	learn: 0.6344674	total: 128ms	remaining: 12.7s
10:	learn: 0.6298752	total: 140ms	remaining: 12.6s
11:	learn: 0.6244258	total: 152ms	remaining: 12.5s
12:	learn: 0.6191443	total: 163ms	remaining: 12.4s
13:	learn: 0.6140617	total: 175ms	remaining: 12.4s
14:	learn: 0.6101496	total: 178ms	remaining: 11.7s
15:	learn: 0.6053287	total: 190ms	remaining: 11.7s
16:	learn: 0.6006162	total: 203ms	remaining: 11.7s
17:	learn: 0.5962683	total: 220ms	remaining: 12s
18:	learn: 0.5914335	total: 232ms	remaining: 12s
19:	learn: 0.5866003	total: 244ms	remain

In [None]:
print(f'The f1_score of train is {np.round(f1_score(y_train,model_cat_train)*100)}% and the f1_score of test is {np.round(f1_score(y_test,model_cat_test)*100,2)}%')

In [None]:
results['algorithm'].append('CatBoost')
results['train score'].append(np.round(f1_score(y_train,model_cat_train)*100))
results['test score'].append(np.round(f1_score(y_test,model_cat_test)*100,2))

# Now We'll apply two types of artificial neural networks, a basic type: Perceptron and a more advanced type: Multi-layer Perceptron.

## `Perceptron`

In [None]:
model_ann_perceptron = Perceptron(max_iter = 400, random_state = 42, n_jobs = -1, early_stopping = True, penalty = 'l2', eta0 = 000.1).fit(X_train, y_train)
model_ann_perceptron

In [None]:
model_ann_perceptron_train = model_ann_perceptron.predict(X_train)
model_ann_perceptron_test = model_ann_perceptron.predict(X_test)
print(f'The f1_score of train is {np.round(f1_score(y_train,model_ann_perceptron_train)*100)}% and the f1_score of test is {np.round(f1_score(y_test,model_ann_perceptron_test)*100,2)}%')

In [None]:
results['algorithm'].append('Perceptron')
results['train score'].append(np.round(f1_score(y_train,model_ann_perceptron_train)*100))
results['test score'].append(np.round(f1_score(y_test,model_ann_perceptron_test)*100,2))

## `Multi-layer Perceptron`

In [None]:
model_ann_MULperceptron = MLPClassifier(hidden_layer_sizes = 100 ,n_iter_no_change = 100, early_stopping = True, random_state = 42, activation='relu').fit(X_train, y_train)
model_ann_MULperceptron

In [54]:
model_ann_MULperceptron.loss_

0.4616850701938193

In [None]:
model_ann_MULperceptron_train = model_ann_MULperceptron.predict(X_train)
model_ann_MULperceptron_test = model_ann_MULperceptron.predict(X_test)
print(f'The f1_score of train is {np.round(f1_score(y_train,model_ann_MULperceptron_train)*100)}% and the f1_score of test is {np.round(f1_score(y_test,model_ann_MULperceptron_test)*100,2)}%')

In [56]:
results['algorithm'].append('Multi-Layer Perceptron')
results['train score'].append(np.round(f1_score(y_train,model_ann_MULperceptron_train)*100))
results['test score'].append(np.round(f1_score(y_test,model_ann_MULperceptron_test)*100,2))

# Table of results:

In [57]:
pd.DataFrame.from_dict(results)

Unnamed: 0,algorithm,train score,test score
0,Random Forest,76.37,70.0
1,Gradient Bosting,100.0,62.94
2,XGBoost,100.0,67.63
3,LGBM,100.0,65.28
4,CatBoost,100.0,64.29
5,Perceptron,55.0,57.47
6,Multi-Layer Perceptron,68.0,55.71


Unnamed: 0,algorithm,train score,test score
0,Random Forest,76.37,70.0
1,Gradient Bosting,100.0,62.94
2,XGBoost,100.0,67.63
3,LGBM,100.0,65.28
4,CatBoost,100.0,64.29
5,Perceptron,55.0,57.47
6,Multi-Layer Perceptron,68.0,55.71


## A unique problem was that the results are duplicated, just it...