## Atuação de redes neurais na base de dados do ENEM.

##### Alunos:

-   Gabriel Fonseca (2111066)
-   Yasmim Santos (2116925)
-   Alejandro Elias (2111189)
-   Pedro Lucas (2111131)

Base de dados escolhida - Exame Nacional do Ensino Médio (Enem): https://basedosdados.org/dataset/3e9c8804-c31c-4f48-9a45-d67f1c21a859


In [25]:
import pandas as pd
import numpy as np

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, GridSearchCV

In [26]:
df_enem = pd.read_csv("../data/out/enem-dados-tratados-ml.csv")

tts_r_state = 73
mlp_r_state = 83

df_enem

Unnamed: 0,pc1,pc2,pc3,pc4,pc5,pc6,pc7,pc8,pc9,pc10,...,pc12,pc13,pc14,pc15,pc16,pc17,pc18,pc19,pc20,label
0,0.080772,-0.762073,0.785344,-0.199762,-0.054471,0.049956,0.085057,-0.540187,0.122058,-0.009350,...,0.117866,0.051369,0.182662,0.045681,-0.155865,-0.028637,0.172294,0.042257,-0.090118,0
1,-0.393375,-0.548348,0.338719,0.221931,0.016530,-0.173464,-0.307265,-0.246615,-0.040624,-0.236044,...,0.149144,-0.062155,-0.132039,-0.132272,-0.132336,-0.074050,0.376691,0.060337,-0.172838,3
2,0.776151,0.015068,-0.138434,-0.401179,-0.118827,0.141522,0.238757,-0.504723,0.119789,-0.021885,...,0.090707,0.061678,-0.005448,0.076559,-0.247858,-0.015698,0.181723,0.028222,-0.091366,2
3,-0.298046,-0.212125,-0.284790,0.008430,-0.069643,-0.025388,0.005118,0.034027,-0.003384,0.037276,...,-0.047922,-0.000190,-0.154626,-0.041946,0.002010,-0.008484,0.395561,0.166758,-0.128445,3
4,-0.112868,-0.191350,-0.089860,0.090640,-0.013207,-0.043427,0.032068,0.104094,0.030604,0.156161,...,-0.053985,0.017279,0.184811,-0.046636,0.159181,-0.006575,0.304874,0.173119,-0.099544,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176574,0.690036,0.275459,-0.179738,0.192363,0.070071,-0.119768,-0.097106,-0.064469,0.026958,-0.002589,...,0.109595,-0.016944,0.384752,-0.111325,0.104217,-0.056225,0.199506,0.062727,-0.117149,2
176575,-0.864842,0.316885,-0.248852,-0.297565,-0.790840,-0.036313,-0.041153,-0.010110,0.254096,-0.300749,...,-0.255992,-0.679908,-0.248120,-0.065714,0.100700,0.162870,-0.217107,0.816525,-0.124461,1
176576,-0.690970,-0.464786,-0.921494,-0.172441,-0.288681,-0.029161,-0.171939,-0.371105,-0.857431,0.223315,...,0.255786,0.331872,0.018643,-0.193724,0.179299,-0.010597,-0.154445,-0.128870,0.211709,4
176577,-0.095767,-0.509821,0.567618,0.561371,0.035605,-0.666054,-0.746402,-0.182829,0.341722,0.691655,...,-0.045449,0.064493,-0.080547,0.173111,-0.230392,0.019107,0.097194,0.112198,0.354271,3


In [27]:
X_mlp = df_enem.filter(regex="pc").to_numpy()
y_mlp = df_enem["label"].to_numpy()

n_components = X_mlp.shape[1]

X_train_mlp, X_test_mlp, y_train_mlp, y_test_mlp = train_test_split(
    X_mlp, y_mlp, test_size=0.5, random_state=tts_r_state
)

mlp = MLPClassifier(max_iter=100, random_state=mlp_r_state)

param_grid = {
    "hidden_layer_sizes": [(50,), (100,), (50, 50), (100, 100)],
    "activation": ["tanh", "relu"],
    "solver": ["sgd", "adam"],
    "alpha": [1e-5, 1e-4, 1e-3],
    "learning_rate": ["constant", "adaptive"],
}

grid_search = GridSearchCV(mlp, param_grid, n_jobs=-1, cv=5, scoring="accuracy")
grid_search.fit(X_train_mlp, y_train_mlp)

resultado_gs = pd.DataFrame(grid_search.cv_results_)
resultado_gs = resultado_gs.sort_values(by="rank_test_score")
resultado_gs.to_csv(
    f"../data/out/resultado_gridsearch_mlp_{n_components}.csv", index=False
)

resultado_gs = resultado_gs[
    ["params", "mean_test_score", "std_test_score", "rank_test_score"]
]

resultado_gs

Unnamed: 0,params,mean_test_score,std_test_score,rank_test_score
5,"{'activation': 'tanh', 'alpha': 1e-05, 'hidden...",0.999604,0.000143,1
7,"{'activation': 'tanh', 'alpha': 1e-05, 'hidden...",0.999604,0.000143,1
23,"{'activation': 'tanh', 'alpha': 0.0001, 'hidde...",0.999558,0.000110,3
21,"{'activation': 'tanh', 'alpha': 0.0001, 'hidde...",0.999558,0.000110,3
39,"{'activation': 'tanh', 'alpha': 0.001, 'hidden...",0.999524,0.000111,5
...,...,...,...,...
6,"{'activation': 'tanh', 'alpha': 1e-05, 'hidden...",0.998290,0.000376,91
20,"{'activation': 'tanh', 'alpha': 0.0001, 'hidde...",0.998290,0.000376,91
4,"{'activation': 'tanh', 'alpha': 1e-05, 'hidden...",0.998290,0.000376,91
22,"{'activation': 'tanh', 'alpha': 0.0001, 'hidde...",0.998290,0.000376,91
