<a href="https://colab.research.google.com/github/joaohnt/NenClassifier/blob/main/avaliacao_de_algoritmos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.model_selection import GridSearchCV # cross validation
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import pickle
import numpy as np

# preparacao dos dados

In [None]:
with open('credit.pkl', 'rb') as f:
  X_credit_treinamento, y_credit_treinamento, X_credit_teste, y_credit_teste = pickle.load(f)

In [None]:
X_credit = np.concatenate((X_credit_treinamento, X_credit_teste), axis = 0)
X_credit.shape

(2000, 3)

In [None]:
X_credit

array([[-1.3754462 ,  0.50631087,  0.10980934],
       [ 1.45826409, -1.6489393 , -1.21501497],
       [-0.79356829,  0.22531191, -0.43370226],
       ...,
       [ 1.37445674, -1.05746281, -1.12564819],
       [-1.57087737, -0.63488173, -0.36981671],
       [-1.03572293, -0.93978122,  0.04244312]])

In [None]:
y_credit = np.concatenate((y_credit_treinamento, y_credit_teste), axis = 0)
y_credit.shape

(2000,)

In [None]:
y_credit

array([0, 0, 0, ..., 0, 1, 1])

# arvore de decisao

In [37]:
DecisionTreeClassifier()

In [35]:
parametros = {'criterion': ['gini', 'entropy'],
              'splitter': ['best', 'random'],
              'min_samples_split': [2,5,10],
              'min_samples_leaf': [1,5,10]}

In [36]:
grid_search = GridSearchCV(estimator = DecisionTreeClassifier(), param_grid = parametros)
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}
0.983


# random forest

In [38]:
parametros = {'criterion': ['gini', 'entropy'],
              'n_estimators': [10, 40, 100, 150],
              'min_samples_split': [2,5,10],
              'min_samples_leaf': [1,5,10]}

In [40]:
grid_search = GridSearchCV(estimator = RandomForestClassifier(), param_grid = parametros)
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 40}
0.9880000000000001


# knn

In [42]:
parametros = {'n_neighbors': [3,5,10,20],
              'p': [1,2]}

In [43]:
grid_search = GridSearchCV(estimator = KNeighborsClassifier(), param_grid = parametros)
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'n_neighbors': 20, 'p': 1}
0.9800000000000001


# regressao logistica

In [44]:
parametros = {'tol': [0.0001, 0.00001, 0.000001],
              'C': [1.0, 1.5, 2.0],
               'solver': ['lbfgs', 'sag', 'saga']}

In [45]:
grid_search = GridSearchCV(estimator = LogisticRegression(), param_grid = parametros)
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'C': 1.0, 'solver': 'lbfgs', 'tol': 0.0001}
0.9484999999999999


# svm

In [46]:
parametros = {'tol': [0.0001, 0.00001, 0.000001],
              'C': [1.0, 1.5, 2.0],
               'kernel': ['rbf', 'linear', 'poly', 'sigmoid']}

In [47]:
grid_search = GridSearchCV(estimator = SVC(), param_grid = parametros)
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_
print(melhores_parametros)
print(melhor_resultado)

{'C': 1.5, 'kernel': 'rbf', 'tol': 0.0001}
0.9829999999999999


# redes neurais

In [49]:
parametros = {'activation': ['relu', 'logistic', 'tahn'],
              'solver': ['adam', 'sgd'],
              'batch_size': [10,56]}

In [None]:
grid_search = GridSearchCV(estimator = MLPClassifier(), param_grid = parametros)
grid_search.fit(X_credit, y_credit)
melhores_parametros = grid_search.best_params_
melhor_resultado = grid_search.best_score_

In [51]:
print(melhores_parametros)
print(melhor_resultado)

{'activation': 'relu', 'batch_size': 10, 'solver': 'adam'}
0.9970000000000001


# validacao cruzada

In [53]:
from sklearn.model_selection import cross_val_score, KFold

In [57]:
resultados_arvore = []
resultados_random_forest = []
resultados_knn = []
resultados_logistica = []
resultados_svm = []
resultados_rede_neural = []


for i in range(30):
  kfold = KFold(n_splits = 10, shuffle = True, random_state = i)
  arvore = DecisionTreeClassifier(criterion = 'entropy', min_samples_leaf = 1, min_samples_split = 5, splitter = 'best')
  scores = cross_val_score(arvore, X_credit, y_credit, cv = kfold) # a funcao recebe os dados divididos e faz o teste
  # print(scores)
  # print(scores.mean())
  resultados_arvore.append(scores.mean())

  random_forest = RandomForestClassifier(criterion = 'entropy', min_samples_leaf = 1, min_samples_split = 5, n_estimators = 10)
  scores = cross_val_score(random_forest, X_credit, y_credit, cv = kfold)
  resultados_random_forest.append(scores.mean())

  # ai é so seguir o padrao pra todos os outros, preguica de fazer tudo

In [56]:
resultados_arvore

[0.9864999999999998,
 0.986,
 0.9905000000000002,
 0.9875,
 0.9879999999999999,
 0.9890000000000001,
 0.9880000000000001,
 0.9875,
 0.9855,
 0.9869999999999999,
 0.9860000000000001,
 0.9894999999999999,
 0.9884999999999999,
 0.9864999999999998,
 0.9825000000000002,
 0.9859999999999998,
 0.9855,
 0.9904999999999999,
 0.9875,
 0.9869999999999999,
 0.983,
 0.9865,
 0.9884999999999999,
 0.9869999999999999,
 0.9875,
 0.9880000000000001,
 0.9875,
 0.9855,
 0.9854999999999998,
 0.9880000000000001]

In [58]:
resultados_random_forest

[0.9809999999999999,
 0.9834999999999999,
 0.9844999999999999,
 0.9825000000000002,
 0.9865,
 0.9879999999999999,
 0.9834999999999999,
 0.9850000000000001,
 0.983,
 0.984,
 0.9815000000000002,
 0.9849999999999998,
 0.985,
 0.9819999999999999,
 0.9804999999999998,
 0.9789999999999999,
 0.9789999999999999,
 0.983,
 0.9824999999999999,
 0.983,
 0.9835,
 0.9845,
 0.985,
 0.9894999999999999,
 0.982,
 0.982,
 0.983,
 0.983,
 0.9814999999999999,
 0.983]