### Importação

In [None]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.compose import ColumnTransformer, make_column_selector
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
from sklearn import tree
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
import joblib
from sklearn.neighbors import NearestNeighbors


### Leitura do DF

In [None]:
# Load the questionnaire workbook, keeping only spreadsheet columns F through L.
df = pd.read_excel(io="Delfis! - Questionário.xlsx", usecols='F:L')
display(df)

### Separação de resposta e atributo

In [None]:

# Target: the "would you like to use our app?" answer, kept as a one-column DataFrame.
resposta = df['Você gostaria de utilizar o nosso aplicativo?'].to_frame()

# Features: every remaining column that was read from the spreadsheet.
atributo = df.drop(columns=['Você gostaria de utilizar o nosso aplicativo?'])

display(resposta)
display(atributo)

### Codificação ordinal dos dados categóricos (para a serialização)

In [None]:
# Names of the object-dtype (text) columns, captured before `atributo` is replaced below.
categorical_columns = atributo.select_dtypes(include=object).columns

# Ordinal-encode every object-dtype column. ColumnTransformer's default
# remainder='drop' discards any column the selector does not match.
preprocessador = ColumnTransformer(
    transformers=[
        (
            'categoricas',
            OrdinalEncoder(),
            make_column_selector(dtype_include=object),
        ),
    ],
)
atributos = preprocessador.fit_transform(atributo)

# Put the encoded matrix back into a labelled DataFrame.
atributo = pd.DataFrame(data=atributos, columns=categorical_columns)

display(atributo)


### Separando treino e teste

In [None]:
# Feature matrix as a plain NumPy array.
atributos = atributo.values

# Flatten the target to 1-D: scikit-learn estimators expect y of shape
# (n_samples,) and emit DataConversionWarning for an (n_samples, 1) column
# vector. Wrapping in pd.DataFrame first keeps the cell re-runnable once
# `resposta` has already been converted to an array.
resposta = pd.DataFrame(resposta).to_numpy().ravel()

# Hold out 25% of the rows for evaluation; fixed seed for a repeatable split.
x_train, x_test, y_train, y_test = train_test_split(
    atributos, resposta, test_size=0.25, random_state=42
)

### PCA

In [None]:
# from sklearn.decomposition import PCA

# pca = PCA(n_components=2)
# pca.fit(x_train)
# x_train_pca = pca.transform(x_train)
# x_test_pca = pca.transform(x_test)

### Oversampling

In [None]:
# smote = SMOTE()

# x_train_over, y_train_over = smote.fit_resample(x_train, y_train)

### Undersampling

In [None]:
from imblearn.under_sampling import RandomUnderSampler

# Randomly discard majority-class rows from the training split only; the test
# split is left untouched.
# NOTE: the "_over" suffix is kept because later cells read these names, even
# though the data is undersampled here.
rus = RandomUnderSampler(random_state=42, sampling_strategy='majority')
x_train_over, y_train_over = rus.fit_resample(x_train, y_train)

# Show the resampled data, i.e. what the models below are trained on.
display(x_train_over)
display(y_train_over)


## Modelo de Gauss

### Treinando a IA (Gauss)

In [None]:
# Fit Gaussian Naive Bayes on the resampled training data (fit returns the
# estimator itself), then label the held-out test rows.
classificador_bayes = GaussianNB().fit(x_train_over, y_train_over)
classificacao = classificador_bayes.predict(X=x_test)

### Métricas do modelo de Gauss

In [None]:
# Per-class precision / recall / F1 of the Naive Bayes predictions on the test split.
relatorio_gaus = classification_report(y_true=y_test, y_pred=classificacao)
print(relatorio_gaus)


## Árvore de decisão

### Treinando os modelos

In [None]:

# Two decision trees that differ only in the split-quality criterion.
# random_state is fixed because scikit-learn permutes the candidate features at
# every split, so ties between equally good splits would otherwise be broken
# differently from run to run.

# Splits chosen by Gini impurity.
classificador_gini = tree.DecisionTreeClassifier(criterion='gini', random_state=42)

# Splits chosen by information gain (entropy).
classificador_entropy = tree.DecisionTreeClassifier(criterion='entropy', random_state=42)

### Classificação

In [None]:
# Train both trees on the same resampled training set so the gini-vs-entropy
# comparison below differs only in the criterion, not in the training data.
classificador_gini.fit(x_train_over, y_train_over)

classificador_entropy.fit(x_train_over, y_train_over)

### Plot das árvores

In [None]:
# import matplotlib.pyplot as plt

# plt.figure(figsize=(30, 7))
# tree.plot_tree(classificador_gini,
#             feature_names=atributo.columns,
#             class_names= classificador_gini.classes_,
#             filled= True);

# plt.figure(figsize=(30, 7))
# tree.plot_tree(classificador_entropy,
#             feature_names=atributo.columns,
#             class_names= classificador_entropy.classes_,
#             filled= True);

### Predição

In [None]:
# Label the held-out test rows with each tree.
classificacao_gini = classificador_gini.predict(X=x_test)
classificacao_entropy = classificador_entropy.predict(X=x_test)

# Confusion matrix per tree (rows: true class, columns: predicted class).
# `matriz_de_confusao` is reused, so it ends up holding the entropy matrix.
matriz_de_confusao = confusion_matrix(y_true=y_test, y_pred=classificacao_gini)
print('Matriz de confusão gini: \n\n', matriz_de_confusao)

matriz_de_confusao = confusion_matrix(y_true=y_test, y_pred=classificacao_entropy)
print('\nMatriz de confusão entropia: \n\n', matriz_de_confusao)

### Report

In [None]:
# Text reports (precision / recall / F1 per class) for both trees on the test split.
report_gini = classification_report(y_true=y_test, y_pred=classificacao_gini)
report_entropy = classification_report(y_true=y_test, y_pred=classificacao_entropy)

print("Reporte Gini: \n", report_gini)
print("Reporte Entropy: \n", report_entropy)

### Usando o GridSearchCV

In [None]:
# Hyper-parameter grid for the decision tree:
# 2 * 2 * 6 * 3 * 4 * 3 = 864 candidates, each evaluated with 5-fold CV (4320 fits).
parametros = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 2, 4, 6, 8, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 5, 10],
    'max_features': [None, 'sqrt', 'log2']
}

# A fixed seed is required for a repeatable search: splitter='random' and a
# restricted max_features both make the tree stochastic.
classificador_tree = tree.DecisionTreeClassifier(random_state=42)

# n_jobs=-1 spreads the fits over all CPU cores; verbose=1 prints a short
# summary instead of one line per fit.
grid = GridSearchCV(
    estimator=classificador_tree,
    param_grid=parametros,
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid.fit(x_train, y_train)

# best_score_ is the mean cross-validated score of the best candidate
# (accuracy, the default scorer for classifiers).
print("Melhores hyperparameters: ", grid.best_params_)
print("Melhor acuracia: ", grid.best_score_)

In [None]:
# Decision tree with hand-picked hyper-parameters.
# NOTE(review): these values look like they were copied from a grid-search run;
# confirm they still match grid.best_params_.
# random_state is fixed so the fitted tree and its report are repeatable.
classificador_tree = tree.DecisionTreeClassifier(
    criterion='gini',
    max_depth=10,
    min_samples_leaf=10,
    min_samples_split=10,
    splitter='best',
    random_state=42,
)

# Train on the resampled training data, evaluate on the untouched test split.
classificador_tree.fit(x_train_over, y_train_over)
classificador_tree_predict = classificador_tree.predict(x_test)

report_tree_best = classification_report(y_test, classificador_tree_predict)
print("Report Best: \n", report_tree_best)

## KNN

### KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbours classifier trained on the resampled training data.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X=x_train_over, y=y_train_over)

In [None]:
# Label the test split with the fitted KNN model and summarise per-class metrics.
knn_predict = knn.predict(X=x_test)

report_knn = classification_report(y_true=y_test, y_pred=knn_predict)
print("Report KNN: \n", report_knn)

In [None]:
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
# StandardScaler's public home is sklearn.preprocessing; importing it from
# sklearn.discriminant_analysis relies on an internal import of that module.
from sklearn.preprocessing import StandardScaler


# Candidate neighbour counts: 1 through 30.
k_values = list(range(1, 31))

# cross_val_score expects a 1-D target; ravel is a no-op if it is already flat.
y_cv = np.ravel(resposta)

# Mean 5-fold cross-validated score for each k. The scaler lives inside the
# pipeline so it is re-fit on the training folds only; fitting it once on all
# rows would leak statistics of the held-out fold into training.
# A separate name is used for the candidate model so the fitted `knn` from the
# earlier cell is not overwritten by an unfitted estimator.
scores = []
for k in k_values:
    modelo_knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=k))
    scores.append(cross_val_score(modelo_knn, atributo, y_cv, cv=5).mean())

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns

# Mean cross-validated score as a function of the number of neighbours.
# lineplot draws on (and returns) the current Axes, so labelling through the
# returned Axes is the same as calling plt.xlabel / plt.ylabel.
ax = sns.lineplot(x=k_values, y=scores, marker='o')
ax.set_xlabel("K Values")
ax.set_ylabel("Accuracy Score")

### 

## Serialização

In [None]:
# joblib.dump(melhor_classificador, "delfis_modelo.pl1")

In [None]:
# model_carregado = joblib.load("delfis_modelo.pl1")