# Applications of Machine Learning Algorithms (Working with three models)

### Load libraries

In [1]:
%run ../services/imports.py
%run ../services/function.py
%run ../services/classifier_evaluation.py
%run ../services/visualization.py

importing Jupyter notebook from /home/jean/project/project_dataScience_POSCOMP/notebooks/../services/especialidades.ipynb


In [2]:
visualizacao = Visualizacao()

## Load the database

In [3]:
# dataset = pd.read_csv("../dados/df_poscomp.csv")

In [30]:
# Load the POSCOMP candidates table (path is relative to the notebook folder).
dataset = pd.read_csv("../dados/poscomp_frame.csv")
# Lowercase every column name; the cells below select columns by lowercase name.
dataset.columns = dataset.columns.str.lower()

In [31]:
dataset['linhas_pesquisas'].value_counts()

linhas_pesquisas
inteligência artificial    2490
sistemas de computação     1819
engenharia de software     1667
redes de computadores       921
Name: count, dtype: int64

In [32]:
dataset["linhas_pesquisas"].value_counts()

linhas_pesquisas
inteligência artificial    2490
sistemas de computação     1819
engenharia de software     1667
redes de computadores       921
Name: count, dtype: int64

In [33]:
# Drop rows whose research line is "Outros".
# NOTE(review): the value counts printed above list only lowercase labels and no
# "Outros" entry, so this filter looks like a no-op on the current file — confirm.
dataset = dataset[(dataset["linhas_pesquisas"] != "Outros")]

In [34]:
X_dataset = dataset.loc[
    :,
    [
        "idade",
        "sexo",
        "regiao",
        "estado",
        "matematica",
        "fund_computacao",
        "tec_computacao",
        "total",
        "area_concentration",
    ],
]

In [35]:
X_dataset["area_concentration"] = dataset.area_concentration
X_dataset["area_concentration"] = X_dataset["area_concentration"].astype("category")

In [36]:
def encode_categorical_columns(df, columns):
    """Return a copy of ``df`` with the given columns label-encoded as integers.

    Each column named in ``columns`` is replaced by the integer codes produced
    by ``LabelEncoder.fit_transform``, fitted independently for that column.
    The input frame is not modified: callers in this notebook pass frames that
    were sliced out of a larger one, and writing into such a slice in place
    can trigger pandas' ``SettingWithCopyWarning``.

    Args:
        df: DataFrame holding the columns to encode.
        columns: Iterable of column names present in ``df``.

    Returns:
        A new DataFrame with the same index and columns as ``df``, where the
        requested columns hold integer class codes.
    """
    encoded = df.copy()
    for col in columns:
        # A fresh encoder per column keeps every fit independent of the others.
        encoded[col] = LabelEncoder().fit_transform(encoded[col])
    return encoded


categorical_columns = ["area_concentration", "sexo", "regiao", "estado"]
X_dataset = encode_categorical_columns(X_dataset, categorical_columns)

In [37]:
X = X_dataset.drop(["area_concentration"], axis=1)
y = X_dataset["area_concentration"]

## Working with the first model

### Split the base

In [38]:
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [39]:
print("Shape of x_train : ", x_train.shape)
print("Shape of x_test  : ", x_test.shape)
print("Shape of y_train : ", y_train.shape)
print("Shape of y_test  : ", y_test.shape)

Shape of x_train :  (4827, 8)
Shape of x_test  :  (2070, 8)
Shape of y_train :  (4827,)
Shape of y_test  :  (2070,)


#### Tuning hyperparameters

In [14]:
def process_cv_results(cv_results, top_n=10, output_filename="df_em_porcentagem.csv"):
    """Summarize grid-search results as a top-N table and save it as CSV.

    The results are ranked by ``mean_test_score`` (descending), with ties
    broken by ``mean_score_time`` (ascending). Rows sharing the same test
    score are collapsed to the first (fastest) one, the per-hyperparameter
    columns and the mean scores are kept, and the scores are rendered as
    percentage strings such as ``"87.50%"``.

    Args:
        cv_results: Mapping (or anything ``pd.DataFrame`` accepts) with at
            least the ``mean_test_score`` and ``mean_score_time`` columns,
            e.g. the ``cv_results_`` attribute of a fitted grid search.
        top_n: Maximum number of rows to keep.
        output_filename: Path of the CSV file to write (without the index).

    Returns:
        DataFrame with the ``param_*`` columns followed by the available
        score columns, limited to ``top_n`` rows.

    Raises:
        KeyError: If ``mean_test_score`` or ``mean_score_time`` is missing.
    """
    score_columns = ["mean_test_score", "mean_train_score"]

    cv_results_df = pd.DataFrame(cv_results)

    # Best test score first; equal scores are ordered by the fastest scoring time.
    cv_results_df = cv_results_df.sort_values(
        by=["mean_test_score", "mean_score_time"], ascending=[False, True]
    )

    # Keep a single row per distinct test score (the fastest one after sorting).
    cv_results_df = cv_results_df.drop_duplicates(subset="mean_test_score", keep="first")

    # Select the individual hyperparameter columns by their "param_" prefix.
    # This leaves out the aggregate "params" dict column regardless of where it
    # sits in the column order (slicing off the last match was order-dependent).
    param_columns = [col for col in cv_results_df.columns if col.startswith("param_")]

    # "mean_train_score" is only present when the search recorded train scores,
    # so report whichever score columns actually exist.
    available_scores = [col for col in score_columns if col in cv_results_df.columns]

    # The frame is already ordered by descending test score with unique values,
    # so the first top_n rows are the best ones. Copy before writing into it.
    top_results = cv_results_df[param_columns + available_scores].iloc[:top_n].copy()

    def convert_to_percentage(value):
        """Format a fraction (e.g. 0.875) as a percentage string ("87.50%")."""
        return f"{value * 100:.2f}%"

    # Series.map works on every pandas version; DataFrame.applymap is deprecated.
    for col in available_scores:
        top_results[col] = top_results[col].map(convert_to_percentage)

    top_results.to_csv(output_filename, index=False)

    return top_results


##### Árvore de Decisão

In [None]:
# Hyperparameter grid for the first-model decision-tree search.
parametros = {
    "criterion": ["gini", "entropy"],
    "splitter": ["random", "best"],
    "max_depth": [4, 5, 7, 8],  # Or any other appropriate range
    "min_samples_split": [2, 5, 10, 15],
    "min_samples_leaf": [2, 3, 5, 9, 10],
    # 'min_weight_fraction_leaf': [0.0, 0.1, 0.2],
    # NOTE(review): x_train was printed above with 8 feature columns, so 9 and 10
    # exceed the feature count — confirm how the installed scikit-learn treats them.
    "max_features": [5, 6, 7, 8, 9, 10],
    "max_leaf_nodes": [None, 5, 10, 14, 15, 16, 17],
    # 'min_impurity_decrease': [0.0, 0.1, 0.2],
    # 'ccp_alpha': [0.0, 0.1, 0.2],
}

In [None]:
tree_clf = ClassifierWrapper(DecisionTreeClassifier(random_state=42), parametros)
tree_clf.grid_search(x_train, y_train, cv=10)
tree_clf.train_and_print_scores(x_train, y_train, x_test, y_test)

In [None]:
process_cv_results(tree_clf.cv_results_, top_n=10, output_filename='df_em_porcentagem_tree.csv')

##### Rede Neural

In [None]:
parametros = {
    "hidden_layer_sizes": [
        (50,),
        (100,),
        (50, 50),
        (100, 100),
        (500, 250, 125),
        (200, 100, 50),
    ],  # Experimente diferentes configurações de camadas e neurônios
    "activation": ["identity", "logistic", "tanh", "relu"],
    "solver": ["lbfgs", "sgd", "adam"],
    "alpha": [0.0001, 0.001, 0.01],
    "batch_size": [100, 200, 300],
    # 'learning_rate': ['invscaling'],
    # 'learning_rate_init': [0.001, 0.01, 0.1],
    # 'power_t': [0.5, 0.9],
    "max_iter": [100, 200],
    # 'shuffle': [True, False],
    # 'random_state': [None, 42],
    # 'tol': [1e-4, 1e-3, 1e-2],
    # 'verbose': [True, False],
    # 'warm_start': [True, False],
    # 'momentum': [0.5, 0.9],
    # 'nesterovs_momentum': [True, False],
    # 'early_stopping': [True, False],
    # 'validation_fraction': [0.1, 0.2],
    # 'beta_1': [0.9, 0.99],
    # 'beta_2': [0.999, 0.9999],
    # 'epsilon': [1e-8, 1e-7],
    # 'n_iter_no_change': [5, 10, 15],
    # 'max_fun': [10000, 15000, 20000]
}

In [None]:
neural_clf = ClassifierWrapper(MLPClassifier(random_state=42), parametros)
neural_clf.grid_search(x_train, y_train, cv=3)
neural_clf.train_and_print_scores(x_train, y_train, x_test, y_test)

In [None]:
process_cv_results(neural_clf.cv_results_, top_n=10, output_filename='df_em_porcentagem_neural.csv')

##### Random Forest

In [None]:
parametros = {
    "n_estimators": [100,200,300],  # Você pode ajustar o número de árvores conforme necessário
    "criterion": ['gini', 'entropy', 'log_loss'],
    "max_depth": [9, 10, 12],
    'min_samples_split': [2, 5,10],
    "min_samples_leaf": [2,4,6],
    # 'min_weight_fraction_leaf': [0.0, 0.1, 0.2],
    "max_features": [5,6,7],
    # 'max_leaf_nodes': [None, 10, 20, 30],
    # 'min_impurity_decrease': [0.0, 0.1, 0.2],
    'bootstrap': [False],
    # 'oob_score': [True, False],
    'n_jobs': [-1],  # Use -1 para usar todos os processadores disponíveis
    # 'random_state': [None, 42],  # Use um valor fixo para repetibilidade
    # 'verbose': [0, 1, 2],
    # 'warm_start': [True, False],
    # 'class_weight': [None, 'balanced', 'balanced_subsample'],
    # 'ccp_alpha': [0.0, 0.1, 0.2],
    # 'max_samples': [None, 0.7, 0.8, 0.9],
}

In [None]:
forest_clf = ClassifierWrapper(RandomForestClassifier(random_state=42), parametros)
forest_clf.grid_search(x_train, y_train, cv=10)
forest_clf.train_and_print_scores(x_train, y_train, x_test, y_test)

In [None]:
process_cv_results(forest_clf.cv_results_, top_n=10, output_filename='df_em_porcentagem_rf.csv')

##### SVM

In [None]:
parametros = {
    "C": [0.1, 0.01],
    'kernel': ['rbf', 'poly'],
    "degree": [3,4,5],
    # "gamma": ["scale", "auto"],
    # 'coef0': [0.0, 0.1, 1.0],
    # 'shrinking': [True, False],
    # 'probability': [True],
    # 'tol': [1e-4],
    # 'cache_size': np.arange(1,11,2),
    # "class_weight": ["balanced"],
    # 'verbose': [True],
    # 'max_iter': [100, 1000, -1],  # -1 indica nenhum limite
    # 'decision_function_shape': ['ovr', 'ovo'],
    # 'break_ties': [True],
}

In [None]:
svm_clf = ClassifierWrapper(SVC(random_state=42), parametros)
svm_clf.grid_search(x_train, y_train, cv=10)
svm_clf.train_and_print_scores(x_train, y_train, x_test, y_test)

In [None]:
process_cv_results(svm_clf.cv_results_, top_n=10, output_filename='df_em_porcentagem_SVM.csv')

### Cross Validation

In [40]:
# Re-join the train and test partitions so the evaluation below uses every sample.
# np.concatenate returns plain NumPy arrays, so the DataFrame column names of
# x_train / x_test are not carried over into x_poscomp.
x_poscomp = np.concatenate((x_train, x_test), axis=0)
y_poscomp = np.concatenate((y_train, y_test), axis=0)

In [None]:
# Evaluate one fixed configuration per algorithm on the full first-model data.
# NOTE(review): ClassifierWrapper.avaliar_classificador is defined in the service
# modules loaded at the top of the notebook; later cells read
# result[0]["train_score"] and result[0]["test_score"] from its return value.

# Decision Tree configuration
# NOTE(review): max_features=10 is larger than the 8 feature columns printed for
# x_train above — confirm this value is intended.
params_arvore = {
    "criterion": "gini",
    "max_depth": 5,
    "max_features": 10,
    "min_samples_leaf": 10,
    "splitter": "best",
    "random_state": 42,
    "max_leaf_nodes": 15,
}
resultados_arvore = ClassifierWrapper.avaliar_classificador(
    DecisionTreeClassifier, params_arvore, x_poscomp, y_poscomp
)

# Random Forest configuration
# NOTE(review): n_estimators=1000 is outside the grid searched above
# ([100, 200, 300]) — presumably chosen manually; confirm.
params_random_forest = {
    "bootstrap": False,
    "criterion": "entropy",
    "max_depth": 10,
    "max_features": 6,
    "min_samples_leaf": 2,
    "n_estimators": 1000,
    "n_jobs": -1,
    "warm_start": True,
    "random_state": 42,
}
resultados_random_forest = ClassifierWrapper.avaliar_classificador(
    RandomForestClassifier, params_random_forest, x_poscomp, y_poscomp
)

# SVM configuration
# NOTE(review): C=20 is outside the grid searched above ([0.1, 0.01]), and
# "degree" is documented by scikit-learn as used only by the "poly" kernel, so
# it presumably has no effect with kernel="rbf" — confirm.
params_svm = {
    "C": 20,
    "degree": 1,
    "kernel":"rbf",
    "random_state": 42,
    "tol": 0.0001,
}
resultados_svm = ClassifierWrapper.avaliar_classificador(
    SVC, params_svm, x_poscomp, y_poscomp
)

# MLP (neural network) configuration
params_rede_neural = {
    "activation": "logistic",
    "alpha": 0.0001,
    "batch_size": 100,
    "hidden_layer_sizes": (100,),
    "max_iter": 100,
    "solver": "lbfgs",
    "random_state": 42,
}
resultados_rede_neural = ClassifierWrapper.avaliar_classificador(
    MLPClassifier, params_rede_neural, x_poscomp, y_poscomp
)

In [None]:
resultados = pd.DataFrame(
    {
        "decisionTree_treino": resultados_arvore[0]["train_score"],
        "decisionTree_teste": resultados_arvore[0]["test_score"],
        "randomForest_treino": resultados_random_forest[0]["train_score"],
        "randomForest_teste": resultados_random_forest[0]["test_score"],
        "svm_treino": resultados_svm[0]["train_score"],
        "svm_teste": resultados_svm[0]["test_score"],
        "redeNeural_treino": resultados_rede_neural[0]["train_score"],
        "redeNeural_teste": resultados_rede_neural[0]["test_score"],
    }
)

In [None]:
resultados

In [None]:
resultados.var()

In [None]:
(resultados.std() / resultados.mean()) * 100

In [None]:
medias = resultados.mean()
for nome_coluna, media in medias.items():
    print(f"Média da {nome_coluna}: {media * 100:.2f}%")

In [None]:
resultados_teste = resultados.loc[
    :, ["decisionTree_teste", "randomForest_teste", "svm_teste", "redeNeural_teste"]
]

In [None]:
resultados_teste.columns = ["Árvore de Decisão", "Random Forest", "SVM", "Rede Neural"]

In [None]:
resultados_teste

In [None]:
resultados_treino = resultados.loc[
    :, ["decisionTree_treino", "randomForest_treino", "svm_treino", "redeNeural_treino"]
]

In [None]:
resultados_treino

In [None]:
resultados_treino.columns = ["Árvore de Decisão", "Random Forest", "SVM", "Rede Neural"]

In [None]:
resultados_treino.mean()

In [None]:
models = ["decisionTree", "randomForest", "svm", "redeNeural"]
models_nome = ["Árvore de Decisão", "Random Forest", "SVM", "Rede Neural"]
visualizacao.plot_learning_curve(
    resultados,
    models=models,
    model_names=models_nome,
    save_path="../imagens/curva_aprendizadoML0.pdf",
)

#### Teste de normalidade

In [None]:
alpha = 0.05

In [None]:
from scipy.stats import shapiro

In [None]:
shapiro(resultados_arvore[0]["test_score"]), shapiro(
    resultados_random_forest[0]["test_score"]
), shapiro(resultados_svm[0]["test_score"]), shapiro(
    resultados_rede_neural[0]["test_score"]
)

#### Teste de hipótese com ANOVA e Tukey

In [None]:
from scipy.stats import f_oneway

In [None]:
_, p = f_oneway(
    resultados_arvore[0]["test_score"],
    resultados_random_forest[0]["test_score"],
    resultados_svm[0]["test_score"],
    resultados_rede_neural[0]["test_score"],
)
p

In [None]:
# Decision rule for the one-way ANOVA p-value `p` computed in the previous cell.
alpha = 0.05
if p <= alpha:
    # p <= alpha: reject H0 (all classifiers share the same mean test score).
    print("Hipótese nula rejeitada. Dados são diferentes")
else:
    # NOTE(review): p > alpha only means H0 could not be rejected; the printed
    # wording ("alternative hypothesis rejected ... equal") overstates that.
    print("Hipótese alternativa rejeitada. Resultados são iguais")

In [None]:
resultados_algoritmos = {
    "accuracy": np.concatenate(
        [
            resultados_arvore[0]["test_score"],
            resultados_random_forest[0]["test_score"],
            resultados_svm[0]["test_score"],
            resultados_rede_neural[0]["test_score"],
        ]
    ),
    "algoritmo": ["arvore"] * len(resultados_arvore[0]["test_score"])
    + ["random_forest"] * len(resultados_random_forest[0]["test_score"])
    + ["svm"] * len(resultados_svm[0]["test_score"])
    + ["rede_neural"] * len(resultados_rede_neural[0]["test_score"]),
}

In [None]:
resultados_df = pd.DataFrame(resultados_algoritmos)
resultados_df.head()

In [None]:
from statsmodels.stats.multicomp import MultiComparison

In [None]:
compara_algoritmos = MultiComparison(
    resultados_df["accuracy"], resultados_df["algoritmo"]
)

In [None]:
teste_estatistico = compara_algoritmos.tukeyhsd()
print(teste_estatistico)

In [None]:
resultados_teste.mean()

In [None]:
teste_estatistico.plot_simultaneous();

#### Salvar o classificador treinado

In [41]:
# Final first-model forest, fitted on the combined train+test arrays (x_poscomp).
# NOTE(review): max_depth=9 here, while the evaluated configuration
# (params_random_forest) used max_depth=10 — confirm which one is intended.
classificador_randomForest = RandomForestClassifier(bootstrap=False, criterion='entropy', max_depth=9,
                       max_features=6, min_samples_leaf=2, min_samples_split=2,
                       n_estimators=1000, random_state=42)
classificador_randomForest.fit(x_poscomp, y_poscomp)

In [42]:
predict = classificador_randomForest.predict(x_test)

In [43]:
np.unique(predict, return_counts=True)

(array([0, 1]), array([1554,  516]))

In [None]:
from sklearn.metrics import auc, roc_curve

# A ROC curve needs continuous scores: hard 0/1 predictions collapse it to a
# single operating point. Column 1 of predict_proba holds the probability of
# the second entry of classes_, i.e. the encoded label 1 of this binary target.
# NOTE(review): classificador_randomForest was fitted on x_poscomp, which
# contains x_test, so this curve is measured on data seen during training and
# is optimistic — refit on x_train only for an unbiased estimate.
positive_scores = classificador_randomForest.predict_proba(x_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, positive_scores)

# Area under the ROC curve (AUC)
roc_auc = auc(fpr, tpr)

# Plot the ROC curve against the chance diagonal
plt.figure(figsize=(8, 8))
plt.plot(
    fpr,
    tpr,
    color="darkorange",
    lw=2,
    label=f"ROC curve (area = {roc_auc:.2f})",
)
plt.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Receiver Operating Characteristic (ROC) Curve")
plt.legend(loc="lower right")
plt.show()

In [None]:
with open("../dados/classificadores/randomForest_finalizado2.sav", "wb") as file:
    pickle.dump(classificador_randomForest, file, protocol=pickle.HIGHEST_PROTOCOL)

#### Carregar um classificador treinado

In [None]:
# Load a previously saved classifier; the context manager closes the file
# handle, which the bare open(...) call left dangling.
# NOTE(review): this path differs from the one used by the save cell above
# ("../dados/classificadores/randomForest_finalizado2.sav") — confirm the file.
# pickle can execute arbitrary code on load: only open trusted files.
with open("randomForest_finalizado.sav", "rb") as file:
    randomForest = pickle.load(file)

## Applications of the two models

### Candidates classified for the computing area

<h4> <i>Load data </i></h4>

In [None]:
df_computation = dataset[dataset["area_concentration"] == "computacao"]

In [None]:
df_computation

In [None]:
df_computation

In [None]:
linhas_pesquisas = df_computation.groupby('linhas_pesquisas').size().reset_index(name='counts')
visualizacao.barplot_view(dataframe=linhas_pesquisas, x='linhas_pesquisas', y='counts', show_legend=False, 
                         figsize=(8,5), save_path="../imagens/quant_class_modelo2.pdf", dodge=True)

In [None]:
df_computation = df_computation.loc[
    :,
    [
        "IDADE",
        "SEXO",
        "REGIAO",
        "ESTADO",
        "matematica",
        "fund_computacao",
        "tec_computacao",
        "total",
        "linhas_pesquisas",
    ],
]

In [None]:
categorical_columns_comp = ["linhas_pesquisas", "SEXO", "REGIAO", "ESTADO"]
df_computation = encode_categorical_columns(df_computation, categorical_columns_comp)

In [None]:
x_computation = df_computation.drop(["linhas_pesquisas"], axis=1)
y_computation = df_computation["linhas_pesquisas"]

<h5><i>Split of data</i></h5>

In [None]:
x_train_comp, x_test_comp, y_train_comp, y_test_comp = train_test_split(
    x_computation, y_computation, test_size=0.3, stratify=y_computation, random_state=42
)

In [None]:
print("Shape of x_train : ", x_train_comp.shape)
print("Shape of x_test  : ", x_test_comp.shape)
print("Shape of y_train : ", y_train_comp.shape)
print("Shape of y_test  : ", y_test_comp.shape)

##### Busca por hiperparâmetros

###### Decision Tree

In [None]:
# Decision-tree grid for the computing-area (second) model.
parametros = {
    "criterion": ["gini", "entropy", "log_loss"],
    "splitter": ["random", "best"],
    "max_depth": [8,9,10],  # Or any other appropriate range
    'min_samples_split': [2,5],
    "min_samples_leaf": [9,10,11],
    # 'min_weight_fraction_leaf': [0.5],
    # NOTE(review): x_computation keeps 8 feature columns after dropping the
    # target, so 9 exceeds the feature count — confirm this value is intended.
    "max_features": [7,8,9],
    "max_leaf_nodes": [15,20,25],
    # 'min_impurity_decrease': [0.0, 0.1, 0.2],
    # 'ccp_alpha': [0.0, 0.1, 0.2],
}

In [None]:
tree_clf = ClassifierWrapper(DecisionTreeClassifier(random_state=42), parametros)
tree_clf.grid_search(x_train_comp, y_train_comp, cv=10)
tree_clf.train_and_print_scores(x_train_comp, y_train_comp, x_test_comp, y_test_comp)

In [None]:
process_cv_results(tree_clf.cv_results_, top_n=10, output_filename="df_em_porcents_treeM1.csv")

###### Random Forest

In [None]:
parametros = {
    "n_estimators": [200, 300, 500],
    "criterion": ['gini', 'entropy', 'log_loss'],
    "max_depth": [4, 5, 6],
    "min_samples_split": [2, 4, 6],
    "min_samples_leaf": [7, 8, 9],
    # 'min_weight_fraction_leaf': [0.0, 0.1, 0.2],
    "max_features": [7, 8, 9],
    "max_leaf_nodes": [10, 20, 30],
    # 'min_impurity_decrease': [0.0, 0.1, 0.2],
    "bootstrap": [True, False],
    # "oob_score": [True],
    "n_jobs": [-1],  # Use -1 para usar todos os processadores disponíveis
    # 'verbose': [0, 1, 2],
    # "warm_start": [True],
    # 'class_weight': ['balanced', 'balanced_subsample'],
    # 'ccp_alpha': [0.0, 0.1, 0.2],
    # 'max_samples': [1,2,3,4,5],
}

In [None]:
forest_clf = ClassifierWrapper(RandomForestClassifier(random_state=42), parametros)
forest_clf.grid_search(x_train_comp, y_train_comp, cv=10)
forest_clf.train_and_print_scores(x_train_comp, y_train_comp, x_test_comp, y_test_comp)

In [None]:
process_cv_results(forest_clf.cv_results_, top_n=10, output_filename="df_em_porcents_forestM1.csv")

###### SVM

In [None]:
parametros = {
    "C": [0.1,1,2,3,4],
    'kernel': ['rbf', 'poly'],
    "degree": [1, 2, 3],
    # "gamma": ["scale", "auto", 0.1, 1.0],
    # 'coef0': [0.0, 0.1, 1.0],
    # 'shrinking': [True, False],
    # 'probability': [True],
    # 'tol': [1e-4],
    # 'cache_size': np.arange(1,11,2),
    # "class_weight": ["balanced"],
    # 'verbose': [True],
    # 'max_iter': [100, 1000, -1],  # -1 indica nenhum limite
    # 'decision_function_shape': ['ovr', 'ovo'],
    # 'break_ties': [True],
}

In [None]:
svm_clf = ClassifierWrapper(SVC(random_state=42), parametros)
svm_clf.grid_search(x_train_comp, y_train_comp, cv=10)
svm_clf.train_and_print_scores(x_train_comp, y_train_comp, x_test_comp, y_test_comp)

In [None]:
process_cv_results(svm_clf.cv_results_, top_n=10, output_filename="df_em_porcents_svmM1.csv")

###### Rede Neural

In [None]:
parametros = {
    "hidden_layer_sizes": [(50,), (100,), (50, 50), (100, 100), (200, 100, 50)],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    "solver": ["adam"],
    "alpha": [0.1],
    "batch_size": ["auto", 100, 200],
    # 'learning_rate': ['constant', 'invscaling', 'adaptive'],
    "learning_rate_init": [0.01],
    # 'power_t': [0.5, 0.9],
    # 'max_iter': [200, 300, 400],
    # 'shuffle': [True, False],
    # 'random_state': [None, 42],
    # 'tol': [1e-4, 1e-3, 1e-2],
    # 'verbose': [True, False],
    # 'warm_start': [True, False],
    # 'momentum': [0.5, 0.9],
    # 'nesterovs_momentum': [True, False],
    "early_stopping": [True, False],
    # 'validation_fraction': [0.1, 0.2],
    "beta_1": [0.99],
    "beta_2": [0.999],
    "epsilon": [1e-8],
    "n_iter_no_change": [20],
    # 'max_fun': [10000, 15000, 20000]
}

In [None]:
neural_clf = ClassifierWrapper(
    MLPClassifier(random_state=42), parametros
)
neural_clf.grid_search(x_train_comp, y_train_comp, cv=10)
neural_clf.train_and_print_scores(x_train_comp, y_train_comp, x_test_comp, y_test_comp)

In [None]:
process_cv_results(neural_clf.cv_results_, top_n=10, output_filename="df_em_porcents_neuralM1.csv")

##### Validação Cruzada

In [None]:
x_poscomp_comp = np.concatenate((x_train_comp, x_test_comp), axis=0)
y_poscomp_comp = np.concatenate((y_train_comp, y_test_comp), axis=0)

In [None]:
# Uso para Decision Tree
params_arvore_comp = {
    "criterion": "gini",
    "max_depth": 9,
    "max_features": 8,
    "max_leaf_nodes": 20,
    "min_samples_leaf": 10,
    "splitter": "random",
    "random_state": 42,
}
resultados_arvore_comp = ClassifierWrapper.avaliar_classificador(
    DecisionTreeClassifier, params_arvore_comp, x_poscomp_comp, y_poscomp_comp
)

# Uso para Random Forest
params_random_forest_comp = {
    "bootstrap": True,
    "criterion": "gini",
    "max_depth": 5,
    "max_features": 7,
    "max_leaf_nodes": 30,
    "min_samples_leaf": 9,
    "min_samples_split": 6,
    "n_estimators": 300,
    "n_jobs": -1,
    "random_state": 42,
}
resultados_random_forest_comp = ClassifierWrapper.avaliar_classificador(
    RandomForestClassifier, params_random_forest_comp, x_poscomp_comp, y_poscomp_comp
)

# Uso para SVM
params_svm_comp = {
    "C": 8,
    "break_ties": True,
    "coef0": 0.1,
    "kernel": "poly",
    "decision_function_shape": "ovr",
    "degree": 1,
    "gamma": "auto",
    "tol": 0.0001,
    "random_state": 42,
}
resultados_svm_comp = ClassifierWrapper.avaliar_classificador(
    SVC, params_svm_comp, x_poscomp_comp, y_poscomp_comp
)

# Uso para MLP (Rede Neural)
params_rede_neural_comp = {
    "alpha": 0.1,
    "batch_size": "auto",
    "beta_1": 0.99,
    "beta_2": 0.999,
    "early_stopping": False,
    "epsilon": 1e-08,
    "hidden_layer_sizes": (100, 100),
    "learning_rate_init": 0.01,
    "n_iter_no_change": 20,
    "solver": "adam",
    "random_state": 42,
}
resultados_rede_neural_comp = ClassifierWrapper.avaliar_classificador(
    MLPClassifier, params_rede_neural_comp, x_poscomp_comp, y_poscomp_comp
)

In [None]:
resultados_comp = pd.DataFrame(
    {
        "decisionTree_treino": resultados_arvore_comp[0]["train_score"],
        "decisionTree_teste": resultados_arvore_comp[0]["test_score"],
        "randomForest_treino": resultados_random_forest_comp[0]["train_score"],
        "randomForest_teste": resultados_random_forest_comp[0]["test_score"],
        "svm_treino": resultados_svm_comp[0]["train_score"],
        "svm_teste": resultados_svm_comp[0]["test_score"],
        "redeNeural_treino": resultados_rede_neural_comp[0]["train_score"],
        "redeNeural_teste": resultados_rede_neural_comp[0]["test_score"],
    }
)

In [None]:
resultados_comp

In [None]:
medias = resultados_comp.mean()
for nome_coluna, media in medias.items():
    print(f"Média da {nome_coluna}: {media * 100:.2f}%")

In [None]:
resultados_comp_teste = resultados_comp.loc[
    :, ["decisionTree_teste", "randomForest_teste", "svm_teste", "redeNeural_teste"]
]

In [None]:
models = ["decisionTree", "randomForest", "svm", "redeNeural"]
models_nome = ["Árvore de Decisão", "Random Forest", "SVM", "Rede Neural"]

visualizacao.plot_learning_curve(
    resultados_comp,
    models=models,
    model_names=models_nome,
    save_path="../imagens/curva_aprendizadoML1.pdf",
)

In [None]:
resultados_comp_teste.mean()

In [None]:
resultados_comp_teste.columns = [
    "Árvore de Decisão",
    "Random Forest",
    "SVM",
    "Rede Neural",
]

In [None]:
resultados_comp_teste

In [None]:
resultados_comp_treino = resultados_comp.loc[
    :, ["decisionTree_treino", "randomForest_treino", "svm_treino", "redeNeural_treino"]
]

In [None]:
resultados_comp_treino.columns = [
    "Árvore de Decisão",
    "Random Forest",
    "SVM",
    "Rede Neural",
]

In [None]:
resultados_comp_treino.mean()

In [None]:
(resultados_comp.std() / resultados_comp.mean()) * 100

Teste de normalidade

In [None]:
alpha = 0.05

In [None]:
from scipy.stats import shapiro

In [None]:
shapiro(resultados_arvore_comp[0]["test_score"]), shapiro(
    resultados_random_forest_comp[0]["test_score"]
), shapiro(resultados_svm_comp[0]["test_score"]), shapiro(
    resultados_rede_neural_comp[0]["test_score"]
)

Teste de hipótese com ANOVA e Tukey

In [None]:
from scipy.stats import f_oneway

In [None]:
_, p = f_oneway(
    resultados_arvore_comp[0]["test_score"],
    resultados_random_forest_comp[0]["test_score"],
    resultados_svm_comp[0]["test_score"],
    resultados_rede_neural_comp[0]["test_score"],
)
p

In [None]:
# Decision rule for the one-way ANOVA p-value `p` computed in the previous cell.
alpha = 0.05
if p <= alpha:
    # p <= alpha: reject H0 (all classifiers share the same mean test score).
    print("Hipótese nula rejeitada. Dados são diferentes")
else:
    # NOTE(review): p > alpha only means H0 could not be rejected; the printed
    # wording ("alternative hypothesis rejected ... equal") overstates that.
    print("Hipótese alternativa rejeitada. Resultados são iguais")

In [None]:
resultados_algoritmos_comp = {
    "accuracy": np.concatenate(
        [
            resultados_arvore_comp[0]["test_score"],
            resultados_random_forest_comp[0]["test_score"],
            resultados_svm_comp[0]["test_score"],
            resultados_rede_neural_comp[0]["test_score"],
        ]
    ),
    "algoritmo": ["arvore"] * len(resultados_arvore_comp[0]["test_score"])
    + ["random_forest"] * len(resultados_random_forest_comp[0]["test_score"])
    + ["svm"] * len(resultados_svm_comp[0]["test_score"])
    + ["rede_neural"] * len(resultados_rede_neural_comp[0]["test_score"]),
}

In [None]:
resultados_df_comp = pd.DataFrame(resultados_algoritmos_comp)
resultados_df_comp

In [None]:
!pip install statsmodels

In [None]:
from statsmodels.stats.multicomp import MultiComparison

In [None]:
compara_algoritmos_comp = MultiComparison(
    resultados_df_comp["accuracy"], resultados_df_comp["algoritmo"]
)

In [None]:
teste_estatistico_comp = compara_algoritmos_comp.tukeyhsd()
print(teste_estatistico_comp)

In [None]:
resultados_comp_teste.mean()

In [None]:
teste_estatistico_comp.plot_simultaneous();

Salvar o classificador treinado

In [None]:
classificador_randomForest = RandomForestClassifier(criterion='gini',max_depth=5, max_features=7, max_leaf_nodes=30,
                                                    min_samples_leaf=9, n_estimators=300, n_jobs=-1,
                                                    min_samples_split= 6,random_state=42)
classificador_randomForest.fit(x_poscomp_comp, y_poscomp_comp)

In [None]:
import pickle

# Persist the fitted second-model forest. The context manager guarantees the
# file is flushed and closed even if pickling fails; the bare open(...) call
# never closed its handle.
with open("../dados/classificadores/randomForest_finalizado_model2.sav", "wb") as file:
    pickle.dump(classificador_randomForest, file)

Carregar um classificador treinado

In [None]:
# Load a previously saved classifier; the context manager closes the file
# handle, which the bare open(...) call left dangling.
# NOTE(review): this path differs from the file written by the save cell above
# ("../dados/classificadores/randomForest_finalizado_model2.sav") — confirm.
# pickle can execute arbitrary code on load: only open trusted files.
with open("randomForest_finalizado.sav", "rb") as file:
    randomForest = pickle.load(file)

##### Algoritmos para computação

In [None]:
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

# Baseline comparison on the "_comp" split: each estimator is fitted on the
# training partition and scored on both partitions.
nomes_modelos_comp = (
    "Árvore de Decisão (C4.5)",
    "Random Forest",
    "SVM",
    "Rede Neural",
)
estimadores_comp = (
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42, n_estimators=1000),
    SVC(random_state=42),
    MLPClassifier(random_state=42),
)
algorithms = list(zip(nomes_modelos_comp, estimadores_comp))

for name, classifier in algorithms:
    classifier.fit(x_train_comp, y_train_comp)

    # Predict on both partitions so train and test accuracy can be compared.
    train_predictions_comp, test_predictions_comp = (
        classifier.predict(particao) for particao in (x_train_comp, x_test_comp)
    )
    train_accuracy_comp = accuracy_score(y_train_comp, train_predictions_comp)
    test_accuracy_comp = accuracy_score(y_test_comp, test_predictions_comp)

    # Precision, recall and F1 on the test partition, macro-averaged over classes.
    precision_comp, recall_comp, f1_comp = (
        metrica(y_test_comp, test_predictions_comp, average="macro")
        for metrica in (precision_score, recall_score, f1_score)
    )
    conf_matrix_comp = confusion_matrix(y_test_comp, test_predictions_comp)

    print(
        f"{name} - Acurácia no Conjunto de Treinamento: {train_accuracy_comp * 100:.2f}%"
    )
    print(f"{name} - Acurácia no Conjunto de Teste: {test_accuracy_comp * 100:.2f}%")
    print(f"{name} - Precisão: {precision_comp * 100:.2f}%")
    print(f"{name} - Recall: {recall_comp * 100:.2f}%")
    print(f"{name} - F1-Score: {f1_comp * 100:.2f}%")
    print(f"{name} - Matriz de Confusão:\n{conf_matrix_comp}\n")

### Candidates classified for the information area

In [None]:
df_information = dataset[dataset["area_concentration"] == "informatica"]

In [None]:
df_information.head()

In [None]:
# Count candidates per research line and plot the counts as a bar chart.
contagem_por_linha = df_information.groupby("linhas_pesquisas").size()
linhas_pesquisas2 = contagem_por_linha.reset_index(name="counts")
visualizacao.barplot_view(
    dataframe=linhas_pesquisas2,
    x="linhas_pesquisas",
    y="counts",
    show_legend=False,
    figsize=(8, 5),
    save_path="../imagens/quant_class_modelo3.pdf",
    dodge=True,
)

In [None]:
df_information = df_information.loc[
    :,
    [
        "IDADE",
        "SEXO",
        "REGIAO",
        "ESTADO",
        "matematica",
        "fund_computacao",
        "tec_computacao",
        "total",
        "linhas_pesquisas",
    ],
]

In [None]:
categorical_columns_inf = ["linhas_pesquisas", "SEXO", "REGIAO", "ESTADO"]
df_information = encode_categorical_columns(df_information, categorical_columns_inf)

In [None]:
df_information.head()

In [None]:
# Separate the target (encoded research line) from the feature columns.
y_information = df_information["linhas_pesquisas"]
x_information = df_information.drop(columns=["linhas_pesquisas"])

Dividir a Base

In [None]:
# Hold-out split: 30% of the rows go to the test partition, stratified on the
# target so both partitions keep the class proportions of y_information.
# NOTE(review): no random_state is passed, so the partitions presumably differ
# on every run — consider fixing a seed to make the results reproducible.
x_train_inf, x_test_inf, y_train_inf, y_test_inf = train_test_split(
    x_information, y_information, test_size=0.3, stratify=y_information
)

In [None]:
# Sanity check: report the dimensions of each partition.
for rotulo, particao in (
    ("Shape of x_train : ", x_train_inf),
    ("Shape of x_test  : ", x_test_inf),
    ("Shape of y_train : ", y_train_inf),
    ("Shape of y_test  : ", y_test_inf),
):
    print(rotulo, particao.shape)

###### Árvore de Decisão

In [None]:
# Candidate hyper-parameter values for the decision tree; the dict is handed to
# ClassifierWrapper in the next cell. Commented-out entries are options that
# were left out of the search.
parametros = {
    "criterion": ["entropy", "log_loss", "gini"],
    "splitter": ["random", "best"],
    "max_depth": [3, 4, 5, 6, 7],  # or any other suitable range
    'min_samples_split': [2, 5, 10],
    "min_samples_leaf": np.arange(1, 21, 2),
    # 'min_weight_fraction_leaf': [0.0, 0.1, 0.2],
    "max_features": [4, 5, 6],
    'max_leaf_nodes': [None, 5, 10, 20],
    # 'min_impurity_decrease': [0.0, 0.1, 0.2],
    # 'ccp_alpha': [0.0, 0.1, 0.2],
}

In [None]:
# Tune and evaluate the decision tree on the "_inf" split.
# ClassifierWrapper is defined outside this section; presumably `grid_search`
# runs a 10-fold search over `parametros` and `train_and_print_scores` reports
# train/test metrics — confirm against its implementation.
tree_clf = ClassifierWrapper(DecisionTreeClassifier(random_state=42), parametros)
tree_clf.grid_search(x_train_inf, y_train_inf, cv=10)
tree_clf.train_and_print_scores(x_train_inf, y_train_inf, x_test_inf, y_test_inf)

In [None]:
# Export the 10 best grid-search configurations of the decision tree to CSV.
# NOTE(review): the file name ends in "M2" although other outputs of this
# section are tagged as model 3 (e.g. "quant_class_modelo3.pdf") — verify it
# does not overwrite a file produced for another model.
process_cv_results(tree_clf.cv_results_, top_n=10, output_filename='df_em_porcentes_treeM2.csv')

###### Random Forest

In [None]:
# Candidate hyper-parameter values for the Random Forest; the dict is handed to
# ClassifierWrapper in the next cell. Commented-out entries are options that
# were left out of the search.
# NOTE(review): the active lists multiply to several thousand combinations,
# each with hundreds of trees — expect a very long search.
parametros = {
    "n_estimators": [300,500,600,700],
    "criterion": ['gini', 'entropy', 'log_loss'],
    "max_depth": [6,7,8,9],
    'min_samples_split': [2,4,6],
    "min_samples_leaf": [8,9,10],
    # 'min_weight_fraction_leaf': [0.0, 0.1, 0.2],
    "max_features": ['sqrt', 'log2', None],
    'max_leaf_nodes': [None, 20, 30,40],
    # 'min_impurity_decrease': [0.0, 0.1, 0.2],
    "bootstrap": [False, True],
    # 'oob_score': [True, False],
    "n_jobs": [-1],  # -1 = use every available processor
    # 'verbose': [0, 1, 2],
    # 'warm_start': [True],
    # 'class_weight': ['balanced'],
    # 'ccp_alpha': [0.0, 0.1, 0.2],
    # 'max_samples': [None, 0.7, 0.8, 0.9],
}

In [None]:
# Tune and evaluate the Random Forest on the "_inf" split.
# ClassifierWrapper is defined outside this section; presumably `grid_search`
# runs a 10-fold search over `parametros` and `train_and_print_scores` reports
# train/test metrics — confirm against its implementation.
forest_clf = ClassifierWrapper(RandomForestClassifier(random_state=42), parametros)
forest_clf.grid_search(x_train_inf, y_train_inf, cv=10)
forest_clf.train_and_print_scores(x_train_inf, y_train_inf, x_test_inf, y_test_inf)

In [None]:
process_cv_results(forest_clf.cv_results_, top_n=10, output_filename='df_em_porcentes_forestM2.csv')

###### SVM

In [None]:
# Candidate hyper-parameter values for the SVM; the dict is handed to
# ClassifierWrapper in the next cell. Commented-out entries are options that
# were left out of the search.
# NOTE(review): `degree` presumably only matters for the "poly" kernel, so the
# "rbf" candidates would be evaluated once per degree with identical results —
# confirm against the SVC documentation.
parametros = {
    "C": [1, 2, 3, 4],
    "kernel": ["poly", "rbf"],
    "degree": [5, 6, 7, 8],
    # 'gamma': ['scale', 'auto'],
    # "coef0": [0.1, 1.0],
    # 'shrinking': [True, False],
    # 'probability': [True],
    # 'tol': [1e-4],
    # 'cache_size': np.arange(1,11,2),
    # "class_weight": [None, "balanced"],
    # 'verbose': [True],
    # 'max_iter': [100, 1000, -1],  # -1 means no limit
    # 'decision_function_shape': ['ovr', 'ovo'],
    # 'break_ties': [True],
}

In [None]:
# Tune and evaluate the SVM on the "_inf" split.
# ClassifierWrapper is defined outside this section; presumably `grid_search`
# runs a 10-fold search over `parametros` and `train_and_print_scores` reports
# train/test metrics — confirm against its implementation.
svm_clf = ClassifierWrapper(SVC(random_state=42), parametros)
svm_clf.grid_search(x_train_inf, y_train_inf, cv=10)
svm_clf.train_and_print_scores(x_train_inf, y_train_inf, x_test_inf, y_test_inf)

In [None]:
process_cv_results(svm_clf.cv_results_, top_n=10, output_filename='df_em_porcentes_svmM2.csv')

##### Rede Neural

In [None]:
# Candidate hyper-parameter values for the neural network (MLP); the dict is
# handed to ClassifierWrapper in the next cell. Commented-out entries are
# options that were left out of the search.
parametros = {
    # Each tuple lists the number of units per hidden layer.
    "hidden_layer_sizes": [(50,), (100,), (50, 50), (100, 100), (200, 100, 50)],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    "solver": ['lbfgs', 'sgd', 'adam'],
    "alpha": [0.01, 0.001, 0.0001],
    # "batch_size": ["auto", 100, 200],
    # 'learning_rate': ['constant', 'invscaling', 'adaptive'],
    # "learning_rate_init": [0.01],
    # 'power_t': [0.5, 0.9],
    'max_iter': [2000,2500,3000],
    # 'shuffle': [True, False],
    # 'random_state': [None, 42],
    # 'tol': [1e-4, 1e-3, 1e-2],
    # 'verbose': [True, False],
    # 'warm_start': [True, False],
    # 'momentum': [0.5, 0.9],
    # 'nesterovs_momentum': [True, False],
    # "early_stopping": [True, False],
    # 'validation_fraction': [0.1, 0.2],
    # "beta_1": [0.99],
    # "beta_2": [0.999],
    # "epsilon": [1e-8],
    # "n_iter_no_change": [20],
    # 'max_fun': [10000, 15000, 20000]
}

In [None]:
# Tune and evaluate the neural network on the "_inf" split.
# ClassifierWrapper is defined outside this section; presumably `grid_search`
# runs a 10-fold search over `parametros` and `train_and_print_scores` reports
# train/test metrics — confirm against its implementation.
neural_clf = ClassifierWrapper(MLPClassifier(random_state=42), parametros)
neural_clf.grid_search(x_train_inf, y_train_inf, cv=10)
neural_clf.train_and_print_scores(x_train_inf, y_train_inf, x_test_inf, y_test_inf)

In [None]:
process_cv_results(neural_clf.cv_results_, top_n=10, output_filename='df_em_porcentes_neuralM2.csv')

##### Validação Cruzada

In [None]:
# Stack the train and test partitions back together (rows appended, train
# first) so cross-validation can run over every sample.
y_poscomp_inf = np.concatenate([y_train_inf, y_test_inf])
x_poscomp_inf = np.concatenate([x_train_inf, x_test_inf])

In [None]:
# Evaluate each model with fixed hyper-parameters on the full "_inf" data via
# ClassifierWrapper.avaliar_classificador (defined outside this section).

# Decision Tree
params_arvore_inf = dict(
    criterion="entropy",
    max_depth=10,
    max_features=4,
    min_samples_leaf=2,
    random_state=42,
    splitter="random",
)
resultados_arvore_inf = ClassifierWrapper.avaliar_classificador(
    DecisionTreeClassifier, params_arvore_inf, x_poscomp_inf, y_poscomp_inf
)

# Random Forest
params_random_forest_inf = dict(
    bootstrap=False,
    criterion="entropy",
    max_depth=7,
    max_features="sqrt",
    min_samples_leaf=9,
    n_estimators=500,
    n_jobs=-1,
    random_state=42,
)
resultados_random_forest_inf = ClassifierWrapper.avaliar_classificador(
    RandomForestClassifier, params_random_forest_inf, x_poscomp_inf, y_poscomp_inf
)

# SVM
params_svm_inf = dict(
    C=4,
    degree=7,
    kernel="poly",
    random_state=42,
)
resultados_svm_inf = ClassifierWrapper.avaliar_classificador(
    SVC, params_svm_inf, x_poscomp_inf, y_poscomp_inf
)

# MLP (neural network)
params_rede_neural_inf = dict(
    activation="logistic",
    alpha=1e-05,
    batch_size=50,
    hidden_layer_sizes=(200, 100, 50),
    learning_rate="constant",
    learning_rate_init=0.001,
    max_iter=70,
    solver="adam",
    random_state=42,
)
resultados_rede_neural_inf = ClassifierWrapper.avaliar_classificador(
    MLPClassifier, params_rede_neural_inf, x_poscomp_inf, y_poscomp_inf
)

In [None]:
# Confusion matrices for the "_inf" models. They must be built from the "_inf"
# partitions: the plain x_train/x_test/y_train/y_test used here before belong
# to the first model's split, whose target is a different column.

# Decision Tree
matriz_confusao_arvore_inf = ClassifierWrapper.gerar_matriz_confusao(
    DecisionTreeClassifier,
    params_arvore_inf,
    x_train_inf,
    x_test_inf,
    y_train_inf,
    y_test_inf,
)

# Random Forest
matriz_confusao_random_forest_inf = ClassifierWrapper.gerar_matriz_confusao(
    RandomForestClassifier,
    params_random_forest_inf,
    x_train_inf,
    x_test_inf,
    y_train_inf,
    y_test_inf,
)

# SVM
matriz_confusao_svm_inf = ClassifierWrapper.gerar_matriz_confusao(
    SVC, params_svm_inf, x_train_inf, x_test_inf, y_train_inf, y_test_inf
)

# Neural network
matriz_confusao_rede_neural_inf = ClassifierWrapper.gerar_matriz_confusao(
    MLPClassifier,
    params_rede_neural_inf,
    x_train_inf,
    x_test_inf,
    y_train_inf,
    y_test_inf,
)

In [None]:
# One column per (model, partition) pair holding the train/test scores taken
# from the first element of each evaluation result.
_avaliacoes_inf = {
    "decisionTree": resultados_arvore_inf,
    "randomForest": resultados_random_forest_inf,
    "svm": resultados_svm_inf,
    "redeNeural": resultados_rede_neural_inf,
}
_colunas_inf = {}
for prefixo, avaliacao in _avaliacoes_inf.items():
    # Train column first, then test, to keep the original column order.
    _colunas_inf[f"{prefixo}_treino"] = avaliacao[0]["train_score"]
    _colunas_inf[f"{prefixo}_teste"] = avaliacao[0]["test_score"]
resultados_inf = pd.DataFrame(_colunas_inf)

In [None]:
resultados_inf

In [None]:
medias = resultados_inf.mean()
for nome_coluna, media in medias.items():
    print(f"Média da {nome_coluna}: {media * 100:.2f}%")

In [None]:
# Keep only the test-score columns, one per model.
colunas_teste_inf = [
    "decisionTree_teste",
    "randomForest_teste",
    "svm_teste",
    "redeNeural_teste",
]
resultados_inf_teste = resultados_inf.loc[:, colunas_teste_inf]

In [None]:
resultados_inf_teste.mean()

In [None]:
# Replace the technical column names with display names, in the same order
# as the columns were selected.
resultados_inf_teste.columns = [
    "Árvore de Decisão",
    "Random Forest",
    "SVM",
    "Rede Neural",
]

In [None]:
resultados_inf_teste

In [None]:
# Keep only the train-score columns, one per model.
colunas_treino_inf = [
    "decisionTree_treino",
    "randomForest_treino",
    "svm_treino",
    "redeNeural_treino",
]
resultados_inf_treino = resultados_inf.loc[:, colunas_treino_inf]

In [None]:
# Replace the technical column names with display names, in the same order
# as the columns were selected.
resultados_inf_treino.columns = [
    "Árvore de Decisão",
    "Random Forest",
    "SVM",
    "Rede Neural",
]

In [None]:
resultados_inf_treino.mean()

In [None]:
# Coefficient of variation of each score column (std / mean), as a percentage.
(resultados_inf.std() / resultados_inf.mean()) * 100

In [None]:
# Plot the learning curves from the per-model score table and save them as PDF.
# NOTE(review): `models` and `models_nome` are defined outside this section —
# confirm they match the models whose scores are in `resultados_inf`.
# NOTE(review): the output name says "ML2" while other outputs of this section
# are tagged as model 3 — verify it does not overwrite another model's figure.
visualizacao.plot_learning_curve(
    resultados_inf,
    models=models,
    model_names=models_nome,
    save_path="../imagens/curva_aprendizadoML2.pdf",
)

Teste de normalidade

In [None]:
from scipy.stats import shapiro

In [None]:
# Shapiro-Wilk normality test on each model's test scores; the cell displays
# one result per model in the order tree, forest, SVM, neural network.
tuple(
    shapiro(avaliacao[0]["test_score"])
    for avaliacao in (
        resultados_arvore_inf,
        resultados_random_forest_inf,
        resultados_svm_inf,
        resultados_rede_neural_inf,
    )
)

Teste de hipótese com ANOVA e Tukey

In [None]:
from scipy.stats import f_oneway

In [None]:
# One-way ANOVA across the four models' test scores; only the p-value is kept.
amostras_inf = [
    avaliacao[0]["test_score"]
    for avaliacao in (
        resultados_arvore_inf,
        resultados_random_forest_inf,
        resultados_svm_inf,
        resultados_rede_neural_inf,
    )
]
_, p_inf = f_oneway(*amostras_inf)
p_inf

In [None]:
# Decision rule for the ANOVA p-value stored in `p_inf`.
alpha = 0.05  # significance level
if p_inf <= alpha:
    print("Hipótese nula rejeitada. Dados são diferentes")
else:
    # A p-value above alpha only means H0 could not be rejected; it neither
    # rejects the alternative hypothesis nor proves the results are equal.
    print("Hipótese nula não rejeitada. Não há evidência de diferença entre os resultados")

In [None]:
# Long-format input for the multiple-comparison test: all test scores in one
# array, with a parallel list naming the algorithm that produced each score.
_test_scores_inf = {
    "arvore": resultados_arvore_inf[0]["test_score"],
    "random_forest": resultados_random_forest_inf[0]["test_score"],
    "svm": resultados_svm_inf[0]["test_score"],
    "rede_neural": resultados_rede_neural_inf[0]["test_score"],
}
resultados_algoritmos_inf = {
    "accuracy": np.concatenate(list(_test_scores_inf.values())),
    # Repeat each label once per score so both columns have the same length.
    "algoritmo": [
        nome
        for nome, scores in _test_scores_inf.items()
        for _ in range(len(scores))
    ],
}

In [None]:
resultados_df_inf = pd.DataFrame(resultados_algoritmos_inf)

In [None]:
# Swap the internal algorithm keys for the display names used in the plots.
nomes_exibicao_inf = {
    "arvore": "Árvore de Decisão",
    "random_forest": "Random Forest",
    "svm": "SVM",
    "rede_neural": "Rede Neural",
}
coluna_algoritmo_inf = resultados_df_inf["algoritmo"]
resultados_df_inf["algoritmo"] = coluna_algoritmo_inf.replace(nomes_exibicao_inf)

In [None]:
resultados_df_inf.head()

In [None]:
from statsmodels.stats.multicomp import MultiComparison

In [None]:
compara_algoritmos_inf = MultiComparison(
    resultados_df_inf["accuracy"], resultados_df_inf["algoritmo"]
)

In [None]:
teste_estatistico_inf = compara_algoritmos_inf.tukeyhsd()
print(teste_estatistico_inf)

In [None]:
# Draw the Tukey HSD simultaneous comparison plot and save it as PNG.
# NOTE(review): `savefig` is called on the returned object, so it is presumably
# a matplotlib Figure rather than an Axes, despite the name `ax` — confirm.
ax = teste_estatistico_inf.plot_simultaneous()
ax.savefig("../imagens/comparacionML2.png")

In [None]:
resultados_inf_teste.mean()

In [None]:
 "bootstrap": False,
    "criterion": "entropy",
    "max_depth": 7,
    "max_features": "sqrt",
    "min_samples_leaf": 9,
    "n_estimators": 500,
    "n_jobs": -1,
    "random_state": 42,

In [None]:
# Final Random Forest for the "_inf" data, fitted on the full
# x_poscomp_inf / y_poscomp_inf arrays with fixed hyper-parameters.
hiperparametros_rf_inf = dict(
    bootstrap=False,
    criterion="entropy",
    max_depth=7,
    max_features="sqrt",
    min_samples_leaf=9,
    n_estimators=500,
    n_jobs=-1,
    random_state=42,
)
classificador_randomForest = RandomForestClassifier(**hiperparametros_rf_inf)
classificador_randomForest.fit(x_poscomp_inf, y_poscomp_inf)

In [None]:
import pickle

# Persist the fitted Random Forest. The context manager closes the file handle
# even if pickling fails; the bare open() used before never closed it.
with open(
    "../dados/classificadores/randomForest_finalizado_model3.sav", "wb"
) as arquivo_modelo:
    pickle.dump(classificador_randomForest, arquivo_modelo)

##### Algoritmos para informação

In [None]:
# Baseline comparison on the "_inf" split: each estimator is fitted on the
# training partition and scored on both partitions.
nomes_modelos_inf = (
    "Árvore de Decisão (C4.5)",
    "Random Forest",
    "SVM",
    "Rede Neural",
)
estimadores_inf = (
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    SVC(random_state=42),
    MLPClassifier(random_state=42),
)
algorithms = list(zip(nomes_modelos_inf, estimadores_inf))

for name, classifier in algorithms:
    classifier.fit(x_train_inf, y_train_inf)

    # Predict on both partitions so train and test accuracy can be compared.
    train_predictions_inf, test_predictions_inf = (
        classifier.predict(particao) for particao in (x_train_inf, x_test_inf)
    )
    train_accuracy_inf = accuracy_score(y_train_inf, train_predictions_inf)
    test_accuracy_inf = accuracy_score(y_test_inf, test_predictions_inf)

    # Precision, recall and F1 on the test partition, macro-averaged over classes.
    precision_inf, recall_inf, f1_inf = (
        metrica(y_test_inf, test_predictions_inf, average="macro")
        for metrica in (precision_score, recall_score, f1_score)
    )
    conf_matrix_inf = confusion_matrix(y_test_inf, test_predictions_inf)

    print(
        f"{name} - Acurácia no Conjunto de Treinamento: {train_accuracy_inf * 100:.2f}%"
    )
    print(f"{name} - Acurácia no Conjunto de Teste: {test_accuracy_inf * 100:.2f}%")
    print(f"{name} - Precisão: {precision_inf * 100:.2f}%")
    print(f"{name} - Recall: {recall_inf * 100:.2f}%")
    print(f"{name} - F1-Score: {f1_inf * 100:.2f}%")
    print(f"{name} - Matriz de Confusão:\n{conf_matrix_inf}\n")

### Os algoritmos

In [None]:
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

# (display name, configured estimator) pairs evaluated by the loop that follows.
algorithms = [
    (
        # NOTE(review): the label says C4.5, but the estimator is scikit-learn's
        # DecisionTreeClassifier — confirm the label is accurate.
        "Árvore de Decisão (C4.5)",
        DecisionTreeClassifier(
            criterion="entropy", max_depth=4, min_samples_leaf=6, random_state=42
        ),
    ),
    (
        "Random Forest",
        RandomForestClassifier(
            bootstrap=False,
            criterion="entropy",
            max_depth=10,
            n_estimators=200,
            n_jobs=-1,
            random_state=42,
            # NOTE(review): each estimator is fitted only once below, so
            # warm_start presumably has no effect here — confirm.
            warm_start=True,
        ),
    ),
    (
        "SVM",
        SVC(
            C=5000,
            break_ties=True,
            # NOTE(review): no kernel is set, so the default kernel applies;
            # `degree` presumably only matters for "poly" — confirm.
            degree=1,
            probability=True,
            random_state=42,
            tol=0.0001,
            verbose=True,
        ),
    ),
    (
        "Rede Neural",
        MLPClassifier(
            activation="tanh", hidden_layer_sizes=(100, 100), random_state=42
        ),
    ),
]

# Fit every configured estimator on the first model's split and report its
# accuracy on both partitions plus macro-averaged test metrics.
for name, classifier in algorithms:
    classifier.fit(x_train, y_train)

    # Predict on both partitions so train and test accuracy can be compared.
    train_predictions, test_predictions = (
        classifier.predict(particao) for particao in (x_train, x_test)
    )
    train_accuracy = accuracy_score(y_train, train_predictions)
    test_accuracy = accuracy_score(y_test, test_predictions)

    # Precision, recall and F1 on the test partition, macro-averaged over classes.
    precision, recall, f1 = (
        metrica(y_test, test_predictions, average="macro")
        for metrica in (precision_score, recall_score, f1_score)
    )
    conf_matrix = confusion_matrix(y_test, test_predictions)

    print(f"{name} - Acurácia no Conjunto de Treinamento: {train_accuracy * 100:.2f}%")
    print(f"{name} - Acurácia no Conjunto de Teste: {test_accuracy * 100:.2f}%")
    print(f"{name} - Precisão: {precision * 100:.2f}%")
    print(f"{name} - Recall: {recall * 100:.2f}%")
    print(f"{name} - F1-Score: {f1 * 100:.2f}%")
    print(f"{name} - Matriz de Confusão:\n{conf_matrix}\n")