In [7]:
import os
import pandas as pd


def load_dataframes(dir="datasets/bests/"):
    datasets = os.listdir(dir)
    datasets = [d for d in datasets if d.endswith(".csv")]

    dfs = []
    for dataset in datasets:
        df = pd.read_csv(dir + dataset, encoding="utf-8")
        dfs.append({"file": dataset, "dataframe": df})

    return dfs

In [8]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.naive_bayes import GaussianNB, MultinomialNB, ComplementNB

dfs = load_dataframes()

# 10-fold CV
kf = KFold(n_splits=10, random_state=42, shuffle=True)

for df in dfs:
    dataset_name = df["file"].replace('.csv', '')
    print(dataset_name)
    df = df["dataframe"]
    df_without_class = df.drop(["classe"], axis=1)
    X = df_without_class
    y = df.classe
    
    X = MinMaxScaler().fit_transform(X)
            
    gnb = GaussianNB(priors = None, var_smoothing=1e-09)
    scores = cross_val_score(gnb, X, y, scoring='accuracy', cv=kf)
    acc_gnb = scores.mean()
    
    mnb = MultinomialNB(fit_prior=True, alpha=1.0)
    scores = cross_val_score(mnb, X, y, scoring='accuracy', cv=kf)
    acc_mnb = scores.mean()
    
    cnb = ComplementNB(alpha=1.0, force_alpha=True, fit_prior=True)
    scores = cross_val_score(cnb, X, y, scoring='accuracy', cv=kf)
    acc_cnb = scores.mean()
    
    row = {"GaussianNB": f"{acc_gnb:.3f}".replace('.', ','),
     "MultinomialNB": f"{acc_mnb:.3f}".replace('.', ','), "ComplementNB": f"{acc_cnb:.3f}".replace('.', ',')}
    
    results_df = pd.DataFrame(row, index=[0])
    results_df.to_csv(f'results/NB_fold_{dataset_name}.csv', index=False)
    print(results_df)

PCA_10_CNN_16_256_MAX
  GaussianNB MultinomialNB ComplementNB
0      0,885         0,863        0,866
PCA_10_CNN_16_128_AVG
  GaussianNB MultinomialNB ComplementNB
0      0,823         0,796        0,822
CNN_16_128_AVG
  GaussianNB MultinomialNB ComplementNB
0      0,801         0,838        0,815
CNN_19_256_AVG
  GaussianNB MultinomialNB ComplementNB
0      0,880         0,881        0,871
CNN_16_128_MAX
  GaussianNB MultinomialNB ComplementNB
0      0,823         0,838        0,807
PCA_10_CNN_16_256_AVG
  GaussianNB MultinomialNB ComplementNB
0      0,881         0,862        0,895
CNN_16_256_MAX
  GaussianNB MultinomialNB ComplementNB
0      0,867         0,887        0,851
CNN_19_256_MAX
  GaussianNB MultinomialNB ComplementNB
0      0,865         0,889        0,865
PCA_10_CNN_19_256_AVG
  GaussianNB MultinomialNB ComplementNB
0      0,877         0,875        0,873
PCA_10_CNN_16_128_MAX
  GaussianNB MultinomialNB ComplementNB
0      0,828         0,810        0,817
PCA_10_CNN_19_2