In [24]:
import os
import pandas as pd

def load_dataframes(dir="datasets/bests/"):
    datasets = os.listdir(dir)
    datasets.sort()
    datasets = [d for d in datasets if d.endswith(".csv")]

    dfs = []
    for dataset in datasets:
        df = pd.read_csv(dir + dataset, encoding="utf-8")
        df.head()
        dfs.append({"file": dataset, "dataframe": df})

    return dfs

In [25]:
from sklearn.naive_bayes import GaussianNB, MultinomialNB, ComplementNB
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler

dfs = load_dataframes()

for df in dfs:
    dataset_name = df["file"].replace('.csv', '')
    print(dataset_name)

    df = df["dataframe"]
    df_without_class = df.drop(["classe"], axis=1)
    X = df_without_class
    y = df.classe

    # super important to normalize the data before using MultinomialNB
    X = MinMaxScaler().fit_transform(X)
    
    X_train_70, X_test_30, y_train_70, y_test_30 = train_test_split(X, y, test_size=0.3, random_state=19)
    
    gnb = GaussianNB(priors = None, var_smoothing=1e-09)
    gnb.fit(X_train_70, y_train_70)
    y_pred = gnb.predict(X_test_30)
    acc_gnb = metrics.accuracy_score(y_test_30, y_pred)
    
    mnb = MultinomialNB(fit_prior=True, alpha=1.0)
    mnb.fit(X_train_70, y_train_70)
    y_pred = mnb.predict(X_test_30)
    acc_mnb = metrics.accuracy_score(y_test_30, y_pred)
    
    cnb = ComplementNB(alpha=1.0, force_alpha=True, fit_prior=True)
    cnb.fit(X_train_70, y_train_70)
    y_pred = cnb.predict(X_test_30)
    acc_cnb = metrics.accuracy_score(y_test_30, y_pred)
    
    row = {"GaussianNB": f"{acc_gnb:.3f}".replace('.', ','),
     "MultinomialNB": f"{acc_mnb:.3f}".replace('.', ','), "ComplementNB": f"{acc_cnb:.3f}".replace('.', ',')}
    
    results_df = pd.DataFrame(row, index=[0])
    results_df.to_csv(f'results/NB_holdout_{dataset_name}.csv', index=False)
    print(results_df)


CNN_16_128_AVG
  GaussianNB MultinomialNB ComplementNB
0      0,762         0,850        0,821
CNN_16_128_MAX
  GaussianNB MultinomialNB ComplementNB
0      0,800         0,829        0,812
CNN_16_256_AVG
  GaussianNB MultinomialNB ComplementNB
0      0,838         0,896        0,850
CNN_16_256_MAX
  GaussianNB MultinomialNB ComplementNB
0      0,812         0,892        0,842
CNN_19_256_AVG
  GaussianNB MultinomialNB ComplementNB
0      0,871         0,900        0,871
CNN_19_256_MAX
  GaussianNB MultinomialNB ComplementNB
0      0,829         0,900        0,854
PCA_10_CNN_16_128_AVG
  GaussianNB MultinomialNB ComplementNB
0      0,833         0,825        0,833
PCA_10_CNN_16_128_MAX
  GaussianNB MultinomialNB ComplementNB
0      0,842         0,842        0,846
PCA_10_CNN_16_256_AVG
  GaussianNB MultinomialNB ComplementNB
0      0,883         0,871        0,875
PCA_10_CNN_16_256_MAX
  GaussianNB MultinomialNB ComplementNB
0      0,858         0,883        0,858
PCA_10_CNN_19_256_AVG
