In [1]:
import os
import pandas as pd
import numpy as np

def load_dataframes(dir = "datasets/bests/"):
    """Load every ``.csv`` file located directly inside ``dir``.

    Parameters
    ----------
    dir : str
        Directory to scan. A trailing path separator is optional.

    Returns
    -------
    list of dict
        One ``{"file": <file name>, "dataframe": <pandas.DataFrame>}`` entry
        per CSV file, ordered by file name.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # processing order stable from run to run and machine to machine.
    csv_files = sorted(name for name in os.listdir(dir) if name.endswith(".csv"))

    return [
        {
            "file": csv_file,
            # os.path.join (rather than string concatenation) so that ``dir``
            # also works when it is passed without a trailing separator.
            "dataframe": pd.read_csv(os.path.join(dir, csv_file), encoding="utf-8"),
        }
        for csv_file in csv_files
    ]

In [2]:
from numpy import mean
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Evaluate entropy-based decision trees of increasing depth on every dataset
# returned by load_dataframes(), using 10-fold cross-validated accuracy, and
# write one single-row CSV of mean scores per dataset.
SEED = 42
MAX_DEPTHS = range(2, 11)
RESULTS_DIR = "results"

dfs = load_dataframes()

# 10-fold CV; shuffled with a fixed seed so the folds are the same on every run.
kf = KFold(n_splits=10, random_state=SEED, shuffle=True)

# DataFrame.to_csv does not create missing directories.
os.makedirs(RESULTS_DIR, exist_ok=True)

for entry in dfs:
    dataset_name = entry["file"].replace('.csv', '')
    print(dataset_name)
    df = entry["dataframe"]
    X = df.drop(["classe"], axis=1)
    y = df["classe"]

    results = []
    for depth in MAX_DEPTHS:
        # Standardization is part of the pipeline so that it is re-fitted on
        # the training portion of each fold only; fitting the scaler on the
        # full dataset beforehand would leak held-out statistics into training.
        # random_state pins the tree's internal feature shuffling so the
        # reported scores are reproducible.
        model = make_pipeline(
            StandardScaler(),
            DecisionTreeClassifier(criterion="entropy", max_depth=depth, random_state=SEED),
        )
        scores = cross_val_score(model, X, y, scoring='accuracy', cv=kf)
        # Three decimals with a decimal comma, kept as text in the output file.
        results.append('{:.3f}'.format(mean(scores)).replace('.', ','))

    results_df = pd.DataFrame([results], columns=MAX_DEPTHS)
    results_df.to_csv(os.path.join(RESULTS_DIR, f'DTree_fold_{dataset_name}.csv'), index=False)
    print(results_df)

PCA_10_CNN_16_256_MAX
      2      3      4      5      6      7      8      9      10
0  0,821  0,843  0,832  0,837  0,836  0,821  0,820  0,821  0,822
PCA_10_CNN_16_128_AVG
      2      3      4      5      6      7      8      9      10
0  0,723  0,759  0,764  0,776  0,776  0,764  0,763  0,761  0,758
CNN_16_128_AVG
      2      3      4      5      6      7      8      9      10
0  0,650  0,664  0,697  0,691  0,696  0,697  0,688  0,705  0,705
CNN_19_256_AVG
      2      3      4      5      6      7      8      9      10
0  0,793  0,828  0,828  0,838  0,848  0,836  0,832  0,825  0,838
CNN_16_128_MAX
      2      3      4      5      6      7      8      9      10
0  0,696  0,707  0,742  0,728  0,724  0,723  0,727  0,733  0,707
PCA_10_CNN_16_256_AVG
      2      3      4      5      6      7      8      9      10
0  0,763  0,787  0,826  0,832  0,827  0,833  0,826  0,843  0,835
CNN_16_256_MAX
      2      3      4      5      6      7      8      9      10
0  0,785  0,801  0,818  0,807