In [17]:
import os
import pandas as pd

def load_dataframes(dir = "datasets/bests/"):
    datasets = os.listdir(dir)
    datasets = [d for d in datasets if d.endswith(".csv")]

    dfs = []
    for dataset in datasets:
        df = pd.read_csv(dir + dataset, encoding="utf-8")
        df.head()
        dfs.append({"file": dataset, "dataframe": df})

    return dfs

In [18]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

dfs = load_dataframes()

for df in dfs:
    dataset_name = df["file"].replace('.csv', '')
    print(dataset_name)
    df = df["dataframe"]
    df_without_class = df.drop(["classe"], axis=1)
    X = df_without_class
    y = df.classe
    
    # Standardize the features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    X_train_70, X_test_30, y_train_70, y_test_30 = train_test_split(X, y, test_size=0.3, random_state=42)
    
    results = []
    for i in range(2, 11):
        dtc = DecisionTreeClassifier(criterion="entropy", max_depth=i)

        # Train Decision Tree Classifier
        dtc = dtc.fit(X_train_70, y_train_70)

        # Predict the response for test dataset
        y_pred = dtc.predict(X_test_30)

        acuracia = metrics.accuracy_score(y_test_30, y_pred)
        results.append('{:.3f}'.format(acuracia).replace('.', ','))
            
    results_df = pd.DataFrame([results], columns=range(2, 11))
    results_df.to_csv(f'results/DTree_holdout_{dataset_name}.csv', index=False)
    print(results_df)

PCA_10_CNN_16_256_MAX
      2      3      4      5      6      7      8      9      10
0  0,800  0,829  0,796  0,854  0,829  0,829  0,829  0,829  0,833
PCA_10_CNN_16_128_AVG
      2      3      4      5      6      7      8      9      10
0  0,683  0,746  0,779  0,771  0,779  0,771  0,771  0,779  0,779
CNN_16_128_AVG
      2      3      4      5      6      7      8      9      10
0  0,537  0,679  0,717  0,717  0,717  0,717  0,738  0,721  0,725
CNN_19_256_AVG
      2      3      4      5      6      7      8      9      10
0  0,775  0,804  0,804  0,808  0,821  0,796  0,800  0,817  0,821
CNN_16_128_MAX
      2      3      4      5      6      7      8      9      10
0  0,713  0,725  0,725  0,708  0,688  0,688  0,700  0,729  0,696
PCA_10_CNN_16_256_AVG
      2      3      4      5      6      7      8      9      10
0  0,738  0,787  0,783  0,804  0,804  0,808  0,821  0,812  0,812
CNN_16_256_MAX
      2      3      4      5      6      7      8      9      10
0  0,750  0,758  0,746  0,733