In [36]:

import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

def load_dataframes(dir="datasets/bests/"):
    """Load every ``PCA*.csv`` file found directly inside ``dir``.

    Args:
        dir: Directory to scan. May be given with or without a trailing
            path separator. (The name shadows the ``dir`` builtin; it is kept
            because it is part of the existing call interface.)

    Returns:
        A list of ``{"file": <file name>, "dataframe": <DataFrame>}`` dicts,
        ordered by file name.

    Raises:
        FileNotFoundError: if ``dir`` does not exist (from ``os.listdir``).
    """
    csv_names = sorted(
        name
        for name in os.listdir(dir)
        if name.startswith("PCA") and name.endswith(".csv")
    )

    # os.path.join inserts the separator only when it is missing, so both
    # "datasets/bests" and "datasets/bests/" resolve to the same files.
    return [
        {
            "file": name,
            "dataframe": pd.read_csv(os.path.join(dir, name), encoding="utf-8"),
        }
        for name in csv_names
    ]


def split_data_from_dataframe(df):
    """Separate a loaded dataset into standardized features and labels.

    Args:
        df: A mapping whose ``"dataframe"`` entry is a DataFrame containing
            a ``classe`` column (the label); all other columns are treated
            as features.

    Returns:
        ``(X, y)`` where ``X`` is the output of ``StandardScaler.fit_transform``
        on the feature columns and ``y`` is the ``classe`` column.

    Note:
        The scaler is fit on every row of the frame. If the result is split
        into train/test afterwards, the held-out rows have already
        contributed to the scaling statistics.
    """
    frame = df["dataframe"]
    features = frame.drop(columns=["classe"])
    labels = frame["classe"]

    # Standardize the features using statistics computed over all rows.
    scaled_features = StandardScaler().fit_transform(features)
    return scaled_features, labels


def split_data_to_train_and_test(X, y, test_size=0.3, random_state=42):
    """Split features and labels into a train part and a hold-out test part.

    Args:
        X: Feature matrix.
        y: Labels aligned with the rows of ``X``.
        test_size: Forwarded to ``train_test_split``; the default of 0.3
            keeps the previous hard-coded 70/30 split.
        random_state: Seed forwarded to ``train_test_split``; the default of
            42 keeps the previous hard-coded seed, so existing calls produce
            the same partition as before.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as returned by
        ``train_test_split``.
    """
    return tuple(
        train_test_split(X, y, test_size=test_size, random_state=random_state)
    )

In [37]:

from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

# Hold-out evaluation: for each loaded dataset, fit a Euclidean k-NN classifier
# for every neighbour count in K_VALUES and write one row of accuracies to
# <RESULTS_DIR>/knn_holdout_<dataset>.csv.
K_VALUES = range(1, 11)  # neighbour counts evaluated; also used as CSV column labels
RESULTS_DIR = "results"

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before anything is written.
os.makedirs(RESULTS_DIR, exist_ok=True)

dfs = load_dataframes()

for df in dfs:
    # Strip only the trailing extension from the file name.
    dataset_name = os.path.splitext(df["file"])[0]
    print(dataset_name)

    # NOTE(review): split_data_from_dataframe standardizes with statistics
    # computed over the whole dataset, and the train/test split happens
    # afterwards, so the test rows influence the scaling. Consider fitting
    # the scaler on the training rows only.
    X, y = split_data_from_dataframe(df)
    X_train_70, X_test_30, y_train_70, y_test_30 = split_data_to_train_and_test(X, y)

    results = []
    for i in K_VALUES:
        knn = KNeighborsClassifier(n_neighbors=i, metric='euclidean')
        knn.fit(X_train_70, y_train_70)

        y_pred = knn.predict(X_test_30)

        acuracia = metrics.accuracy_score(y_test_30, y_pred)
        # Stored as text: three decimals, with the dot replaced by a comma.
        results.append(f"{acuracia:.3f}".replace(".", ","))

    results_df = pd.DataFrame([results], columns=K_VALUES)
    results_df.to_csv(
        os.path.join(RESULTS_DIR, f'knn_holdout_{dataset_name}.csv'), index=False
    )
    print(results_df)

PCA_10_CNN_16_128_AVG
      1      2      3      4      5      6      7      8      9      10
0  0,804  0,779  0,804  0,800  0,800  0,779  0,800  0,792  0,821  0,808
PCA_10_CNN_16_128_MAX
      1      2      3      4      5      6      7      8      9      10
0  0,775  0,771  0,796  0,808  0,808  0,792  0,796  0,779  0,787  0,792
PCA_10_CNN_16_256_AVG
      1      2      3      4      5      6      7      8      9      10
0  0,846  0,804  0,846  0,854  0,854  0,850  0,871  0,854  0,854  0,863
PCA_10_CNN_16_256_MAX
      1      2      3      4      5      6      7      8      9      10
0  0,858  0,867  0,858  0,875  0,867  0,871  0,871  0,875  0,879  0,854
PCA_10_CNN_19_256_AVG
      1      2      3      4      5      6      7      8      9      10
0  0,833  0,833  0,858  0,842  0,858  0,842  0,850  0,838  0,829  0,833
PCA_10_CNN_19_256_MAX
      1      2      3      4      5      6      7      8      9      10
0  0,879  0,854  0,846  0,863  0,867  0,867  0,867  0,854  0,863  0,863
