In [7]:
import os
import pandas as pd

def load_dataframes(dir="datasets/bests/"):
    datasets = os.listdir(dir)
    datasets = [d for d in datasets if d.endswith(".csv") and d.startswith("PCA")]

    dfs = []
    for dataset in datasets:
        df = pd.read_csv(dir + dataset, encoding="utf-8")
        dfs.append({"file": dataset, "dataframe": df})

    return dfs

In [8]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from numpy import mean
from sklearn.preprocessing import StandardScaler

dfs = load_dataframes()

# 10-fold CV
kf = KFold(n_splits=10, random_state=42, shuffle=True)

for df in dfs:
    dataset_name = df["file"].replace('.csv', '') 
    print(dataset_name)
    df = df["dataframe"]
    df_without_class = df.drop(["classe"], axis=1)
    X = df_without_class
    y = df.classe
    
    # Standardize the features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    results = []
    for i in range(1, 11):
        knn = KNeighborsClassifier(n_neighbors=i, metric='euclidean')
        scores = cross_val_score(knn, X, y, scoring='accuracy', cv=kf)
        results.append(f"{scores.mean():.3f}".replace(".", ","))
            
    results_df = pd.DataFrame([results], columns=range(1, 11))
    results_df.to_csv(f'results/knn_fold_{dataset_name}.csv', index=False)
    print(results_df)

PCA_10_CNN_16_256_MAX
      1      2      3      4      5      6      7      8      9      10
0  0,857  0,863  0,858  0,868  0,878  0,871  0,871  0,870  0,877  0,883
PCA_10_CNN_16_128_AVG
      1      2      3      4      5      6      7      8      9      10
0  0,773  0,779  0,797  0,792  0,802  0,792  0,808  0,807  0,806  0,801
PCA_10_CNN_16_256_AVG
      1      2      3      4      5      6      7      8      9      10
0  0,842  0,836  0,850  0,843  0,867  0,852  0,876  0,862  0,861  0,860
PCA_10_CNN_19_256_AVG
      1      2      3      4      5      6      7      8      9      10
0  0,838  0,842  0,867  0,855  0,868  0,862  0,871  0,861  0,873  0,865
PCA_10_CNN_16_128_MAX
      1      2      3      4      5      6      7      8      9      10
0  0,768  0,778  0,801  0,810  0,826  0,811  0,822  0,823  0,818  0,816
PCA_10_CNN_19_256_MAX
      1      2      3      4      5      6      7      8      9      10
0  0,870  0,863  0,886  0,880  0,880  0,877  0,882  0,885  0,886  0,878
