In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

from librep.datasets.har.loaders import PandasMultiModalLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from umap import UMAP
from librep.transforms.fft import FFT
from librep.datasets.multimodal.operations import DatasetWindowedTransform
from sklearn.ensemble import RandomForestClassifier
from umap import UMAP
from librep.datasets.multimodal.operations import (
    DatasetFitter,
    DatasetPredicter,
    DatasetWindowedTransform,
    DatasetY,
    DatasetEvaluator,
    DatasetTransformer,
)
from librep.metrics.report import ClassificationReport

def aplly_fft(train, val, test):

    transformer = DatasetWindowedTransform(transform=FFT(centered=True), do_fit=False, new_suffix=".fft")
    train_fft, val_fft, test_fft = transformer(train), transformer(val), transformer(test)

    return train_fft, val_fft, test_fft

def train_models(info, train_processed, validation_processed, test_processed, umap: bool = False, reduce: str = "all", n_components: int = 2):
    
    train_fft, val_fft, test_fft = aplly_fft(train_processed, validation_processed, test_processed)
    train_fft = train_fft.concatenate(val_fft)

    if umap:
        if reduce == "all":
            model_reducer = UMAP(n_components=n_components, random_state=42)     
            train_fft.X = model_reducer.fit_transform(train_fft.X)
            test_fft.X = model_reducer.transform(test_fft.X)

        elif reduce == "sensor":
            model_reducer_acc = UMAP(n_components=n_components, random_state=42)     
            train_fft_acc = model_reducer_acc.fit_transform(train_fft.X[:,:90])
            test_fft_acc = model_reducer_acc.transform(test_fft.X[:,:90])

            model_reducer_gyr = UMAP(n_components=n_components)     
            train_fft_gyr = np.array(model_reducer_gyr.fit_transform(train_fft.X[:,90:]))
            test_fft_gyr = np.array(model_reducer_gyr.transform(test_fft.X[:,90:]))

            train_fft.X = np.concatenate((train_fft_acc, train_fft_gyr), axis=1)
            test_fft.X = np.concatenate((test_fft_acc, test_fft_gyr), axis=1)
        else:
            data = {
                "train": [],
                "test": []
            }
            for i in range(6):
                model_reducer = UMAP(n_components=n_components, random_state=42)
                train = np.array(model_reducer.fit_transform(train_fft.X[:,i*30: (i+1)*30]))
                test = np.array(model_reducer.transform(test_fft.X[:,i*30: (i+1)*30]))

                data["train"].append(train)
                data["test"].append(test)

            data["train"] = tuple(data["train"])  
            data["test"] = tuple(data["test"])
            train_fft.X = np.concatenate(data["train"], axis=1)
            test_fft.X = np.concatenate(data["test"], axis=1)

    models = {
        "RandomForest": RandomForestClassifier(),
        "SVC": SVC(),
        "KNN": KNeighborsClassifier(n_neighbors=5)
    }

    for classifier, model in models.items():

        DatasetFitter(model, use_y=True)(train_fft)
        y_pred = DatasetPredicter(model)(test_fft)
        y_true = DatasetY()(test_fft)

        accuracy = DatasetEvaluator(ClassificationReport(normalize="all", plot_confusion_matrix=False))(y_true, y_pred)['accuracy']
        info[classifier + " - acc"].append(accuracy)

    info["Type reducer"].append(reduce)
    info["Total of features"].append(train_fft.X.shape[1])
    
    return info


In [3]:
datasets = [
    "KuHar",
    "MotionSense",
    "UCI",
    "WISDM",
    "RealWorld",
]


info_classifiers = {
    "Dataset": [],
    "RandomForest - acc": [],
    "SVC - acc": [],
    "KNN - acc": [],
    "Type reducer": [],
    "Total of features": []

}

for dataset in datasets:

    for reduce in ["all", "sensor", "axis"]:

        info_classifiers["Dataset"].append(dataset)

        processed_view_path = Path(f"/home/patrick/Downloads/data/standartized_balanced") / dataset
        train_processed, validation_processed, test_processed = PandasMultiModalLoader(
            processed_view_path
        ).load(label="standard activity code")

        info_classifiers = train_models(
            info_classifiers,
            train_processed, validation_processed, 
            test_processed, 
            umap=False, 
            reduce=reduce,
        )

        for n_components in [2, 5, 10]:

            processed_view_path = Path(f"/home/patrick/Downloads/data/standartized_balanced") / dataset
            train_processed, validation_processed, test_processed = PandasMultiModalLoader(
                processed_view_path
            ).load(label="standard activity code")

            info_classifiers["Dataset"].append(dataset)

            info_classifiers = train_models(
                info_classifiers, 
                train_processed, validation_processed, 
                test_processed, 
                umap=True, 
                reduce=reduce,
                n_components=n_components
            )


In [4]:
for key, value in info_classifiers.items():
    print(key, len(value))

Dataset 60
RandomForest - acc 60
SVC - acc 60
KNN - acc 60
Type reducer 60
Total of features 60


In [5]:
df = pd.DataFrame(info_classifiers)
df[(df["Type reducer"] == "all") & (df["Total of features"] == 180)]

Unnamed: 0,Dataset,RandomForest - acc,SVC - acc,KNN - acc,Type reducer,Total of features
0,KuHar,0.8125,0.722222,0.888889,all,180
12,MotionSense,0.928437,0.830508,0.909605,all,180
24,UCI,0.927536,0.795652,0.823188,all,180
36,WISDM,0.853621,0.804314,0.862866,all,180
48,RealWorld,0.723833,0.715436,0.666782,all,180


In [2]:
import yaml

with open("/home/patrick/Documents/Repositories/hiaac-m4-experiments/experiments/experiment_executor/experiments/reducer_comb_1_not_intra/standartized_balanced02074.yaml", 'r') as f:
  file = yaml.load(f)


  file = yaml.load(f)


In [7]:
# files_path = "/home/patrick/Documents/Repositories/hiaac-m4-experiments/experiments/experiment_executor/experiments/test_executer"
# dataset = 'kuhar'
# reduce = 'all'
# with open(f"{files_path}/{dataset}_{reduce}_2.yaml", 'w') as f:
#     yaml.dump(file, f)

In [8]:
file["reducer_dataset"]

['wisdm.standartized_balanced[train]',
 'wisdm.standartized_balanced[validation]']

In [3]:
files_path = "/home/patrick/Documents/Repositories/hiaac-m4-experiments/experiments/experiment_executor/experiments/test_executer"

# Criando arquivos de configuração yaml para cada dataset e experimento

reduce_on = ["all", "sensor", "axis"]
in_use_features = ["accel-x", "accel-y", "accel-z", "gyro-x", "gyro-y", "gyro-z"]
n_components = [2, 5, 10]

datasets = [
    'kuhar',
    "motionsense",
    "uci",
    "wisdm",
    "realworld"
]

for dataset in datasets:
    new_file = file.copy()
    file["reducer_dataset"] = [f'{dataset}.standartized_balanced[train]', f'{dataset}.standartized_balanced[validation]']
    file["test_dataset"] = [f'{dataset}.standartized_balanced[test]']
    file["train_dataset"] = [f'{dataset}.standartized_balanced[train]', f'{dataset}.standartized_balanced[validation]']
    for reduce in reduce_on:
        new_file["extra"]["reduce_on"] = reduce
        for n in n_components:
            new_file["reducer"]["kwargs"]["n_components"] = n
            new_file["reducer"]["name"] = f"umap-{n}"
            
            with open(f"{files_path}/{dataset}_{reduce}_{n}.yaml", 'w') as f:
                yaml.dump(file, f)

In [5]:
import os
path = "/home/patrick/Documents/Repositories/hiaac-m4-experiments/experiments/experiment_executor/results/teste_executer/execution/"

# Carregando resultados de todos os experimentos em path

results = []
for file in os.listdir(path):
    if file.endswith(".yaml"):
        with open(path+file, 'r') as f:
            results.append(yaml.load(f))

  results.append(yaml.load(f))


In [54]:
# # Corrigindo arquivos

# path = "/home/patrick/Documents/Repositories/hiaac-m4-experiments/experiments/experiment_executor/experiments/test_executer/"

# # Carregando resultados de todos os experimentos em path

# results = []
# for file in os.listdir(path):
#     if file.endswith(".yaml"):
#         with open(path+file, 'r') as f:
#             experiment = yaml.load(f)
#             experiment["reducer"]["kwargs"]["random_state"] = 42
#         with open(path+file, 'w') as f:
#             yaml.dump(experiment, f)

  experiment = yaml.load(f)


In [6]:
datasets = {
    'kuhar.standartized_balanced[test]': "KuHar",
    "motionsense.standartized_balanced[test]": "MotionSense",
    "uci.standartized_balanced[test]": "UCI",
    "wisdm.standartized_balanced[test]": "WISDM",
    "realworld.standartized_balanced[test]": "RealWorld",
}

new_df = {
    "Dataset": [],
    "RandomForest - acc": [],
    "SVC - acc": [],
    "KNN - acc": [],
    "Type reducer": [],
    "n_components": []
}

for result in results:
    dataset = datasets[result["experiment"]["test_dataset"][0]]
    new_df["Dataset"].append(dataset)

    new_df["RandomForest - acc"].append(np.mean([
        acc["result"][0]["accuracy"] for acc in result["report"][0]["results"]['runs']
    ]))

    new_df["KNN - acc"].append(np.mean([
        acc["result"][0]["accuracy"] for acc in result["report"][1]["results"]['runs']
    ]))

    new_df["SVC - acc"].append(np.mean([
        acc["result"][0]["accuracy"] for acc in result["report"][2]["results"]['runs']
    ]))

    new_df["Type reducer"].append(result["experiment"]["extra"]["reduce_on"])

    new_df["n_components"].append(result["experiment"]["reducer"]["kwargs"]["n_components"])

new_df = pd.DataFrame(new_df)

# Ordenando os resultados por Type reducer e n_components

new_df = new_df.sort_values(by=["Type reducer", "n_components"])
new_df


Unnamed: 0,Dataset,RandomForest - acc,SVC - acc,KNN - acc,Type reducer,n_components
9,MotionSense,0.879284,0.892655,0.877589,all,2
11,RealWorld,0.63009,0.598229,0.634231,all,2
30,KuHar,0.809722,0.763889,0.805556,all,2
41,UCI,0.751014,0.686957,0.752174,all,2
44,WISDM,0.797874,0.821572,0.79322,all,2
1,MotionSense,0.916478,0.904896,0.913371,all,5
2,KuHar,0.789583,0.763889,0.805556,all,5
18,WISDM,0.839353,0.8453,0.835747,all,5
21,RealWorld,0.65283,0.654819,0.65804,all,5
34,UCI,0.814058,0.791304,0.794203,all,5


In [7]:
dataset = "MotionSense"
new_df[(new_df["Dataset"] == dataset) & (new_df["n_components"] == 2)]

Unnamed: 0,Dataset,RandomForest - acc,SVC - acc,KNN - acc,Type reducer,n_components
9,MotionSense,0.879284,0.892655,0.877589,all,2
14,MotionSense,0.933051,0.917137,0.915254,axis,2
6,MotionSense,0.899623,0.881356,0.89548,sensor,2


In [9]:
# Carregando os dados results.yaml

with open("results.yaml", "r") as f:
    results = yaml.load(f)
df = pd.DataFrame(results)
df

  results = yaml.load(f)


Unnamed: 0,Dataset,KNN - acc,RandomForest - acc,SVC - acc,Total of features,Type reducer
0,KuHar,0.888889,0.8125,0.722222,180,all
1,KuHar,0.798611,0.798611,0.770833,2,all
2,KuHar,0.805556,0.8125,0.756944,5,all
3,KuHar,0.819444,0.826389,0.75,10,all
4,KuHar,0.888889,0.854167,0.722222,180,sensor
5,KuHar,0.875,0.854167,0.854167,4,sensor
6,KuHar,0.875,0.854167,0.861111,10,sensor
7,KuHar,0.881944,0.881944,0.868056,20,sensor
8,KuHar,0.888889,0.8125,0.722222,180,axis
9,KuHar,0.881944,0.888889,0.861111,12,axis


In [10]:
# Ordenando os resultados por Type reducer

df = df.sort_values(by=["Type reducer"])

df[(df["Dataset"] == dataset) & (df["Total of features"].isin([2, 4, 12]))]

Unnamed: 0,Dataset,KNN - acc,RandomForest - acc,SVC - acc,Total of features,Type reducer
13,MotionSense,0.878531,0.879473,0.889831,2,all
21,MotionSense,0.922787,0.935028,0.92467,12,axis
17,MotionSense,0.898305,0.899247,0.890772,4,sensor


In [11]:
df.reset_index(drop=True, inplace=True)
df2 = df[df["Total of features"] != 180].copy()

# Ordenando os resultados por Dataset

df2 = df2.sort_values(by=["Dataset", "Type reducer"]).reset_index(drop=True)
df2

Unnamed: 0,Dataset,KNN - acc,RandomForest - acc,SVC - acc,Total of features,Type reducer
0,KuHar,0.798611,0.798611,0.770833,2,all
1,KuHar,0.805556,0.8125,0.756944,5,all
2,KuHar,0.819444,0.826389,0.75,10,all
3,KuHar,0.881944,0.888889,0.861111,12,axis
4,KuHar,0.888889,0.895833,0.881944,60,axis
5,KuHar,0.888889,0.881944,0.861111,30,axis
6,KuHar,0.881944,0.881944,0.868056,20,sensor
7,KuHar,0.875,0.854167,0.861111,10,sensor
8,KuHar,0.875,0.854167,0.854167,4,sensor
9,MotionSense,0.910546,0.910546,0.897363,10,all


In [12]:
new_df.reset_index(drop=True, inplace=True)
# Ordenando os resultados por Dataset e Type reducer

new_df = new_df.sort_values(by=["Dataset", "Type reducer"]).reset_index(drop=True)
new_df

Unnamed: 0,Dataset,RandomForest - acc,SVC - acc,KNN - acc,Type reducer,n_components
0,KuHar,0.809722,0.763889,0.805556,all,2
1,KuHar,0.789583,0.763889,0.805556,all,5
2,KuHar,0.8375,0.784722,0.826389,all,10
3,KuHar,0.888889,0.861111,0.840278,axis,2
4,KuHar,0.8875,0.875,0.875,axis,5
5,KuHar,0.890972,0.868056,0.888889,axis,10
6,KuHar,0.854167,0.840278,0.854167,sensor,2
7,KuHar,0.866667,0.861111,0.875,sensor,5
8,KuHar,0.888889,0.861111,0.902778,sensor,10
9,MotionSense,0.879284,0.892655,0.877589,all,2


In [36]:
columns = [
    "RandomForest - acc",
    "SVC - acc",
    "KNN - acc"
]

diff = (new_df[columns] - df2[columns]) * 100
diff["Dataset"] = df2["Dataset"]
diff["Type reducer"] = df2["Type reducer"]
# diff["n_components"] = df2["n_components"]
diff["Total of features"] = df2["Total of features"]
diff.describe()

Unnamed: 0,RandomForest - acc,SVC - acc,KNN - acc,Total of features
count,45.0,45.0,45.0,45.0
mean,0.033726,1.012424,-0.550717,17.0
std,2.34258,3.209212,2.126163,17.477258
min,-7.072464,-5.516179,-6.811594,2.0
25%,-0.414079,-0.694444,-1.388889,5.0
50%,0.037665,0.694444,-0.345066,10.0
75%,0.902778,2.162411,0.356568,20.0
max,7.028986,10.869565,4.927536,60.0


In [63]:
erros = {
    columns: [] for columns in diff.columns
}
error = 5.0
erros  = pd.DataFrame(erros)

columns = list(diff.columns)

for i in range(len(diff)):
    # Verificando se a diferença é maior que o erro em pelo menos uma das colunas
    if abs(diff[columns[:-3]].iloc[i]).max() >= error:
        erros = pd.concat([erros, diff.iloc[i:i+1]])

erros.reset_index(drop=True, inplace=True)
erros

Unnamed: 0,RandomForest - acc,SVC - acc,KNN - acc,Dataset,Type reducer,Total of features
0,3.574879,5.912123,0.954681,RealWorld,sensor,4.0
1,-7.072464,-4.782609,-6.376812,UCI,all,10.0
2,0.101449,6.376812,-1.594203,UCI,all,5.0
3,7.028986,10.869565,4.927536,UCI,all,2.0
4,-3.014493,-4.347826,-6.811594,UCI,axis,30.0
5,2.724638,6.231884,1.15942,UCI,axis,12.0
6,-2.674884,-5.516179,-3.174114,WISDM,sensor,20.0
7,1.639445,7.673344,1.140216,WISDM,sensor,4.0


In [59]:
abs(diff[columns[:-3]].iloc[i]).max(), abs(diff[columns[:-3]].iloc[i]), diff.iloc[i:i+1]

(7.673343605546979,
 RandomForest - acc    1.639445
 SVC - acc             7.673344
 KNN - acc             1.140216
 Name: 44, dtype: float64,
     RandomForest - acc  SVC - acc  KNN - acc Dataset Type reducer  \
 44            1.639445   7.673344   1.140216   WISDM       sensor   
 
     Total of features  
 44                  4  )