In [67]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
import pickle

from librep.transforms.fft import FFT
from librep.datasets.har.loaders import PandasMultiModalLoader
from librep.utils.dataset import PandasDatasetsIO
from librep.datasets.multimodal import PandasMultiModalDataset
from librep.datasets.multimodal.operations import DatasetWindowedTransform
from librep.datasets.multimodal.operations import (
    DatasetFitter,
    DatasetPredicter,
    DatasetWindowedTransform,
    DatasetY,
    DatasetEvaluator,
    DatasetTransformer,
)

from umap import UMAP

In [68]:
# Display name for each numeric "standard activity code".
labels_activity = {
    0: "sit",
    1: "stand",
    2: "walk",
    3: "stair up",
    4: "stair down",
    5: "run",
    6: "stair up and down",
}

# Names of the dataset folders handled by this notebook.
datasets = ["KuHar", "MotionSense", "UCI", "WISDM", "RealWorld"]

# One slot per dataset for a fitted UMAP model; None until a model is stored.
umap_models = dict.fromkeys(datasets)

# Result slots indexed first by view ("old" / "new"), then by dataset name.
# Each view gets its own independent dictionary.
datas = {
    "old": dict.fromkeys(datasets),
    "new": dict.fromkeys(datasets),
}

In [69]:
def aplly_fft(data):
    """Return ``data`` after a windowed, centered FFT transform.

    A ``DatasetWindowedTransform`` wrapping ``FFT(centered=True)`` is created
    with fitting disabled and ``".fft"`` as the new suffix, and is then called
    on ``data``. Whatever that call returns is handed back unchanged.
    """
    fft_transform = FFT(centered=True)
    windowed_fft = DatasetWindowedTransform(
        transform=fft_transform,
        do_fit=False,
        new_suffix=".fft",
    )
    return windowed_fft(data)

def generate_chart(
        df,
        title,
        labels,
        hovertext: str = "activity"
    ):
    """Display a scatter plot with one trace per value of the ``type data`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``x``, ``y``, ``type data`` and the column
        named by ``hovertext``.
    title : str
        Text used for both the figure title and the legend title.
    labels
        Accepted for interface compatibility; this function does not read it.
    hovertext : str, default "activity"
        Name of the ``df`` column whose values are shown when hovering a point.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.

    Raises
    ------
    KeyError
        If ``df`` lacks ``type data``, ``x``, ``y`` or the ``hovertext`` column.
    """
    fig = go.Figure()
    for key, sub_def in df.groupby('type data'):
        # Points of the 'unbalanced' group are drawn in white; the other
        # groups keep the colour assigned by the template.
        marker = {'size': 4}
        if key == 'unbalanced':
            marker['color'] = 'white'

        fig.add_trace(
            go.Scatter(
                x=sub_def['x'],
                y=sub_def['y'],
                name=key,       # this sets its legend entry
                mode='markers',
                marker=marker,
                # Honour the caller's column choice instead of always
                # showing the "activity" column.
                hovertext=sub_def[hovertext],
            )
        )

    fig.update_layout(
        title={
            'text': title,
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        hovermode='x',
        xaxis_title="X",
        yaxis_title="Y",
        legend_title=title,
        # Other built-in templates are listed at
        # https://plotly.com/python/templates/ (e.g. 'ggplot2', 'seaborn',
        # 'simple_white', 'plotly_white', 'plotly_dark', 'presentation',
        # 'xgridoff', 'ygridoff', 'gridon', 'none').
        template='plotly',
        width=900,
        height=900,
        font=dict(
            size=15,
        )
    )

    fig.show()

def _drop_unlabeled(dataset_obj, label_column="standard activity code"):
    """Remove rows labelled -1 from ``dataset_obj.data`` and renumber its index from 0."""
    frame = dataset_obj.data
    dataset_obj.data = frame[frame[label_column] != -1].reset_index(drop=True)
    return dataset_obj


def _embed(umap_model, features, source_frame, kind):
    """Project ``features`` with ``umap_model`` and attach label, user and split tag."""
    embedded = pd.DataFrame(umap_model.transform(features), columns=["x", "y"])
    # Index-aligned assignment: callers pass a ``source_frame`` whose index was
    # reset, so it lines up with the fresh RangeIndex of ``embedded``.
    embedded["standard activity code"] = source_frame["standard activity code"]
    embedded["user"] = source_frame["user"]
    embedded["type data"] = kind
    return embedded


def plot_umap(
        dataset,
        umap_models,
        labels_activity,
        fft: bool = True,
        view: str = "new",
        unbalanced_root="/home/patrick/Downloads/data/unbalanced",
        old_view_root="/home/patrick/Downloads/data/standartized_balanced",
        new_view_root="../data/standartized_balanced",
):
    """Build the 2-D UMAP embedding of one dataset's unbalanced, train and test data.

    Despite its name, this function does not draw anything: it returns a title
    and a data frame.

    Parameters
    ----------
    dataset : str
        Name of the dataset sub-folder appended to each root directory.
    umap_models : dict
        Cache of fitted UMAP models keyed by dataset name. When the entry for
        ``dataset`` is missing or ``None``, a model is fitted on the FFT
        features of the unbalanced data and stored in this dict (the dict is
        mutated).
    labels_activity : dict
        Maps each "standard activity code" to the text put in the ``activity``
        column.
    fft : bool, default True
        When true, train and test data go through ``aplly_fft`` before being
        projected. When false, ``.copy()`` of each dataset is projected instead.
    view : str, default "new"
        ``"new"`` reads the balanced data from ``new_view_root``; any other
        value reads it from ``old_view_root``.
    unbalanced_root, old_view_root, new_view_root : str or Path
        Root directories of the three data locations. The defaults are the
        locations this function originally had hard-coded.

    Returns
    -------
    tuple
        ``(title, all_data)``. ``all_data`` stacks the unbalanced, train and
        test embeddings (in that order) and has the columns ``x``, ``y``,
        ``standard activity code``, ``user``, ``type data`` and ``activity``.
        Its index is not renumbered after concatenation, so index labels repeat
        across the three parts.

    Raises
    ------
    KeyError
        If an activity code is not a key of ``labels_activity``.
    """
    # Load the unbalanced data; only the train file exists for this view.
    loader = PandasDatasetsIO(
        Path(unbalanced_root) / dataset,
        train_filename="standartized_unbalanced.csv",
        validation_filename=None,
        test_filename=None,
    )
    loaded = loader.load(
        load_train=True,
        load_validation=False,
        load_test=False,
    )
    features = ["accel-x", "accel-y", "accel-z", "gyro-x", "gyro-y", "gyro-z"]

    unbalanced = PandasMultiModalDataset(
        pd.DataFrame(loaded[0]),
        feature_prefixes=features,
        label_columns=["standard activity code"],
        as_array=True
    )
    _drop_unlabeled(unbalanced)
    unbalanced_fft = aplly_fft(unbalanced)

    # Reuse the model cached for this dataset so every view is projected into
    # the same 2-D space; fit it only the first time the dataset is seen.
    umap_model = umap_models.get(dataset)
    if umap_model is None:
        umap_model = UMAP(
            n_components=2,
            random_state=42,
        )
        umap_model.fit(unbalanced_fft.X)
        umap_models[dataset] = umap_model

    unbalanced_reduced = _embed(
        umap_model, unbalanced_fft.X, unbalanced.data, "unbalanced"
    )

    # Load the balanced train/test split of the requested view.
    balanced_root = new_view_root if view == "new" else old_view_root
    train_processed, test_processed = PandasMultiModalLoader(
        Path(balanced_root) / dataset
    ).load(label="standard activity code", concat_train_validation=True)
    _drop_unlabeled(train_processed)
    _drop_unlabeled(test_processed)

    if fft:
        train = aplly_fft(train_processed)
        test = aplly_fft(test_processed)
    else:
        # NOTE(review): the UMAP model above is always fitted on FFT features,
        # so this branch projects non-FFT features through an FFT-fitted model
        # and the title below still says "on FFT" -- confirm this is intended.
        train = train_processed.copy()
        test = test_processed.copy()

    # Labels and users come from the pre-FFT datasets, as in the projection
    # of the unbalanced data.
    train_reduced = _embed(umap_model, train.X, train_processed.data, "train")
    test_reduced = _embed(umap_model, test.X, test_processed.data, "test")

    all_data = pd.concat([unbalanced_reduced, train_reduced, test_reduced], axis=0)
    all_data["activity"] = all_data["standard activity code"].apply(
        lambda code: labels_activity[code]
    )

    title = f"Plot from {dataset} dataset with UMAP-2 on FFT - View: {view}"

    return title, all_data


In [72]:
# Build the embedded frame for every (view, dataset) pair and store it in
# `datas[view][dataset]`. The same `umap_models` dict is passed to every call,
# so the model plot_umap caches for a dataset on the "old" pass is reused on
# the "new" pass.
for view in ["old", "new"]:
    for dataset in datasets:
        # plot_umap returns (title, frame); only the frame is stored here.
        title, all_data = plot_umap(
            dataset,
            umap_models,
            labels_activity,
            fft=True,
            view=view
        )

        datas[view][dataset] = all_data



Columns (366) have mixed types. Specify dtype option on import or set low_memory=False.


Columns (366) have mixed types. Specify dtype option on import or set low_memory=False.



In [None]:
# Save the data dictionary to disk as a pickle file.

with open("umap_data.pkl", "wb") as pickle_file:
    pickle.dump(datas, pickle_file)
