In [1]:
import numpy as np
import pandas as pd
import tqdm
from scipy import signal
from pathlib import Path

from dataset_processor import (
    AddGravityColumn,
    Convert_G_to_Ms2,
    ButterworthFilter,
    Resampler,
    Windowize,
    AddStandardActivityCode,
    RenameColumns,
    Pipeline
)

In [2]:
def read_motionsense(motionsense_path):
    """Load every MotionSense CSV found under ``motionsense_path`` into one DataFrame.

    The directory is searched recursively for ``*.csv`` files, which are read
    in sorted path order. Metadata is derived from each file's location:

    * the parent directory name is split on ``"_"`` into the activity name
      and the serial (trial) identifier, e.g. ``dws_1`` -> ``("dws", "1")``;
    * the file stem is split on ``"_"`` and its second token is parsed as the
      integer user id, e.g. ``sub_3`` -> ``3``.

    Parameters
    ----------
    motionsense_path : str or pathlib.Path
        Root directory that contains the per-activity sub-directories.

    Returns
    -------
    pandas.DataFrame
        All files concatenated. Holds the twelve ``float32`` sensor columns
        plus ``"activity code"`` (int), ``"index"`` (row position inside the
        source file), ``"user"`` (int), ``"serial"`` (str) and ``"csv"``
        (``"<parent dir>/<file name>"``). The row index is NOT reset, so
        index labels repeat from one file to the next.

    Raises
    ------
    ValueError
        If no CSV file is found under ``motionsense_path``, or if a parent
        directory name does not split into exactly two ``"_"``-separated parts.
    KeyError
        If the activity name taken from a directory is not a known activity.
    """
    activity_names = {0: "dws", 1: "ups", 2: "sit", 3: "std", 4: "wlk", 5: "jog"}
    activity_codes = {v: k for k, v in activity_names.items()}
    motionsense_path = Path(motionsense_path)

    feature_dtypes = {
        "attitude.roll": np.float32,
        "attitude.pitch": np.float32,
        "attitude.yaw": np.float32,
        "gravity.x": np.float32,
        "gravity.y": np.float32,
        "gravity.z": np.float32,
        "rotationRate.x": np.float32,
        "rotationRate.y": np.float32,
        "rotationRate.z": np.float32,
        "userAcceleration.x": np.float32,
        "userAcceleration.y": np.float32,
        "userAcceleration.z": np.float32,
    }

    csv_files = sorted(motionsense_path.rglob("*.csv"))
    if not csv_files:
        # Fail early with an actionable message instead of letting pd.concat
        # raise the opaque "No objects to concatenate".
        raise ValueError(f"No CSV files found under '{motionsense_path}'")

    dfs = []
    for f in csv_files:
        # The parent directory name carries the activity name and the serial
        # (trial) number, separated by an underscore.
        activity_name, serial = f.parents[0].name.split("_")
        activity_code = activity_codes[activity_name]

        user = int(f.stem.split("_")[1])
        # skiprows=1 drops the file's own header line; the column names are
        # supplied explicitly so they always match feature_dtypes.
        df = pd.read_csv(
            f, names=list(feature_dtypes.keys()), dtype=feature_dtypes, skiprows=1
        )

        # ----- Auxiliary columns and metadata -----
        df["activity code"] = activity_code
        df["index"] = range(len(df))
        df["user"] = user
        df["serial"] = serial
        df["csv"] = "/".join(f.parts[-2:])
        # ------------------------------------------
        dfs.append(df)

    return pd.concat(dfs)


def users(df):
    """Return the distinct values of the "user" column as an ascending list."""
    distinct_users = df["user"].unique()
    return np.sort(distinct_users).tolist()


def activities(df):
    """Return the distinct values of the "activity code" column as an ascending list."""
    distinct_codes = df["activity code"].unique()
    return np.sort(distinct_codes).tolist()




In [3]:
# Root directory of the raw MotionSense DeviceMotion CSV files.
motionsense_path = "data/raw/MotionSense/A_DeviceMotion_data"

# Original sensor column name -> name used by the processing pipeline.
# Yields userAcceleration.{x,y,z} -> accel-{x,y,z} followed by
# rotationRate.{x,y,z} -> gyro-{x,y,z}.
columns_to_rename = {
    f"{source}.{axis}": f"{target}-{axis}"
    for source, target in (("userAcceleration", "accel"), ("rotationRate", "gyro"))
    for axis in "xyz"
}

# Columns handed to the pipeline steps as features, in this exact order:
# accel-*, gyro-*, attitude.*, gravity.*
feature_columns = [
    f"{prefix}{suffix}"
    for prefix, suffixes in (
        ("accel-", "xyz"),
        ("gyro-", "xyz"),
        ("attitude.", ("roll", "pitch", "yaw")),
        ("gravity.", "xyz"),
    )
    for suffix in suffixes
]

# Columns that jointly identify one recording.
column_group = ["user", "activity code", "serial"]

# activity code -> standard activity code
# (the trailing names are the MotionSense activity labels used by
# read_motionsense for each activity code)
standard_activity_code_map = {
    0: 4,  # dws
    1: 3,  # ups
    2: 0,  # sit
    3: 1,  # std
    4: 2,  # wlk
    5: 5,  # jog
}


## Bruto

In [4]:
# Load all raw MotionSense recordings into a single DataFrame.
dataframe = read_motionsense(motionsense_path)

# Step 1: rename the sensor columns according to `columns_to_rename`.
renamer = RenameColumns(columns_map=columns_to_rename)

# Step 2: cut the data into windows of 150 samples with no overlap, grouped
# by the "csv" column (3 s per window if the data is sampled at 50 Hz, the
# rate the normalized pipeline assumes - TODO confirm).
windowizer = Windowize(
    groupby_column="csv",
    features_to_select=feature_columns,
    samples_per_window=150,
    samples_per_overlap=0,
)

# Step 3: add the standard activity code derived from "activity code".
standard_label_adder = AddStandardActivityCode(standard_activity_code_map)

# Steps run in list order.
pipeline = Pipeline([renamer, windowizer, standard_label_adder])

new_df = pipeline(dataframe)
new_df

Executing RenameColumns
Executing Windowize


Creating windows: 100%|██████████| 360/360 [00:46<00:00,  7.78it/s]


Executing AddStandardActivityCode


Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gravity.z-146,gravity.z-147,gravity.z-148,gravity.z-149,index,serial,user,csv,activity code,standard activity code
0,0.294894,0.219405,0.010714,-0.008389,0.199441,0.168241,0.079382,0.06936,0.072889,0.098347,...,-0.087946,-0.072701,-0.062489,-0.057112,0.0,1,1,dws_1/sub_1.csv,0,4
1,0.422041,0.489289,0.405519,0.330101,0.278058,0.26667,-0.038128,-0.113882,0.040205,0.70531,...,-0.429028,-0.438791,-0.427743,-0.398845,150.0,1,1,dws_1/sub_1.csv,0,4
2,0.050002,0.279396,0.397511,0.489769,0.376962,0.086257,0.297043,0.189549,-0.11724,-0.219285,...,-0.060859,-0.035123,-0.006991,0.021725,300.0,1,1,dws_1/sub_1.csv,0,4
3,0.453086,0.428134,0.145774,0.035071,-0.073498,-0.076478,-0.071926,-0.070026,-0.09827,-0.088027,...,0.034487,0.025593,0.009984,-0.012116,450.0,1,1,dws_1/sub_1.csv,0,4
4,-0.12326,-0.197224,0.104588,0.464974,0.476676,-0.173624,-0.311906,-0.396358,-0.354741,-0.343695,...,0.0168,-0.017304,-0.065986,-0.116997,600.0,1,1,dws_1/sub_1.csv,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9237,-0.219509,0.039405,0.177356,0.163627,0.080003,-0.014922,-0.057913,0.004133,0.091218,0.165302,...,-0.615996,-0.563828,-0.51997,-0.519647,3900.0,8,9,wlk_8/sub_9.csv,4,2
9238,0.41574,0.001829,-0.467739,-0.385918,0.186728,0.043864,-0.726447,-0.768545,-0.734806,-0.569898,...,-0.329562,-0.392729,-0.444477,-0.492672,4050.0,8,9,wlk_8/sub_9.csv,4,2
9239,-0.128104,0.136487,0.138946,-0.044175,-0.09986,-0.104913,-0.211261,-0.335558,-0.412587,-0.413217,...,-0.293291,-0.255729,-0.223467,-0.191943,4200.0,8,9,wlk_8/sub_9.csv,4,2
9240,-0.198929,-0.270276,-0.092972,0.201214,0.290746,0.130444,-0.023268,0.024589,0.179717,0.089722,...,-0.643083,-0.595136,-0.550171,-0.534283,4350.0,8,9,wlk_8/sub_9.csv,4,2


## Normalizado

In [5]:
# Reload the raw recordings so this pipeline starts from untouched data.
dataframe = read_motionsense(motionsense_path)

# Step 1: rename the sensor columns according to `columns_to_rename`.
renamer = RenameColumns(columns_map=columns_to_rename)

# Step 2: combine the gravity.* columns with the accel-* columns
# (presumably adds gravity back to the user acceleration - confirm in
# dataset_processor).
add_gravity = AddGravityColumn(
    gravity_columns=["gravity.x", "gravity.y", "gravity.z"],
    axis_columns=["accel-x", "accel-y", "accel-z"],
)

# Step 3: convert the accelerometer axes from g to m/s^2 (per the class name).
conversor = Convert_G_to_Ms2(axis_columns=["accel-x", "accel-y", "accel-z"])

# Step 4: Butterworth-filter the accelerometer axes; fs=50 is the sampling
# rate passed to the filter.
butterworth = ButterworthFilter(
    fs=50,
    axis_columns=["accel-x", "accel-y", "accel-z"],
)

# Step 5: resample each (user, activity code, serial) group from 50 to 20.
resampler = Resampler(
    original_fs=50,
    target_fs=20,
    features_to_select=feature_columns,
    groupby_column=column_group,
)

# Step 6: cut the data into windows of 60 samples with no overlap, grouped by
# the "csv" column (60 samples at the 20 Hz target rate is 3 s).
windowizer = Windowize(
    groupby_column="csv",
    features_to_select=feature_columns,
    samples_per_window=60,
    samples_per_overlap=0,
)

# Step 7: add the standard activity code derived from "activity code".
standard_label_adder = AddStandardActivityCode(standard_activity_code_map)

# Steps run in list order.
pipeline = Pipeline(
    [
        renamer,
        add_gravity,
        conversor,
        butterworth,
        resampler,
        windowizer,
        standard_label_adder,
    ]
)

new_df_normalized = pipeline(dataframe)
new_df_normalized

Executing RenameColumns
Executing AddGravityColumn
Executing Convert_G_to_Ms2
Executing ButterworthFilter
Executing Resampler


Resampling: 100%|██████████| 360/360 [00:04<00:00, 72.27it/s]


Executing Windowize


Creating windows: 100%|██████████| 360/360 [00:44<00:00,  8.15it/s]


Executing AddStandardActivityCode


Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gravity.z-57,gravity.z-58,gravity.z-59,index,level_0,serial,user,csv,activity code,standard activity code
0,2.288706,-2.198357,-0.291764,-1.287203,-1.352125,-2.150854,-0.56075,-5.668985,-4.820846,-4.712508,...,-0.168824,-0.107589,-0.064963,0.0,0.0,1,1,dws_1/sub_1.csv,0,4
1,2.36918,2.071391,-0.186089,-1.266858,9.145859,-3.198091,-8.636468,-5.221671,-5.435234,0.85808,...,-0.2317,-0.403346,-0.429845,60.0,60.0,1,1,dws_1/sub_1.csv,0,4
2,0.169915,2.574639,1.753801,-1.669721,-1.080261,1.21426,-0.776808,5.146641,2.479351,0.875143,...,-0.093774,-0.076443,-0.012494,120.0,120.0,1,1,dws_1/sub_1.csv,0,4
3,3.841507,-0.419048,-1.278237,-1.922591,-0.848369,1.197113,2.96001,0.81453,3.038822,1.065182,...,0.078148,0.03872,0.015126,180.0,180.0,1,1,dws_1/sub_1.csv,0,4
4,-0.865185,4.523849,-0.97776,-3.88586,-3.891317,-0.431525,3.136046,-2.38584,-4.333047,2.620828,...,0.033785,0.032218,-0.063874,240.0,240.0,1,1,dws_1/sub_1.csv,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9237,-2.369065,-5.625988,4.408824,4.987852,-3.090578,-0.764294,-9.564993,5.71272,1.401514,-3.06484,...,-0.586026,-0.64633,-0.701213,1560.0,1560.0,8,9,wlk_8/sub_9.csv,4,2
9238,2.028923,2.728628,-6.761029,0.693568,1.207147,0.937654,-2.088913,-2.3388,-5.248304,2.460102,...,-0.086497,-0.045973,-0.053208,1620.0,1620.0,8,9,wlk_8/sub_9.csv,4,2
9239,-1.824248,-1.922468,-0.257376,1.177002,2.343056,1.322935,4.192637,4.089555,-6.411589,-3.597744,...,-0.557906,-0.497429,-0.387866,1680.0,1680.0,8,9,wlk_8/sub_9.csv,4,2
9240,-4.962596,5.842322,7.724271,-6.507734,0.841843,-7.028653,-0.62132,7.31031,-3.371698,-0.089727,...,-0.665227,-0.718157,-0.730388,1740.0,1740.0,8,9,wlk_8/sub_9.csv,4,2
