In [1]:
import numpy as np
import pandas as pd
import tqdm
from scipy import signal
from pathlib import Path

from dataset_processor import (
    AddGravityColumn,
    Convert_G_to_Ms2,
    ButterworthFilter,
    Resampler,
    Windowize,
    AddStandardActivityCode,
    RenameColumns,
    Pipeline
)

In [2]:
def read_motionsense(motionsense_path):
    """Load every MotionSense CSV found under ``motionsense_path`` into one DataFrame.

    Files are discovered recursively and processed in sorted path order. Each
    file must sit in a folder named ``<activity>_<serial>`` (e.g. ``dws_1``)
    and have a stem of the form ``<prefix>_<user>`` (e.g. ``sub_1.csv``); the
    activity, serial and user are parsed from those names.

    Parameters
    ----------
    motionsense_path : str or pathlib.Path
        Root directory that is searched recursively for ``*.csv`` files.

    Returns
    -------
    pandas.DataFrame
        The twelve sensor columns (float32) followed by the metadata columns
        ``"activity code"`` (int), ``"index"`` (row position inside the source
        file), ``"user"`` (int), ``"serial"`` (str) and ``"csv"``
        (``"<folder>/<file>"``). The per-file row labels are kept as read,
        so they repeat across files.

    Raises
    ------
    ValueError
        If no CSV file exists under ``motionsense_path``, or if a folder name
        does not split into exactly two ``_``-separated parts.
    KeyError
        If the activity prefix of a folder is not one of the known names.
    """
    # Activity folder prefix -> numeric MotionSense activity code.
    activity_codes = {"dws": 0, "ups": 1, "sit": 2, "std": 3, "wlk": 4, "jog": 5}
    motionsense_path = Path(motionsense_path)

    feature_dtypes = {
        "attitude.roll": np.float32,
        "attitude.pitch": np.float32,
        "attitude.yaw": np.float32,
        "gravity.x": np.float32,
        "gravity.y": np.float32,
        "gravity.z": np.float32,
        "rotationRate.x": np.float32,
        "rotationRate.y": np.float32,
        "rotationRate.z": np.float32,
        "userAcceleration.x": np.float32,
        "userAcceleration.y": np.float32,
        "userAcceleration.z": np.float32,
    }
    feature_names = list(feature_dtypes)

    csv_files = sorted(motionsense_path.rglob("*.csv"))
    if not csv_files:
        # pd.concat would fail with an opaque "No objects to concatenate";
        # keep the exception type but say which path was searched.
        raise ValueError(f"No CSV files found under '{motionsense_path}'")

    dfs = []
    for f in csv_files:
        # The parent folder name carries the activity and the run (serial) id.
        activity_name, serial = f.parents[0].name.split("_")
        activity_code = activity_codes[activity_name]

        # The file stem carries the user id after the underscore.
        user = int(f.stem.split("_")[1])

        # skiprows=1 discards the file's own header line; names are supplied
        # explicitly so the dtype mapping applies to known columns.
        df = pd.read_csv(f, names=feature_names, dtype=feature_dtypes, skiprows=1)

        # ----- Auxiliary columns and metadata -----
        df["activity code"] = activity_code
        df["index"] = range(len(df))
        df["user"] = user
        df["serial"] = serial
        df["csv"] = "/".join(f.parts[-2:])
        dfs.append(df)

    return pd.concat(dfs)


def users(df):
    """Return the distinct values of the ``"user"`` column as an ascending list."""
    distinct_users = df["user"].unique()
    ordered_users = np.sort(distinct_users)
    return ordered_users.tolist()


def activities(df):
    """Return the distinct values of the ``"activity code"`` column as an ascending list."""
    distinct_codes = df["activity code"].unique()
    ordered_codes = np.sort(distinct_codes)
    return ordered_codes.tolist()




In [3]:
# Root folder that read_motionsense scans recursively for CSV files.
motionsense_path = "data/raw/MotionSense/A_DeviceMotion_data"

# Raw MotionSense column name -> short sensor name used by the pipelines.
columns_to_rename = {
    "userAcceleration.x": "accel-x",
    "userAcceleration.y": "accel-y",
    "userAcceleration.z": "accel-z",
    "rotationRate.x": "gyro-x",
    "rotationRate.y": "gyro-y",
    "rotationRate.z": "gyro-z",
}

# Signal columns handed to the pipeline steps: the six renamed sensor axes
# first, then the attitude and gravity columns under their original names.
feature_columns = list(columns_to_rename.values()) + [
    "attitude.roll",
    "attitude.pitch",
    "attitude.yaw",
    "gravity.x",
    "gravity.y",
    "gravity.z",
]

# Columns given to Resampler as its groupby key.
column_group = ["user", "activity code", "serial"]

# MotionSense activity code -> standard activity code. The trailing comments
# give the MotionSense activity name for each key, as listed in
# read_motionsense.
standard_activity_code_map = {
    0: 4,  # dws
    1: 3,  # ups
    2: 0,  # sit
    3: 1,  # std
    4: 2,  # wlk
    5: 5,  # jog
}


## Bruto

In [4]:
# Raw variant: rename the sensor columns, cut fixed-size windows and attach
# the standard activity code. No unit conversion, filtering or resampling
# step is part of this pipeline.
dataframe = read_motionsense(motionsense_path)

renamer = RenameColumns(columns_map=columns_to_rename)

# 150 samples per window with zero overlap between consecutive windows.
windowizer = Windowize(
    features_to_select=feature_columns,
    samples_per_window=150,
    samples_per_overlap=0,
)

standard_label_adder = AddStandardActivityCode(standard_activity_code_map)

# Steps run in list order.
pipeline = Pipeline([renamer, windowizer, standard_label_adder])

new_df = pipeline(dataframe)
new_df

Executing RenameColumns
Executing Windowize


Creating windows: 100%|██████████| 360/360 [00:45<00:00,  7.89it/s]


Executing AddStandardActivityCode


Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gravity.z-146,gravity.z-147,gravity.z-148,gravity.z-149,user,csv,activity code,serial,index,standard activity code
0,0.294894,0.219405,0.010714,-0.008389,0.199441,0.168241,0.079382,0.06936,0.072889,0.098347,...,-0.087946,-0.072701,-0.062489,-0.057112,1.0,dws_1/sub_1.csv,0,1,0,4
1,0.422041,0.489289,0.405519,0.330101,0.278058,0.26667,-0.038128,-0.113882,0.040205,0.70531,...,-0.429028,-0.438791,-0.427743,-0.398845,1.0,dws_1/sub_1.csv,0,1,150,4
2,0.050002,0.279396,0.397511,0.489769,0.376962,0.086257,0.297043,0.189549,-0.11724,-0.219285,...,-0.060859,-0.035123,-0.006991,0.021725,1.0,dws_1/sub_1.csv,0,1,300,4
3,0.453086,0.428134,0.145774,0.035071,-0.073498,-0.076478,-0.071926,-0.070026,-0.09827,-0.088027,...,0.034487,0.025593,0.009984,-0.012116,1.0,dws_1/sub_1.csv,0,1,450,4
4,-0.12326,-0.197224,0.104588,0.464974,0.476676,-0.173624,-0.311906,-0.396358,-0.354741,-0.343695,...,0.0168,-0.017304,-0.065986,-0.116997,1.0,dws_1/sub_1.csv,0,1,600,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9237,-0.219509,0.039405,0.177356,0.163627,0.080003,-0.014922,-0.057913,0.004133,0.091218,0.165302,...,-0.615996,-0.563828,-0.51997,-0.519647,9.0,wlk_8/sub_9.csv,4,8,3900,2
9238,0.41574,0.001829,-0.467739,-0.385918,0.186728,0.043864,-0.726447,-0.768545,-0.734806,-0.569898,...,-0.329562,-0.392729,-0.444477,-0.492672,9.0,wlk_8/sub_9.csv,4,8,4050,2
9239,-0.128104,0.136487,0.138946,-0.044175,-0.09986,-0.104913,-0.211261,-0.335558,-0.412587,-0.413217,...,-0.293291,-0.255729,-0.223467,-0.191943,9.0,wlk_8/sub_9.csv,4,8,4200,2
9240,-0.198929,-0.270276,-0.092972,0.201214,0.290746,0.130444,-0.023268,0.024589,0.179717,0.089722,...,-0.643083,-0.595136,-0.550171,-0.534283,9.0,wlk_8/sub_9.csv,4,8,4350,2


## Normalizado

In [5]:
# Normalized variant. The dataset is read again from disk rather than reusing
# the frame from the raw cell, so this cell does not depend on whatever the
# previous pipeline did to its input.
dataframe = read_motionsense(motionsense_path)

renamer = RenameColumns(columns_map=columns_to_rename)

# The next three steps all operate on the accelerometer axes only.
# NOTE(review): judging by the class names they add gravity back, convert
# from g to m/s^2 and apply a Butterworth filter -- confirm in dataset_processor.
add_gravity = AddGravityColumn(
    axis_columns=["accel-x", "accel-y", "accel-z"],
    gravity_columns=["gravity.x", "gravity.y", "gravity.z"],
)
conversor = Convert_G_to_Ms2(axis_columns=["accel-x", "accel-y", "accel-z"])
butterworth = ButterworthFilter(axis_columns=["accel-x", "accel-y", "accel-z"], fs=50)

# Resample each (user, activity code, serial) group from 50 Hz to 20 Hz.
resampler = Resampler(
    groupby_column=column_group,
    features_to_select=feature_columns,
    original_fs=50,
    target_fs=20,
)

# 60 samples per window, zero overlap (60 / 20 Hz = 3 s after resampling).
windowizer = Windowize(
    features_to_select=feature_columns,
    samples_per_window=60,
    samples_per_overlap=0,
)

standard_label_adder = AddStandardActivityCode(standard_activity_code_map)

# Steps run in list order.
pipeline = Pipeline(
    [
        renamer,
        add_gravity,
        conversor,
        butterworth,
        resampler,
        windowizer,
        standard_label_adder,
    ]
)

new_df_normalized = pipeline(dataframe)
new_df_normalized

Executing RenameColumns
Executing AddGravityColumn
Executing Convert_G_to_Ms2
Executing ButterworthFilter
Executing Resampler


Resampling: 100%|██████████| 360/360 [00:06<00:00, 53.47it/s]


Executing Windowize


Creating windows: 100%|██████████| 360/360 [00:50<00:00,  7.14it/s]


Executing AddStandardActivityCode


Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gravity.z-57,gravity.z-58,gravity.z-59,level_0,user,csv,activity code,serial,index,standard activity code
0,-3.165723,-1.839049,-1.910544,-1.842408,-2.70497,-2.754713,-1.18805,-4.780722,-3.523048,-3.385084,...,-0.168824,-0.107589,-0.064963,0.0,1.0,dws_1/sub_1.csv,0,1,0,4
1,3.483643,3.102995,0.101908,-0.856858,9.185696,-3.018065,-8.056853,-4.853502,-5.579025,1.142508,...,-0.2317,-0.403346,-0.429845,60.0,1.0,dws_1/sub_1.csv,0,1,60,4
2,0.663245,2.818359,2.001309,-1.583959,-1.164375,1.220145,-1.351822,4.003988,1.418277,0.224874,...,-0.093774,-0.076443,-0.012494,120.0,1.0,dws_1/sub_1.csv,0,1,120,4
3,3.808319,-0.388694,-1.037497,-1.379841,0.055843,2.503732,4.420077,1.993684,3.215546,0.478973,...,0.078148,0.03872,0.015126,180.0,1.0,dws_1/sub_1.csv,0,1,180,4
4,-1.716752,4.175832,-0.251363,-2.624296,-3.149779,-0.218514,3.492643,-2.142963,-4.275885,2.76162,...,0.033785,0.032218,-0.063874,240.0,1.0,dws_1/sub_1.csv,0,1,240,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9237,-2.020072,-4.991911,5.158629,5.590274,-2.693295,-1.751136,-11.142086,5.839254,2.215771,-2.753779,...,-0.586026,-0.64633,-0.701213,1560.0,9.0,wlk_8/sub_9.csv,4,8,1560,2
9238,2.535572,3.070929,-6.669269,0.117018,-0.067042,-0.621455,-2.891514,-1.675472,-4.361608,3.559004,...,-0.086497,-0.045973,-0.053208,1620.0,9.0,wlk_8/sub_9.csv,4,8,1620,2
9239,-1.493635,-1.893001,-0.462677,1.37943,3.391596,2.355926,4.779168,3.946278,-6.687276,-3.512037,...,-0.557906,-0.497429,-0.387866,1680.0,9.0,wlk_8/sub_9.csv,4,8,1680,2
9240,-4.637259,6.210708,7.917005,-6.49157,0.017604,-8.264919,-1.359451,7.457002,-3.467726,-0.546523,...,-0.665227,-0.718157,-0.730388,1740.0,9.0,wlk_8/sub_9.csv,4,8,1740,2
