In [1]:
import numpy as np
import pandas as pd
import tqdm
from scipy import signal
from pathlib import Path

from dataset_processor import (
    AddGravityColumn,
    Convert_G_to_Ms2,
    ButterworthFilter,
    Resampler,
    Windowize,
    AddStandardActivityCode,
    RenameColumns,
    Pipeline
)

In [2]:
def read_uci(uci_path):
    activity_names = {
        1: "WALKING", 
        2: "WALKING_UPSTAIRS", 
        3: "WALKING_DOWNSTAIRS", 
        4: "SITTING", 
        5: "STANDING", 
        6: "LAYING",
        7: "STAND_TO_SIT",
        8: "SIT_TO_STAND",
        9: "SIT_TO_LIE",
        10: "LIE_TO_SIT",
        11: "STAND_TO_LIE",
        12: "LIE_TO_STAND"
    }
    activity_codes = {v: k for k, v in activity_names.items()}
    
    feature_columns = [
        "accel-x",
        "accel-y",
        "accel-z",
        "gyro-x",
        "gyro-y",
        "gyro-z",
    ]
    
#     df_labels = pd.read_csv("data/RawData/labels.txt", header=None, sep=" ")
    df_labels = pd.read_csv(uci_path+"/labels.txt", header=None, sep=" ")
    df_labels.columns=["serial", "user", "activity code", "start", "end"]
    
    uci_path = Path(uci_path)
    
    dfs = []
    data_path = list(uci_path.glob("*.txt"))
    new_data_path = [elem.name.split("_")+[elem] for elem in sorted(data_path)]
    df = pd.DataFrame(new_data_path, columns=["sensor", "serial", "user", "file"])
    for key, df2 in df.groupby(["serial", "user"]):
        acc, gyr = [], []
        for row_index, row in df2.iterrows():
            data = pd.read_csv(row["file"], header=None, sep=" ")
            if row["sensor"] == "acc":
                acc.append(data)
            else:
                gyr.append(data)
        new_df = pd.concat([acc[0], gyr[0]], axis=1)
        new_df.columns = feature_columns
        
        user = int(key[1].split(".")[0][4:])
        serial = int(key[0][3:])
        
        new_df['txt'] = row["file"]
        
        new_df["user"] = user
        new_df["serial"] = serial
#         new_df["activity code"] = -1
        
        for row_index, row in df_labels.loc[(df_labels["serial"] == serial) & (df_labels["user"] == user)].iterrows():
            start = row['start']
            end = row["end"]+1
            activity = row["activity code"]
            resumed_df = new_df.loc[start:end].copy()
            resumed_df["index"] = [i for i in range(start, end+1)]
            resumed_df["activity code"] = activity
            
            dfs.append(resumed_df)
    
    df = pd.concat(dfs)
    df.reset_index(inplace=True, drop=True)
    return df

In [3]:
# uci_path = Path("data/RawData")
uci_path = "data/RawData"

feature_columns = [
    "accel-x",
    "accel-y",
    "accel-z",
    "gyro-x",
    "gyro-y",
    "gyro-z",
]

column_group = ["user", "activity code", "serial"]

standard_activity_code_map = {
    1: 2, # walk
    2: 3, # stair up
    3: 4, # stair down
    4: 0, # sit
    5: 1, # stand
    6: -1, # Laying
    7: -1, # stand to sit
    8: -1, # sit to stand
    9: -1, # sit to lie
    10: -1, # lie to sit
    11: -1, # stand to lie
    12: -1 # lie to stand
}

## Bruto

In [4]:
dataframe = read_uci(uci_path)

windowizer = Windowize(
    features_to_select=feature_columns,
    samples_per_window=150,
    samples_per_overlap=0,
    groupby_column="txt"
)

standard_label_adder = AddStandardActivityCode(standard_activity_code_map)

pipeline = Pipeline(
    [
        windowizer,
        standard_label_adder
    ]
)
pipeline(dataframe)
new_df = pipeline(dataframe)
new_df

Executing Windowize


Creating windows: 100%|█████████████████████████████████████████████████████████████████████████████████████| 61/61 [00:11<00:00,  5.20it/s]


Executing AddStandardActivityCode
Executing Windowize


Creating windows: 100%|█████████████████████████████████████████████████████████████████████████████████████| 61/61 [00:12<00:00,  4.91it/s]


Executing AddStandardActivityCode


Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-146,gyro-z-147,gyro-z-148,gyro-z-149,txt,index,serial,user,activity code,standard activity code
0,1.020833,1.025,1.020833,1.016667,1.018056,1.018056,1.019445,1.016667,1.020833,1.019445,...,-0.002749,-0.005192,-0.008247,-0.013439,data/RawData/gyro_exp01_user01.txt,250,1,1,5,1
1,1.022222,1.016667,1.018056,1.023611,1.022222,1.015278,1.019445,1.019445,1.016667,1.022222,...,-0.001833,0.005498,0.002749,0.008552,data/RawData/gyro_exp01_user01.txt,400,1,1,5,1
2,1.020833,1.022222,1.018056,1.019445,1.016667,1.022222,1.019445,1.019445,1.022222,1.022222,...,0.013134,0.015272,0.007636,0.003971,data/RawData/gyro_exp01_user01.txt,550,1,1,5,1
3,1.020833,1.020833,1.015278,1.016667,1.016667,1.023611,1.022222,1.022222,1.018056,1.018056,...,-0.00672,-0.007636,-0.005192,-0.004887,data/RawData/gyro_exp01_user01.txt,700,1,1,5,1
4,1.019445,1.022222,1.026389,1.020833,1.022222,1.019445,1.022222,1.025,1.019445,1.016667,...,0.006109,0.013134,-0.00336,-0.00733,data/RawData/gyro_exp01_user01.txt,850,1,1,5,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5412,0.883333,0.85,0.797222,0.768056,0.733333,0.894445,1.291667,1.738889,1.870833,1.656945,...,0.129809,0.095295,0.009163,-0.063225,data/RawData/gyro_exp61_user30.txt,17060,61,30,3,4
5413,0.831944,0.822222,0.825,0.829167,0.829167,0.854167,0.8875,0.8875,0.956944,1.077778,...,0.113316,0.089797,0.023213,-0.066279,data/RawData/gyro_exp61_user30.txt,17210,61,30,3,4
5414,0.918056,0.968056,1.094444,1.361111,1.708333,1.765278,1.511111,1.079167,0.863889,0.815278,...,0.068722,-0.113926,-0.307876,-0.274889,data/RawData/gyro_exp61_user30.txt,17502,61,30,2,3
5415,1.061111,1.113889,1.072222,1.025,0.923611,0.822222,0.725,0.665278,0.715278,0.801389,...,0.336892,0.04612,-0.277944,-0.421497,data/RawData/gyro_exp61_user30.txt,17652,61,30,2,3


## Normatizado

In [5]:
dataframe = read_uci(uci_path)

conversor = Convert_G_to_Ms2(
    axis_columns=["accel-x", "accel-y", "accel-z"],
)

butterworth = ButterworthFilter(
    axis_columns=["accel-x", "accel-y", "accel-z"],
    fs=50
)

resampler = Resampler(
    groupby_column=column_group,
    features_to_select=feature_columns,
    original_fs=50,
    target_fs=20,
)

windowizer = Windowize(
    features_to_select=feature_columns,
    samples_per_window=60,
    samples_per_overlap=0,
    groupby_column="txt"
)

standard_label_adder = AddStandardActivityCode(standard_activity_code_map)

pipeline = Pipeline(
    [
        conversor,
        butterworth,
        resampler,
        windowizer,
        standard_label_adder
    ]
)

new_df_normalized = pipeline(dataframe)
new_df_normalized

Executing Convert_G_to_Ms2
Executing ButterworthFilter
Executing Resampler


Resampling: 100%|████████████████████████████████████████████████████████████████████████████████████████| 714/714 [00:01<00:00, 500.32it/s]


Executing Windowize


Creating windows: 100%|█████████████████████████████████████████████████████████████████████████████████████| 61/61 [00:12<00:00,  4.87it/s]


Executing AddStandardActivityCode


Unnamed: 0,accel-x-0,accel-x-1,accel-x-2,accel-x-3,accel-x-4,accel-x-5,accel-x-6,accel-x-7,accel-x-8,accel-x-9,...,gyro-z-57,gyro-z-58,gyro-z-59,txt,index,serial,user,activity code,level_0,standard activity code
0,-0.367357,0.069396,-0.068639,0.014016,-0.019516,0.004483,-0.021996,0.014873,-0.007886,0.005317,...,0.007662,0.007955,-0.01007,data/RawData/gyro_exp01_user01.txt,250,1,1,5,0,1
1,-0.01275,0.013796,-0.005696,-0.007337,-0.005671,0.022944,0.035937,-0.005661,0.003343,-0.006867,...,0.002363,0.005061,0.002717,data/RawData/gyro_exp01_user01.txt,310,1,1,5,60,1
2,0.00339,-0.011755,-0.007506,0.007095,0.019782,-0.009799,0.000678,0.012998,-0.014856,0.017593,...,0.008193,0.025263,0.0072,data/RawData/gyro_exp01_user01.txt,370,1,1,5,120,1
3,0.010267,-0.045824,0.00709,-0.002502,-0.005129,0.047995,-0.030375,-0.011676,-0.019336,0.013753,...,0.004722,-0.004918,-0.006481,data/RawData/gyro_exp01_user01.txt,430,1,1,5,180,1
4,0.007189,0.019797,0.010878,0.002397,-0.029111,0.027562,-0.021273,-0.002891,-0.038462,-0.009134,...,0.002905,0.009598,0.002101,data/RawData/gyro_exp01_user01.txt,490,1,1,5,240,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5405,-0.789853,-1.380396,-0.893262,-1.590908,-2.453743,-0.743518,0.164007,0.764993,1.859311,4.578271,...,0.437516,0.086003,-0.582233,data/RawData/gyro_exp61_user30.txt,14498,61,30,2,813928,3
5406,-0.421867,-0.731226,2.174663,2.032768,5.658664,5.200622,1.276557,-1.081012,-1.636848,-2.692239,...,-0.34451,-0.212947,-0.444067,data/RawData/gyro_exp61_user30.txt,14558,61,30,2,813988,3
5407,-3.23275,-1.711271,-2.360084,2.773505,6.79083,4.365568,4.408153,0.567427,-1.333725,-2.156933,...,-0.284184,-0.000707,-0.290697,data/RawData/gyro_exp61_user30.txt,14793,61,30,3,814048,4
5408,-2.619691,-3.280935,-1.225632,5.418072,6.896853,2.3349,4.147937,1.514696,-1.971579,-2.096206,...,0.035225,-0.002848,0.086995,data/RawData/gyro_exp61_user30.txt,14853,61,30,3,814108,4
