# Pre-processing the MotionSense Dataset and Generating Views - Without Gravity - Filtered - Multiplying Acc by 9.81 m/s² - Resampling to 20 Hz

In [1]:
# Enable the IPython autoreload extension in mode 2, so that imported modules
# are reloaded before each cell executes and edits to them are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
from typing import List
import hashlib
import pandas as pd

from librep.datasets.har.motionsense import (
    RawMotionSense,
    RawMotionSenseIterator,
    MotionSenseDatasetGenerator
)
from librep.utils.dataset import PandasDatasetsIO

%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


## Raw balanced MotionSense

In [3]:
# Folder holding the MotionSense "A_DeviceMotion_data" files, relative to this notebook.
dataset_dir = Path("../../../../../data_2/views/MotionSense") / "A_DeviceMotion_data"

# download=False: the data is expected to be present at dataset_dir already.
motionsense_dataset = RawMotionSense(
    dataset_dir,
    download=False,
)

# Show the dataset summary as the cell output.
motionsense_dataset

MotionSense Dataset at: '../../../../../data_2/views/MotionSense/A_DeviceMotion_data'

In [4]:
# Look up the name of every activity code exposed by the dataset,
# keeping the order in which the dataset lists its activities.
act_names = [
    motionsense_dataset.activity_names[activity_code]
    for activity_code in motionsense_dataset.activities
]
act_names

['dws', 'ups', 'sit', 'std', 'wlk', 'jog']

In [5]:
# No user/activity selection is passed (None for both); the summary printed
# below the cell reports how many users and activities the iterator covers.
iterator = RawMotionSenseIterator(
    motionsense_dataset,
    users_to_select=None,
    activities_to_select=None,
)
iterator

MotionSense Iterator: users=24, activities=6

In [6]:
# Generator for the "raw" view: non-overlapping windows of 150 samples with
# every optional pre-processing step (gravity, filter, resampling, unit
# change) switched off.
motionsense_raw = MotionSenseDatasetGenerator(
    iterator,
    time_window=150,
    window_overlap=0,
    add_gravity=False,
    add_filter=False,
    resampler=False,
    change_acc_measure=False,
)

motionsense_raw

Dataset generator: time_window=150, overlap=0

In [7]:
# 70/10/20 split of the raw windows. Users are kept disjoint between the
# subsets and the subsets are balanced; the seed is fixed for reproducibility.
train_raw, validation_raw, test_raw = motionsense_raw.create_datasets(
    train_size=0.7, validation_size=0.1, test_size=0.2,
    ensure_distinct_users_per_dataset=True,
    balance_samples=True,
    seed=0,
)

Generating full df over MotionSense View: 360it [00:15, 23.58it/s]


(9242, 1805)


In [8]:
# Fingerprint each raw subset (SHA-1 over the pandas per-row hashes) so a
# re-run can be compared against the digests recorded below. One digest is
# printed per line, in train / validation / test order.
print(
    *(
        hashlib.sha1(pd.util.hash_pandas_object(subset).values).hexdigest()
        for subset in (train_raw, validation_raw, test_raw)
    ),
    sep="\n",
)

0512b620647e1d62b56cdb34a2330acba29f53c0
e3194e06a68bd169d5451dbff7c42648ca2c4289
004c02910e8228058c1373f0491d184c26d61ebe


In [9]:
# Markdown text passed to PandasDatasetsIO.save() together with the subsets.
description = """# Raw Balanced MotionSense

This view contains train, validation and test subsets in the following proportions:
- Train: 70% of samples
- Validation: 10% of samples
- Test: 20% of samples

After splits, the datasets were balanced in relation to the activity code column, that is, each subset have the same number of activitiy samples.

**NOTE**: Each subset contain samples from distinct users, that is, samples of one user belongs exclusivelly to one of three subsets.
"""

# Destination folder of the raw balanced view, relative to this notebook.
output_path = Path("../../data/views/MotionSense") / "raw_balanced"

# I/O helper bound to the destination folder; displayed as the cell output.
pandas_io = PandasDatasetsIO(output_path)
pandas_io

PandasDatasetIO at '../../data/views/MotionSense/raw_balanced'

In [10]:
# Persist the three raw subsets and their description through the
# PandasDatasetsIO helper created for the raw balanced view.
pandas_io.save(train=train_raw, validation=validation_raw, test=test_raw, description=description)

## Normalized balanced MotionSense

In [11]:
# Generator for the "normalized" view: non-overlapping windows of 60 samples
# with gravity addition, filtering, resampling (fs=20) and the accelerometer
# unit change all switched on.
motionsense_normalized = MotionSenseDatasetGenerator(
    iterator,
    time_window=60,
    window_overlap=0,
    add_gravity=True,
    add_filter=True,
    resampler=True,
    fs=20,
    change_acc_measure=True,
)

motionsense_normalized

Dataset generator: time_window=60, overlap=0

In [12]:
# Same 70/10/20 split settings as the raw view: users are kept disjoint
# between the subsets, the subsets are balanced, and the seed is fixed.
train_normalized, validation_normalized, test_normalized = motionsense_normalized.create_datasets(
    train_size=0.7, validation_size=0.1, test_size=0.2,
    ensure_distinct_users_per_dataset=True,
    balance_samples=True,
    seed=0,
)

Generating full df over MotionSense View: 360it [00:12, 28.94it/s]


(9242, 725)


In [13]:
# Fingerprint each normalized subset (SHA-1 over the pandas per-row hashes) so
# a re-run can be compared against the digests recorded below. One digest is
# printed per line, in train / validation / test order.
print(
    *(
        hashlib.sha1(pd.util.hash_pandas_object(subset).values).hexdigest()
        for subset in (train_normalized, validation_normalized, test_normalized)
    ),
    sep="\n",
)

bad20a4b7ffc7d1f246e00baaf9af24f958ce2d8
51ac451d8a7d84ec970c19f36ba028f4f0c3fb7f
34ff2de21405078ac40f9d538ce6ad28ba8bf044


In [14]:
# Destination folder of the normalized balanced view, relative to this notebook.
output_path = Path("../../data/views/MotionSense/normalized_balanced")

# Markdown text passed to PandasDatasetsIO.save() together with the subsets.
# The pre-processing sentence was corrected: the pipeline adds gravity to the
# user acceleration and then high-pass filters the result, which removes the
# gravity component (the notebook title says "Without Gravity - Filtered"),
# not the user acceleration. It now also mentions the 9.81 scaling enabled by
# change_acc_measure=True.
description = """# Normalized Balanced MotionSense

This view contains train, validation and test subsets in the following proportions:
- Train: 70% of samples
- Validation: 10% of samples
- Test: 20% of samples

After splits, the datasets were balanced in relation to the activity code column, that is, each subset have the same number of activitiy samples. In this dataset we sum the gravity to the user acceleration and apply a high pass Butterworth filter with 0.3 cutoff to remove the gravity component, and after this we resample the signal from 50Hz to 20Hz. The accelerometer values are also multiplied by 9.81 to express them in m/s².

**NOTE**: Each subset contain samples from distinct users, that is, samples of one user belongs exclusivelly to one of three subsets.
"""

# I/O helper bound to the destination folder; displayed as the cell output.
pandas_io = PandasDatasetsIO(output_path)
pandas_io

PandasDatasetIO at '../../data/views/MotionSense/normalized_balanced'

In [15]:
# Persist the three normalized subsets and their description through the
# PandasDatasetsIO helper created for the normalized balanced view.
pandas_io.save(train=train_normalized, validation=validation_normalized, test=test_normalized, description=description)