### Prepare sensor data

In [179]:
from book.Chapter2.CreateDataset import CreateDataset
from pathlib import Path
import pandas as pd
import os

# Where the raw recordings live and where the processed table is written.
DATASET_PATH = Path("raw_datasets/tara_train/")
RESULT_PATH = Path("./datasets/")
RESULT_FILENAME = "tara_train.csv"
# Handed to CreateDataset unchanged; presumably the width of one row in
# milliseconds (rows in the resulting table are one second apart) -- TODO confirm.
GRANULARITY = 1000

# Make sure both directories exist (including missing parents) before any I/O.
# Note that this also creates the raw-data directory when it is absent.
for directory in (DATASET_PATH, RESULT_PATH):
    directory.mkdir(parents=True, exist_ok=True)

print("Please wait, this will take a while to run!")

create_dataset_object: CreateDataset = CreateDataset(DATASET_PATH, GRANULARITY)

# One entry per sensor export: (file name, value columns, column prefix).
# The order of the entries is the order in which the sources are added, and
# therefore the order of the prefixed columns in the merged table.
sensor_sources = [
    ("Accelerometer.csv", ["z", "y", "x"], "accel_"),
    ("Gyroscope.csv", ["z", "y", "x"], "gyro_"),
    ("Gravity.csv", ["z", "y", "x"], "gravity_"),
    ("Orientation.csv", ["qz", "qy", "qx", "qw"], "ori_"),
    ("Magnetometer.csv", ["z", "y", "x"], "magne_"),
    ("Barometer.csv", ["relativeAltitude", "pressure"], "bar_"),
    ("Location.csv", ["altitude", "latitude", "longitude"], "gps_"),
]

# Every source uses the same timestamp column ("time") and the same "avg"
# aggregation argument; only the file, columns and prefix vary.
for sensor_file, value_columns, column_prefix in sensor_sources:
    create_dataset_object.add_numerical_dataset(
        sensor_file,
        "time",
        value_columns,
        "avg",
        column_prefix
    )

# Full path of the CSV that the last cell of the notebook writes.
target_dataset_path = f"{RESULT_PATH}/{RESULT_FILENAME}"

# Take the merged table from the builder, copy its index into a regular
# 'time' column, then replace the index with a plain 0..n-1 counter.
dataset: pd.DataFrame = create_dataset_object.data_table
dataset['time'] = dataset.index
dataset = dataset.reset_index(drop=True)

# Preview the first five rows.
dataset.head()

Please wait, this will take a while to run!
Reading data from Accelerometer.csv
Reading data from Gyroscope.csv
Reading data from Gravity.csv
Reading data from Orientation.csv
Reading data from Magnetometer.csv
Reading data from Barometer.csv
Reading data from Location.csv


Unnamed: 0,accel_z,accel_y,accel_x,gyro_z,gyro_y,gyro_x,gravity_z,gravity_y,gravity_x,ori_qz,...,ori_qw,magne_z,magne_y,magne_x,bar_relativeAltitude,bar_pressure,gps_altitude,gps_latitude,gps_longitude,time
0,-0.00406758,0.0339792,-0.0270799,-0.051672,0.075464,-0.011046,-7.34345,-6.471474,-0.457445,-0.561899,...,0.747427,-47.232837,-12.277156,4.546346,-0.042274,1017.524719,41.925382,52.045397,4.384513,2023-06-08 10:02:14.428471800
1,0.0467185,-0.0518331,0.00703794,0.005975,-0.041301,0.006421,-7.587952,-6.20784,-0.224354,-0.581494,...,0.740765,-47.81057,-10.377309,5.082871,-0.084553,1017.529984,41.930438,52.045389,4.384509,2023-06-08 10:02:15.428471800
2,0.258378,0.0157325,-0.0867344,0.292964,0.487526,-0.174606,-7.1156,-6.463938,-1.046507,-0.372905,...,0.844514,-44.860904,-16.02207,10.147813,-0.084553,1017.529984,41.87745,52.045384,4.384505,2023-06-08 10:02:16.428471800
3,-0.388019,-0.656673,0.409764,-0.058768,0.024429,-0.334667,-9.386129,-2.428225,0.830423,-0.439791,...,0.885701,-46.473158,6.270651,18.734642,-0.031708,1017.523575,41.84356,52.045386,4.384503,2023-06-08 10:02:17.428471800
4,0.181126,0.6521,0.148817,-0.114041,-0.00111,0.078117,-9.671041,-1.264088,-0.085595,-0.48319,...,0.870694,-49.733535,12.484955,13.000465,0.155897,1017.500992,41.886054,52.045386,4.384501,2023-06-08 10:02:18.428471800


### Prepare annotation data

In [180]:
# Load the activity annotations, discard the 'seconds_elapsed' column and
# parse 'time' into pandas datetimes so it can be compared with the sensor rows.
annotation_dataset: pd.DataFrame = (
    pd.read_csv(DATASET_PATH / "Annotation.csv")
    .drop(columns='seconds_elapsed')
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
)
annotation_dataset

Unnamed: 0,time,text
0,2023-06-08 10:02:22.139,relaxing
1,2023-06-08 10:30:38.874,eating
2,2023-06-08 10:57:30.087,relaxing
3,2023-06-08 11:21:31.816,walking
4,2023-06-08 11:52:46.019,studying
5,2023-06-08 14:23:46.458,relaxing


In [181]:
# Each annotation marks the START of an activity, which lasts until the next
# annotation begins. The last activity has no successor, so it is taken to run
# until the last timestamp for which we have sensor data.
# NOTE: raises IndexError if `dataset` is empty.
last_timestamp = dataset['time'].iloc[-1]

# shift(-1) pairs every row with the start time of the following row, and
# fill_value supplies the end of the final row. Building the column this way
# keeps it a datetime column from the start; seeding it with the integer 0 and
# then writing Timestamps into it is an incompatible-dtype assignment that
# recent pandas versions deprecate. It also does not rely on the frame having
# a default 0..n-1 index.
annotation_dataset['end_time'] = annotation_dataset['time'].shift(-1, fill_value=last_timestamp)

annotation_dataset

Unnamed: 0,time,text,end_time
0,2023-06-08 10:02:22.139,relaxing,2023-06-08 10:30:38.874000
1,2023-06-08 10:30:38.874,eating,2023-06-08 10:57:30.087000
2,2023-06-08 10:57:30.087,relaxing,2023-06-08 11:21:31.816000
3,2023-06-08 11:21:31.816,walking,2023-06-08 11:52:46.019000
4,2023-06-08 11:52:46.019,studying,2023-06-08 14:23:46.458000
5,2023-06-08 14:23:46.458,relaxing,2023-06-08 16:02:17.428471800


### Build the final dataset

In [182]:
# Add the labels
labels = annotation_dataset['text'].unique()

for label in annotation_dataset['text'].unique():
    dataset[label] = 0

In [183]:
# Mark the labels where appropriate
for index, row in annotation_dataset.iterrows():
    start_time = row['time']
    end_time = row['end_time']
    label = row['text']
    dataset.loc[(dataset['time'] >= start_time) & (dataset['time'] <= end_time), label] = 1

# Drop all the rows that are not labeled
dataset = dataset[dataset[labels].sum(axis=1) != 0]
dataset.head(5)

Unnamed: 0,accel_z,accel_y,accel_x,gyro_z,gyro_y,gyro_x,gravity_z,gravity_y,gravity_x,ori_qz,...,bar_relativeAltitude,bar_pressure,gps_altitude,gps_latitude,gps_longitude,time,relaxing,eating,walking,studying
8,-0.170119,0.161832,0.227087,-0.106231,0.010597,-0.084743,-7.913668,-5.737212,0.255096,-0.680946,...,0.348789,1017.477798,41.531347,52.045377,4.384529,2023-06-08 10:02:22.428471800,1,0,0,0
9,-0.0136824,0.0356441,-0.121006,0.063131,-0.017078,0.058775,-8.082583,-5.502527,-0.606385,-0.702423,...,0.364643,1017.47551,41.680638,52.045386,4.384549,2023-06-08 10:02:23.428471800,1,0,0,0
10,-0.0109192,0.0231025,0.0102154,-0.020131,0.028427,-0.066613,-8.030929,-5.606764,-0.185498,-0.683085,...,0.38578,1017.473145,41.680506,52.04539,4.384558,2023-06-08 10:02:24.428471800,1,0,0,0
11,-0.00153206,0.0161529,-0.0225298,-0.040057,0.022515,0.068452,-8.263604,-5.239394,-0.517118,-0.702382,...,0.391064,1017.472839,41.747784,52.04539,4.384556,2023-06-08 10:02:25.428471800,1,0,0,0
12,-0.0316193,0.0243866,0.00667646,-0.01686,-0.112503,0.110842,-7.518719,-6.229657,-0.477621,-0.69589,...,0.391064,1017.472839,41.723107,52.045389,4.384561,2023-06-08 10:02:26.428471800,1,0,0,0


In [184]:
# Persist the labeled table as CSV; the row index is not written to the file.
print(f"Writing processed dataset to {target_dataset_path}")
dataset.to_csv(path_or_buf=target_dataset_path, index=False)

Writing processed dataset to datasets/tara_train.csv
