### This repository is for converting the data into a form usable by the TSFresh package

In [1]:
%matplotlib inline
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [2]:
# Read the CSV into a frame with explicit column names; supplying `names`
# makes pandas treat the first line of the file as data, not as a header.
data = pd.read_csv(
    '../data/new_data_only.csv',
    names=['x_acc', 'y_acc', 'z_acc'],
)
# Preview the first rows to confirm the columns were parsed as expected.
print(data.head())

      x_acc     y_acc     z_acc
0  0.496614  0.507404  0.664175
1  0.593133  0.544373  0.639089
2  0.583067  0.535586  0.626219
3  0.556202  0.507976  0.593481
4  0.556658  0.492937  0.577615


We need an `id` column that tells tsfresh which rows form one time series: every consecutive run of `reqd_len` rows is treated as a separate sample. This can be done by creating a column which contains 0 for the first `reqd_len` rows, 1 for the next `reqd_len` rows, 2 for the next `reqd_len` rows and so on.

Also, we need to ensure data formatting as per [this page](https://tsfresh.readthedocs.io/en/latest/text/data_formats.html)

#### Generating `ID` column in a list

In [3]:
# Number of consecutive rows that make up one time series (one `id` value).
reqd_len = 100

# Row r belongs to window r // reqd_len: 0 for the first `reqd_len` rows,
# 1 for the next `reqd_len` rows, and so on. Deriving the ids from the row
# positions yields exactly one id per row, so the later `data['id'] = idx`
# assignment cannot fail with a length mismatch when len(data) is not an
# exact multiple of `reqd_len` (any trailing rows form a final, shorter
# window). For exact multiples the result is identical to counting windows.
idx = (np.arange(len(data)) // reqd_len).tolist()

print(len(idx))

53300


#### Adding this column into the DataFrame and saving for future use

In [4]:
# Attach the per-row window ids as the `id` column.
data['id'] = idx
print(data.head())

# Write the frame to disk without a header row and without the index column.
data.to_csv(
    '../data/new_data_only_tsfresh_compatible.csv',
    header=False,
    index=False,
)

      x_acc     y_acc     z_acc  id
0  0.496614  0.507404  0.664175   0
1  0.593133  0.544373  0.639089   0
2  0.583067  0.535586  0.626219   0
3  0.556202  0.507976  0.593481   0
4  0.556658  0.492937  0.577615   0


### Using alternate examples only

In [3]:
# Read the alternate-examples CSV with explicit column names; supplying
# `names` makes pandas treat the first line of the file as data.
data = pd.read_csv(
    '../data/alt_data_only.csv',
    names=['x_acc', 'y_acc', 'z_acc'],
)
# Preview the first rows to confirm the columns were parsed as expected.
print(data.head())

      x_acc     y_acc     z_acc
0 -0.086161 -0.086288  0.051617
1 -0.086161 -0.086288  0.051617
2 -0.062439 -0.262146  0.370544
3 -0.149780 -0.278687  0.396240
4 -0.153320 -0.258911  0.242493


We need an `id` column that tells tsfresh which rows form one time series: every consecutive run of 150 rows is treated as a separate sample. This can be done by creating a column which contains 0 for the first 150 rows, 1 for the next 150 rows, 2 for the next 150 rows and so on.

Also, we need to ensure data formatting as per [this page](https://tsfresh.readthedocs.io/en/latest/text/data_formats.html)

#### Generating `ID` column in a list

In [4]:
# Number of consecutive rows that make up one time series in the alternate
# data set (previously the literal 150 repeated in both loops).
alt_reqd_len = 150

# Row r belongs to window r // alt_reqd_len: 0 for the first 150 rows, 1 for
# the next 150 rows, and so on. Deriving the ids from the row positions
# yields exactly one id per row, so the later `data['id'] = idx` assignment
# cannot fail with a length mismatch when len(data) is not an exact multiple
# of the window length (any trailing rows form a final, shorter window).
# For exact multiples the result is identical to counting windows.
idx = (np.arange(len(data)) // alt_reqd_len).tolist()

print(len(idx))

79950


In [5]:
# Attach the per-row window ids as the `id` column.
data['id'] = idx
print(data.head())

# Write the frame to disk without a header row and without the index column.
data.to_csv(
    '../data/alt_data_only_tsfresh_compatible.csv',
    header=False,
    index=False,
)

      x_acc     y_acc     z_acc  id
0 -0.086161 -0.086288  0.051617   0
1 -0.086161 -0.086288  0.051617   0
2 -0.062439 -0.262146  0.370544   0
3 -0.149780 -0.278687  0.396240   0
4 -0.153320 -0.258911  0.242493   0
