In [None]:
import numpy as np
import pandas as pd
from numpy import save
import boto3 
import os

In [None]:
# Name of the S3 bucket used by this notebook; replace the placeholder with your own bucket.
data_bucket_name = 'SET_YOUR_BUCKET_NAME'

In [None]:
# Load the train/test time series and discard the 'Unnamed: 0' column
# that the CSV files carry alongside the real data columns.
train_ts = pd.read_csv('data/train_ts.csv').drop(columns=['Unnamed: 0'])
test_ts = pd.read_csv('data/test_ts.csv').drop(columns=['Unnamed: 0'])

In [None]:
def ts_to_secs(dataset, w, s):
    """Cut a labelled time series into fixed-width sliding windows.

    The last three columns of ``dataset`` are treated as label columns and
    every preceding column as a feature; the labels themselves are read from
    the ``"act"``, ``"id"`` and ``"trial"`` columns. A window is kept only when
    its first and last rows carry the same ``id``, ``act`` and ``trial``
    values, so no window straddles two recordings.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Rows are time points; columns are features followed by the three
        label columns.
    w : int
        Window length, in rows.
    s : int
        Step between the starts of consecutive windows, in rows.

    Returns
    -------
    secs_data : numpy.ndarray
        Float array of shape ``(k, n_features, w)`` holding the ``k`` kept
        windows (features on rows, time on columns).
    act_secs_labels : numpy.ndarray
        Float array of shape ``(k,)`` with the ``act`` label of each window.

    Raises
    ------
    ValueError
        If ``w`` or ``s`` is not a positive integer.
    """
    if w < 1 or s < 1:
        raise ValueError("w and s must be positive integers")

    # Transpose so that rows are features and columns are time points.
    data = dataset[dataset.columns[:-3]].values.T
    act_labels = dataset["act"].values
    id_labels = dataset["id"].values
    trial_labels = dataset["trial"].values

    m, ttp = data.shape  # number of features, total time points

    # Every start i with i + w <= ttp is a valid window, including the one that
    # ends exactly on the last row. The range is empty when the series is
    # shorter than one window, which yields empty outputs instead of an error.
    starts = range(0, ttp - w + 1, s)

    # Pre-allocate for the maximum possible number of windows; trimmed below.
    secs_data = np.zeros((len(starts), m, w))
    act_secs_labels = np.zeros(len(starts))

    k = 0  # number of windows actually kept
    for i in starts:
        last = i + w - 1
        # Skip windows whose endpoints belong to different recordings.
        if (id_labels[i] != id_labels[last]
                or act_labels[i] != act_labels[last]
                or trial_labels[i] != trial_labels[last]):
            continue

        secs_data[k] = data[:, i:i + w]
        act_secs_labels[k] = int(act_labels[i])
        k += 1

    return secs_data[:k], act_secs_labels[:k]

In [None]:
## Sliding-window length: the number of consecutive observations of each sensor
## that make up one snapshot (e.g. 100 means 100 consecutive readings per sensor).
w = 128  # the MotionSense dataset is sampled at 50 Hz, so 50 samples equal 1 second
## Step between the starts of consecutive snapshots
## (a smaller step yields more instances, at a higher computational cost).
s = 32

## The same window length and step are used for both splits.
train_data, act_train = ts_to_secs(dataset=train_ts.copy(), w=w, s=s)
test_data, act_test = ts_to_secs(dataset=test_ts.copy(), w=w, s=s)

In [None]:
from keras.utils import to_categorical

## One-hot encode the activity labels. The class count is taken from both
## splits so the train and test label matrices always have the same number of
## columns, even if the highest activity id is missing from one of them.
num_classes = int(max(act_train.max(), act_test.max())) + 1
act_train_labels = to_categorical(act_train, num_classes=num_classes)
act_test_labels = to_categorical(act_test, num_classes=num_classes)

## Append a trailing axis of size 1 so that each sample is 3-dimensional,
## as needed for Convolution2D.
train_data = np.expand_dims(train_data, axis=3)
test_data = np.expand_dims(test_data, axis=3)

In [None]:
from sklearn.utils import shuffle

# Shuffle the training samples and their labels together;
# the fixed random_state keeps the order reproducible across runs.
train_data, act_train_labels = shuffle(
    train_data,
    act_train_labels,
    random_state=0,
)

In [None]:
# Save the arrays as .npy files.
# os.makedirs(..., exist_ok=True) is used instead of `!mkdir` so that re-running
# this cell does not fail when the directory already exists and no shell is needed.
os.makedirs('npydata', exist_ok=True)
save('npydata/train_data.npy', train_data)
save('npydata/train_labels.npy', act_train_labels)
save('npydata/test_data.npy', test_data)
save('npydata/test_labels.npy', act_test_labels)

In [None]:
# Upload the saved .npy files to the S3 bucket named by `data_bucket_name`.
s3_client = boto3.client('s3')

# (local file, S3 object key) pairs.
uploads = [
    ('npydata/train_data.npy', 'train/train_data.npy'),
    ('npydata/train_labels.npy', 'train/train_labels.npy'),
    ('npydata/test_data.npy', 'test/test_data.npy'),
    ('npydata/test_labels.npy', 'test/test_labels.npy'),
]
for local_path, s3_key in uploads:
    # upload_file returns None and raises on failure, so there is no response to keep.
    s3_client.upload_file(local_path, data_bucket_name, s3_key)

In [None]:
# Persist the bucket name with IPython's %store magic so it can be restored
# in another notebook or kernel with `%store -r data_bucket_name`.
%store data_bucket_name