In [41]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before code is executed and edits to them take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [42]:
# Colab only: mount Google Drive at /content/drive so the files stored on the
# drive can be reached through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [43]:
# Work from the combined-dataset directory: the relative paths used later in
# this notebook ('../WISDM/...', '../PAMAP2/' and './') are resolved against it.
cd '/content/drive/Shared drives/Rahul_Mohit/ECE202A/WALG/Data/WISDM_PAMAP2'

/content/drive/Shared drives/Rahul_Mohit/ECE202A/WALG/Data/WISDM_PAMAP2


In [44]:
import pandas as pd
import numpy as np
import os

In [45]:
## Function to get one-hot label for the activity.
def label_to_onehot(label):
  """Return the one-hot vector that encodes an activity code.

  The vector has ``num_activities`` entries, all zero except for a 1 at the
  index that ``ACTIVITIES_MAP`` assigns to ``label``. Both names are read from
  the notebook's global configuration.

  Args:
    label: activity code; must be a key of ``ACTIVITIES_MAP``.

  Returns:
    1-D numpy array of length ``num_activities``.

  Raises:
    KeyError: if ``label`` is not a key of ``ACTIVITIES_MAP``.
  """
  encoded = np.zeros(shape=num_activities)
  encoded[ACTIVITIES_MAP[label]] = 1
  return encoded

## Function to generate sliding windows.
def sliding_window(frame_length, step, X):
    """Cut per-user, per-activity recordings into overlapping fixed-length windows.

    Rows of ``X`` are grouped by user (column 0) and then by activity code
    (column 1). Activities that are not keys of the global ``ACTIVITIES_MAP``
    are skipped. Inside each group a window of ``frame_length`` consecutive
    rows is taken every ``step`` rows, so windows never mix users or
    activities.

    Each emitted window holds columns 3..8 of its rows followed by two extra
    rows: the per-column mean and the per-column standard deviation of the
    window. Every window therefore has ``frame_length + 2`` rows and 6 columns.

    Args:
        frame_length: number of consecutive rows in one window.
        step: distance, in rows, between the starts of successive windows.
        X: 2-D array whose column 0 is the user id, column 1 the activity
            code and columns 3..8 the sensor values.

    Returns:
        Tuple ``(X_f, Y_f)``: a list of windows and a list with the matching
        one-hot label (from ``label_to_onehot``) for each window.
    """
    X_f = []
    Y_f = []
    for user in np.unique(X[:, 0]):
        user_rows = X[X[:, 0] == user]
        for activity in np.unique(user_rows[:, 1]):
            if activity not in ACTIVITIES_MAP:
                continue
            activity_rows = user_rows[user_rows[:, 1] == activity]
            onehot_label = label_to_onehot(activity)
            # The last valid window starts at row (n - frame_length). range()
            # excludes its stop value, hence the "+ 1"; without it the final
            # full window is lost whenever (n - frame_length) is a multiple of
            # step, and a group of exactly frame_length rows yields no window.
            last_start = activity_rows.shape[0] - frame_length
            for start in range(0, last_start + 1, step):
                # Slice once and reuse it for the data, mean and std rows.
                window = activity_rows[start:start + frame_length, 3:9]
                X_f.append(
                    np.vstack((
                        window,
                        np.mean(window, axis=0),
                        # Explicit float64 so std also works when the rows come
                        # from an object-dtype array.
                        np.std(window, axis=0, dtype=np.float64),
                    ))
                )
                Y_f.append(onehot_label)
    return X_f, Y_f

## Keep only accel and gyro data from hand sensor from PAMAP2 Dataset
def get_hand_ag(input_d):
  """Return a float64 copy of the first 6 channels of every sample.

  Args:
    input_d: 3-D numpy array; only indices 0..5 of the last axis are kept.
      # NOTE(review): presumably laid out as (window, time step, channel) with
      # the hand accelerometer/gyroscope in channels 0..5 - confirm against the
      # code that produced the PAMAP2 .npy files.

  Returns:
    New array of shape ``(input_d.shape[0], input_d.shape[1], 6)`` and dtype
    float64. The input is not modified.

  Raises:
    IndexError: if ``input_d`` is not 3-D or has fewer than 6 channels.
  """
  num_channels = 6
  if input_d.ndim != 3 or input_d.shape[2] < num_channels:
    raise IndexError(
      "get_hand_ag expects a 3-D array with at least {} channels, got shape {}".format(
        num_channels, input_d.shape))
  # One vectorized slice copy replaces the element-by-element Python loops.
  return np.array(input_d[:, :, :num_channels], dtype=np.float64)

## Save Data as numpy files
def numpify_and_store(X, y, X_name, y_name, outdatapath, shuffle=False):
    """Convert samples and labels to numpy arrays and save them with ``np.save``.

    Args:
        X: sequence of samples; converted with ``np.array``.
        y: sequence of labels aligned with ``X``; converted with ``np.array``.
        X_name: file name for the samples. ``np.save`` appends ``.npy`` when
            the name does not already end with it.
        y_name: file name for the labels (same ``.npy`` rule).
        outdatapath: directory in which both files are written.
        shuffle: when truthy, ``X`` and ``y`` are reordered along their first
            axis with the same fixed-seed permutation before saving.

    Returns:
        None. Two files are written and their paths are printed.
    """
    X = np.array(X)
    y = np.array(y)
    # Shuffle the train set
    if shuffle:
        # A private RandomState seeded with 123 produces the same permutation
        # as np.random.seed(123) followed by np.random.permutation, but leaves
        # the global numpy random state untouched.
        neworder = np.random.RandomState(123).permutation(X.shape[0])
        # Index only the first axis so samples/labels of any rank are accepted.
        X = X[neworder]
        y = y[neworder]
    # Save binary file
    xpath = os.path.join(outdatapath, X_name)
    ypath = os.path.join(outdatapath, y_name)
    np.save(xpath, X)
    np.save(ypath, y)
    print('   -> Storing data as ' + xpath, ypath)

In [46]:
## Variables
# Name of the combined dataset and locations of the two source datasets
# (relative to the current working directory).
final_dataset = 'WISDM_PAMAP2'
wisdm_raw_dir = '../WISDM/wisdm-dataset/raw/watch/'
pamap2_dir = '../PAMAP2/'

# Label space and sliding-window geometry (both measured in rows).
num_activities = 7
frame_length = 200
step = 40

# Number of WISDM users assigned to each split.
num_train_user = 35
num_val_user = 8
num_test_user = 8

# Inclusive, contiguous user-id ranges: train, then validation, then test.
train_start_user = 1600
train_end_user = train_start_user + num_train_user - 1
val_start_user = train_end_user + 1
val_end_user = val_start_user + num_val_user - 1
test_start_user = val_end_user + 1
test_end_user = test_start_user + num_test_user - 1

# Output directory and base file names of the saved arrays.
final_dataset_dir = './'
X_train_file = 'X_{}_train'.format(final_dataset)
Y_train_file = 'Y_{}_train'.format(final_dataset)
X_val_file = 'X_{}_val'.format(final_dataset)
Y_val_file = 'Y_{}_val'.format(final_dataset)
X_test_file = 'X_{}_test'.format(final_dataset)
Y_test_file = 'Y_{}_test'.format(final_dataset)

# Activity code -> class index. Some codes share an index (D and F -> 2,
# I and J -> 4), so the 9 codes map onto num_activities = 7 classes.
ACTIVITIES_MAP = {
    'A': 0,
    'B': 5,
    'C': 1,
    'D': 2,
    'F': 2,
    'G': 3,
    'I': 4,
    'J': 4,
    'R': 6,
}

In [47]:
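## One-time preprocessing of the raw WISDM watch files, kept commented out
## (presumably already applied; the relative path suggests it is run from the
## WISDM dataset directory - confirm before re-enabling):
##   1. delete every ';' character from the raw .txt files,
##   2. rename those .txt files to .csv.
#! sed -i -e "s/;//g" wisdm-dataset/raw/watch/*/*.txt
#! for f in wisdm-dataset/raw/watch/*/*.txt; do  mv "$f" "${f%.txt}.csv"; done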

In [48]:
## Read the WISDM watch data: one accelerometer and one gyroscope CSV per user.
# The user ids are derived from the split configuration instead of repeating
# the literal range, so changing the split sizes keeps this cell consistent.
wisdm_users = range(train_start_user, test_end_user + 1)

merge_l = []
print("Reading CSV files for WISDM dataset:")
for user in wisdm_users:
  print("   => Reading CSV file for user {}".format(user))
  accel_csv = wisdm_raw_dir + "accel/data_" + str(user) + "_accel_watch.csv"
  gyro_csv  = wisdm_raw_dir + "gyro/data_" + str(user) + "_gyro_watch.csv"
  accel_df  = pd.read_csv(accel_csv, names=["User", "Activity", "Time_Stamp", "Accel_X", "Accel_Y", "Accel_Z"])
  gyro_df   = pd.read_csv(gyro_csv, names=["User", "Activity", "Time_Stamp", "Gyro_X", "Gyro_Y", "Gyro_Z"])
  # Inner join: keep only rows whose (User, Activity, Time_Stamp) key appears
  # in both the accelerometer and the gyroscope file.
  merge_df  = accel_df.merge(gyro_df, how='inner', on=["User", "Activity", "Time_Stamp"])
  merge_l.append(merge_df)

merged_df = pd.concat(merge_l)

print("Activty list per user:")
for user in wisdm_users:
  # Filter once per user and reuse the result for both the count and the list.
  user_activities = np.unique(merged_df[merged_df['User'] == user].Activity)
  print("   => ", user, user_activities.shape[0], user_activities)

## Splitting Data into Train, Test and Validation
# The split is by user id, so no user contributes rows to more than one split.
print("Splitting dataset into train, validation and test datasets.")
train_df = merged_df.loc[(merged_df['User'] >= train_start_user) & (merged_df['User'] <= train_end_user)]
val_df   = merged_df.loc[(merged_df['User'] >= val_start_user  ) & (merged_df['User'] <= val_end_user  )]
test_df  = merged_df.loc[(merged_df['User'] >= test_start_user ) & (merged_df['User'] <= test_end_user )]

train_np = train_df.to_numpy()
val_np   = val_df.to_numpy()
test_np  = test_df.to_numpy()

## Generating sliding windows for the dataset
print("Generating sliding windows from the data.")
x_train_psw_np, y_train_psw_np = sliding_window(frame_length, step, train_np)
x_val_psw_np,   y_val_psw_np   = sliding_window(frame_length, step, val_np)
x_test_psw_np,  y_test_psw_np  = sliding_window(frame_length, step, test_np)

## Add data from PAMAP2
# Each PAMAP2 activity folder is appended under the WISDM activity code paired
# with it below, so its windows receive the same one-hot label.
# NOTE(review): sliding_window() emits windows with frame_length + 2 rows (the
# mean and std rows are appended). Confirm the PAMAP2 windows have the same
# number of rows; otherwise np.array() in numpify_and_store cannot build a
# regular 3-D array.
print("Adding PAMAP2 data:")
for activity, code in ["sitting", "D"], ["walking","A"]:
  print("   => Adding PAMAP2 data for activity: ", activity)
  X_train_P = get_hand_ag(np.load(pamap2_dir + activity + '/X_PAMAP2_train.npy'))
  X_val_P   = get_hand_ag(np.load(pamap2_dir + activity + '/X_PAMAP2_val.npy'))
  X_test_P  = get_hand_ag(np.load(pamap2_dir + activity + '/X_PAMAP2_test.npy'))
  for windows, x_out, y_out in (
      (X_train_P, x_train_psw_np, y_train_psw_np),
      (X_val_P,   x_val_psw_np,   y_val_psw_np),
      (X_test_P,  x_test_psw_np,  y_test_psw_np),
  ):
    for x in windows:
      x_out.append(x)
      y_out.append(label_to_onehot(code))

## Saving data as numpy files.
# Only the training split is shuffled; validation and test keep their order.
print("Saving data as numpy files.")
for x_windows, y_labels, x_file, y_file, shuffle_split in (
    (x_train_psw_np, y_train_psw_np, X_train_file, Y_train_file, True),
    (x_val_psw_np,   y_val_psw_np,   X_val_file,   Y_val_file,   False),
    (x_test_psw_np,  y_test_psw_np,  X_test_file,  Y_test_file,  False),
):
  numpify_and_store(
    x_windows,
    y_labels,
    X_name=x_file,
    y_name=y_file,
    outdatapath=final_dataset_dir,
    shuffle=shuffle_split
  )

Reading CSV files for WISDM dataset:
   => Reading CSV file for user 1600
   => Reading CSV file for user 1601
   => Reading CSV file for user 1602
   => Reading CSV file for user 1603
   => Reading CSV file for user 1604
   => Reading CSV file for user 1605
   => Reading CSV file for user 1606
   => Reading CSV file for user 1607
   => Reading CSV file for user 1608
   => Reading CSV file for user 1609
   => Reading CSV file for user 1610
   => Reading CSV file for user 1611
   => Reading CSV file for user 1612
   => Reading CSV file for user 1613
   => Reading CSV file for user 1614
   => Reading CSV file for user 1615
   => Reading CSV file for user 1616
   => Reading CSV file for user 1617
   => Reading CSV file for user 1618
   => Reading CSV file for user 1619
   => Reading CSV file for user 1620
   => Reading CSV file for user 1621
   => Reading CSV file for user 1622
   => Reading CSV file for user 1623
   => Reading CSV file for user 1624
   => Reading CSV file for user 1625
 