In [None]:
# GDrive mount (required only if being run on GDrive)
# The google.colab package is imported inside a guard so that this cell does
# not abort the notebook with an ImportError when that package is missing.
try:
  from google.colab import drive
except ImportError:
  print("google.colab not available - skipping GDrive mount")
else:
  drive.mount('/gdrive')

Mounted at /gdrive


In [None]:
# importing dependencies
import os
import pandas as pd
import numpy as np


In [None]:
##### USER SETTINGS - set these carefully, refer :instructions.txt
INPUT_DIR = "/gdrive/Shareddrives/DLNN_ProjC1/input/ECE542_sp2021_Project_TerrainRecognition"
OUTPUT_DIR_TRAIN = "/gdrive/Shareddrives/DLNN_ProjC1/output/sync_data_ishan/train"
OUTPUT_DIR_TEST = "/gdrive/Shareddrives/DLNN_ProjC1/output/sync_data_ishan/test"
#####


# Sub-folders of INPUT_DIR (directory structure provided by teaching staff)
TRAIN_DIR, TEST_DIR = (
    os.path.join(INPUT_DIR, sub_dir) for sub_dir in ("TrainingData", "TestData")
)


# Column names used when reading the header-less CSV files
ATTRIBUTE_NAMES = [
    sensor + "_" + axis for sensor in ("accel", "gyro") for axis in ("x", "y", "z")
]
TIME, CLASS = ["TIME"], ["CLASS"]

In [None]:
# function to synchronize the data. Logic explained in report
def sync_data(file_x, file_x_time, file_y, file_y_time, IN_DIR=".", OUT_DIR="."):
  """Attach a label and its timestamp to every row of a sensor file.

  Reads the header-less CSV files of one recording from IN_DIR, assigns each
  x row the label whose y timestamp is (greedily) closest to the row's x
  timestamp, and writes the result to OUT_DIR as ``<prefix>x_sync.csv`` where
  ``<prefix>`` is the first 15 characters of ``file_x``.

  The output has no header and no index. Its columns are, in order: the six
  ATTRIBUTE_NAMES columns, CLASS, yTIME (the matched y timestamp, kept for
  debugging) and xTIME (the row's own timestamp).

  Parameters:
    file_x:      name of the CSV holding the sensor rows.
    file_x_time: name of the CSV holding one timestamp per x row.
    file_y:      name of the CSV holding one label per y timestamp, or None
                 when no labels exist; every row is then labelled -1.
    file_y_time: name of the CSV holding the label timestamps.
    IN_DIR:      directory the four input files are read from.
    OUT_DIR:     directory the output is written to (created if missing).

  Returns:
    True once the output file has been written.

  Raises:
    ValueError: if x and x_time have different row counts, or if y_time
                contains no rows.
  """
  # finding the prefix for this group of files
  prefix = file_x[:15]
  print("Processing:", prefix, end = " ")

  # loading data in memory
  x = pd.read_csv(os.path.join(IN_DIR, file_x), names = ATTRIBUTE_NAMES)
  x_time = pd.read_csv(os.path.join(IN_DIR, file_x_time), names = TIME)
  y_time = pd.read_csv(os.path.join(IN_DIR, file_y_time), names = TIME)

  if x.shape[0] != x_time.shape[0]:
    raise ValueError("%s has %d rows but %s has %d rows"
                     % (file_x, x.shape[0], file_x_time, x_time.shape[0]))
  if y_time.shape[0] == 0:
    raise ValueError("%s contains no timestamps" % file_y_time)

  if file_y is not None:
    y = pd.read_csv(os.path.join(IN_DIR, file_y), names = CLASS)
  else:
    # No labels available: one -1 placeholder per y timestamp. A signed dtype
    # is used because -1 cannot be represented in an unsigned array.
    y = pd.DataFrame(data = np.full((y_time.shape[0],), -1, dtype = np.int64), columns = CLASS)

  x_times = x_time["TIME"].to_numpy()
  y_times = y_time["TIME"].to_numpy()
  last_row = y_times.shape[0] - 1

  # we first replace the x_times with the closest possible y_times (Greedy approach):
  # a pointer walks forward through y_time, and moves one step whenever the
  # next y timestamp is at least as close to the current x timestamp as the
  # current one (ties move forward). It never moves backwards.
  current_row = 0
  next_row = min(1, last_row)
  matched_rows = []
  for x_stamp in x_times:
    if not (abs(y_times[current_row] - x_stamp) < abs(y_times[next_row] - x_stamp)):
      current_row = next_row
      next_row = min(current_row + 1, last_row)
    matched_rows.append(current_row)
  matched_rows = np.asarray(matched_rows, dtype = np.intp)

  print(".", end = "")

  # now using the matched time we join x and y. A label is read from the FIRST
  # y_time row that carries the matched timestamp; the rows are resolved once
  # up front instead of searching y_time again for every x row.
  seen = {}
  first_row = np.asarray(
      [seen.setdefault(stamp, row) for row, stamp in enumerate(y_times.tolist())],
      dtype = np.intp)
  label_rows = first_row[matched_rows]

  x["CLASS"] = y["CLASS"].to_numpy()[label_rows]
  x["yTIME"] = y_times[matched_rows]   # matched time, kept for debug
  x["xTIME"] = x_times

  # NOTE: no down-sampling is performed here; one output row is written per x row.

  os.makedirs(OUT_DIR, exist_ok = True)
  file_x_sync = prefix + "x_sync.csv"
  x.to_csv(os.path.join(OUT_DIR, file_x_sync), header= False, index = False)
  print(".", end = "")

  print("DONE")

  return True

In [None]:
# calling the above function for all of train data (label files are passed in)
# TRAIN_DIR is already the full path of the folder, so it is listed directly.
# Each group is identified by its "*_x.csv" file, whose first 15 characters are
# the prefix shared by the group's other files; other directory entries are ignored.
prefixes = {filename[:15] for filename in os.listdir(TRAIN_DIR) if filename.endswith("_x.csv")}
for prefix in sorted(prefixes):
  sync_data(prefix+"_x.csv", prefix+"_x_time.csv", prefix+"_y.csv", prefix+"_y_time.csv", IN_DIR= TRAIN_DIR, OUT_DIR=OUTPUT_DIR_TRAIN)

Processing: subject_001_01_ ..DONE
Processing: subject_001_02_ ..DONE
Processing: subject_001_03_ ..DONE
Processing: subject_001_04_ ..DONE
Processing: subject_001_05_ ..DONE
Processing: subject_001_06_ ..DONE
Processing: subject_001_07_ ..DONE
Processing: subject_001_08_ ..DONE
Processing: subject_002_01_ ..DONE
Processing: subject_002_02_ ..DONE
Processing: subject_002_03_ ..DONE
Processing: subject_002_04_ ..DONE
Processing: subject_002_05_ ..DONE
Processing: subject_003_01_ ..DONE
Processing: subject_003_02_ ..DONE
Processing: subject_003_03_ ..DONE
Processing: subject_004_01_ ..DONE
Processing: subject_004_02_ ..DONE
Processing: subject_005_01_ ..DONE
Processing: subject_005_02_ ..DONE
Processing: subject_005_03_ ..DONE
Processing: subject_006_01_ ..DONE
Processing: subject_006_02_ ..DONE
Processing: subject_006_03_ ..DONE
Processing: subject_007_01_ ..DONE
Processing: subject_007_02_ ..DONE
Processing: subject_007_03_ ..DONE
Processing: subject_007_04_ ..DONE
Processing: subject_

In [None]:
# calling the above function for all of test data
# no label file is passed (file_y=None), so every row is written with CLASS -1
# TEST_DIR is already the full path of the folder, so it is listed directly.
# Each group is identified by its "*_x.csv" file, whose first 15 characters are
# the prefix shared by the group's other files; other directory entries are ignored.
prefixes = {filename[:15] for filename in os.listdir(TEST_DIR) if filename.endswith("_x.csv")}
for prefix in sorted(prefixes):
  sync_data(prefix+"_x.csv", prefix+"_x_time.csv", None, prefix+"_y_time.csv", IN_DIR= TEST_DIR, OUT_DIR=OUTPUT_DIR_TEST)

Processing: subject_009_01_ ..DONE
Processing: subject_010_01_ ..DONE
Processing: subject_011_01_ ..DONE
Processing: subject_012_01_ ..DONE
