# Import packages

In [4]:
import os, wget, zipfile
import pandas as pd
import re
import numpy as np

In [5]:
def download_data(destination_dir, url="https://archive.ics.uci.edu/ml/machine-learning-databases/00348/MovementAAL.zip"):

    # Make path if does not exits
    if(os.path.exists(destination_dir) == False):
        os.makedirs(destination_dir)
    # Download data if it does not exist on disk
    if(os.path.exists(os.path.join(destination_dir, "MovementAAL.zip")) == False):
        wget.download(url, destination_dir)
        
def extract_data(data_dir, filename="MovementAAL.zip"):
    
    with zipfile.ZipFile(os.path.join(data_dir, filename), 'r') as zip_ref:
        zip_ref.extractall(data_dir)

# Download and extract data

In [6]:
root_dir = "/mnt/Disk-2/My Stuff/UWaterloo/Coursework/Spring'22/ECE 659/Project/ece-659-project/"
raw_data_dir = os.path.join(root_dir, 'data/raw')
download_data(raw_data_dir)
extract_data(raw_data_dir)

# Load data

In [7]:
rss_data = dict()
rss_files_dir = os.path.join(raw_data_dir, 'dataset')
for file in os.listdir(rss_files_dir):
    file_path = os.path.join(rss_files_dir, file)
    filename = file.split('.')[0]
    if(bool(re.match(r"^MovementAAL_RSS_.*.csv$", file))):
        file
        rss_data[filename] = pd.read_csv(file_path)
    else:
        target = pd.read_csv(file_path)

# Explore data

In [8]:
rss_data.keys()

dict_keys(['MovementAAL_RSS_243', 'MovementAAL_RSS_115', 'MovementAAL_RSS_56', 'MovementAAL_RSS_78', 'MovementAAL_RSS_186', 'MovementAAL_RSS_228', 'MovementAAL_RSS_190', 'MovementAAL_RSS_160', 'MovementAAL_RSS_169', 'MovementAAL_RSS_263', 'MovementAAL_RSS_2', 'MovementAAL_RSS_127', 'MovementAAL_RSS_12', 'MovementAAL_RSS_254', 'MovementAAL_RSS_99', 'MovementAAL_RSS_137', 'MovementAAL_RSS_214', 'MovementAAL_RSS_47', 'MovementAAL_RSS_154', 'MovementAAL_RSS_244', 'MovementAAL_RSS_276', 'MovementAAL_RSS_72', 'MovementAAL_RSS_206', 'MovementAAL_RSS_49', 'MovementAAL_RSS_114', 'MovementAAL_RSS_231', 'MovementAAL_RSS_81', 'MovementAAL_RSS_252', 'MovementAAL_RSS_61', 'MovementAAL_RSS_133', 'MovementAAL_RSS_24', 'MovementAAL_RSS_303', 'MovementAAL_RSS_285', 'MovementAAL_RSS_306', 'MovementAAL_RSS_71', 'MovementAAL_RSS_52', 'MovementAAL_RSS_42', 'MovementAAL_RSS_69', 'MovementAAL_RSS_207', 'MovementAAL_RSS_117', 'MovementAAL_RSS_31', 'MovementAAL_RSS_104', 'MovementAAL_RSS_75', 'MovementAAL_RSS_4

In [9]:
rss_data['MovementAAL_RSS_245'].head()

Unnamed: 0,#RSS_anchor1,RSS_anchor2,RSS_anchor3,RSS_anchor4
0,0.18182,-0.2,-0.73333,-0.95745
1,-0.27273,-0.066667,-0.73333,-0.87234
2,-0.27273,-0.022222,-0.73333,-0.65957
3,-0.27273,-0.37778,-0.73333,-0.65957
4,0.45455,-0.2,-0.6,-0.65957


In [10]:
target.head()

Unnamed: 0,#sequence_ID,class_label
0,1,1
1,2,1
2,3,1
3,4,1
4,5,1


# Data Wrangling

In [42]:
# Get maximum length of time-series
max_len = 0
for key in rss_data.keys():
    if(len(rss_data[key]) > max_len):
        max_len = len(rss_data[key])

In [43]:
# Pad all time-series instances
rss_data_padded = dict()
cols = ['#RSS_anchor1', ' RSS_anchor2', ' RSS_anchor3', ' RSS_anchor4']
for key in rss_data.keys():
    zero_df = pd.DataFrame(0, index=np.arange(len(rss_data[key]), max_len), columns=cols)
    rss_data_padded[key] = pd.concat([rss_data[key], zero_df], axis=0)

In [56]:
def create_timeseries_instance(data):
    
    arr_list = []
    cols = data.columns
    
    for col in cols:
        arr = np.array(data[col])
        reshaped_arr = np.expand_dims(arr, axis=1)
        arr_list.append(reshaped_arr)
        
    data_stacked = np.hstack(tuple(arr_list))
    return data_stacked

def create_timeseries_data(data_dict):
    
    instance_list = []
    for key in data_dict.keys():
        instance = convert_to_timeseries(data_dict[key])
        reshaped_instance = np.expand_dims(instance, axis=0)
        instance_list.append(reshaped_instance)

    data = np.concatenate(instance_list, axis=0)
    return data

In [60]:
create_timeseries_data(rss_data_padded).shape

(314, 129, 4)