# Import packages

In [1]:
import os, wget, zipfile
import pandas as pd
import numpy as np
import sklearn

# Download and extract data

In [2]:
def download_data(destination_dir, url="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6960825/bin/sensors-19-05524-s001.zip"):
    """Download the dataset archive into ``destination_dir`` unless it is already there.

    Parameters
    ----------
    destination_dir : str
        Directory that receives the archive. It is created, parents
        included, when it does not exist yet.
    url : str, optional
        Location of the archive. The file looked for on disk is named after
        the last path segment of this URL.

    Notes
    -----
    The skip-if-present check presumes ``wget`` stores the download under the
    URL's last path segment -- TODO confirm for servers that announce a
    different file name in their response headers.
    """
    from urllib.parse import urlsplit

    # exist_ok=True replaces the separate "exists?" test and its race window
    os.makedirs(destination_dir, exist_ok=True)

    # Derive the archive name from the URL (query string/fragment excluded) so
    # that a non-default `url` is checked against its own file, not the default one
    archive_name = os.path.basename(urlsplit(url).path)
    archive_path = os.path.join(destination_dir, archive_name)

    # Download only if the archive is not on disk yet. An empty name (URL ending
    # in '/') cannot be checked, so the download is attempted in that case.
    if not archive_name or not os.path.exists(archive_path):
        wget.download(url, destination_dir)
        
def extract_data(data_dir, filename="sensors-19-05524-s001.zip"):
    """Unpack the zip archive ``filename`` located in ``data_dir`` into that same directory.

    Parameters
    ----------
    data_dir : str
        Directory that holds the archive and receives its contents.
    filename : str, optional
        Name of the zip archive inside ``data_dir``.
    """
    archive_path = os.path.join(data_dir, filename)
    with zipfile.ZipFile(archive_path, mode='r') as archive:
        archive.extractall(path=data_dir)

In [3]:
# Specify project root directory.
# The default is the original author's local checkout; set the
# ACTIVITY_RECOGNITION_ROOT environment variable to run the notebook from
# another location without editing this cell.
root_dir = os.environ.get(
    "ACTIVITY_RECOGNITION_ROOT",
    "/mnt/Disk-2/My Stuff/UWaterloo/Coursework/Spring'22/ECE 659/Project/activity-recognition/",
)
# Specify data directory
raw_data_dir = os.path.join(root_dir, 'data/raw')
# Download data (skipped when the archive is already on disk)
download_data(raw_data_dir)
# Extract data next to the archive
extract_data(raw_data_dir)

# Load data

In [4]:
# Load the measurements; the file has no header row
data_path = os.path.join(raw_data_dir, 'data.txt')
data = pd.read_csv(data_path, header=None)

# Load column headers (one name per line).
# Plain line reading is used because np.loadtxt(..., delimiter='\n') is
# rejected by NumPy >= 1.23, which does not accept a newline as delimiter.
# latin-1 can decode any byte sequence, so odd bytes cannot abort the load.
column_headers_path = os.path.join(raw_data_dir, 'labels.txt')
with open(column_headers_path, encoding='latin-1') as headers_file:
    column_headers = np.array(
        [line.rstrip('\r\n') for line in headers_file if line.rstrip('\r\n')],
        dtype='str',
    )

# Fail early with a readable message when the two files do not match
if len(column_headers) != data.shape[1]:
    raise ValueError(
        f"{column_headers_path} lists {len(column_headers)} column names, "
        f"but {data_path} has {data.shape[1]} columns"
    )

# Insert column headers
data.columns = column_headers

# Data wrangling

In [5]:
# Feature matrix: every column except the last one
X = np.array(data.iloc[:, :-1])

# Target vector: the last column, still in its raw string form
y = np.array(data.iloc[:, -1])
# Decode each raw label to a number: strip the '.0f' suffix and take the code
# point of the remaining character, e.g. '\x01.0f' -> '\x01' -> ord() -> 1
y = np.array([ord(raw_label.replace('.0f', '')) for raw_label in y]).astype('float')

In [6]:
# Split data into train and test sets
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing. The fixed random_state makes the
# shuffle, and therefore every downstream result, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Normalize data
from sklearn.preprocessing import MinMaxScaler

# Learn the scaling from the training split only, then apply it to both splits
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Dimensionality reduction

In [8]:
# Collects, per reduction method, the (train, test) pair of transformed data
dr_data_dict = {}

## I. Linear methods
### 1. PCA

In [9]:
from sklearn.decomposition import PCA

# Project onto the first two principal components, fitted on the training split.
# random_state pins the result whenever scikit-learn selects a randomized SVD
# solver; the exact solvers ignore it.
pca = PCA(n_components=2, random_state=42)
pca.fit(X_train)
dr_data_dict['pca'] = (pca.transform(X_train), pca.transform(X_test))

### 2. LDA

In [10]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Supervised projection to two components: fitted with the training labels,
# then applied to the train and test splits in that order
lda = LinearDiscriminantAnalysis(n_components=2).fit(X_train, y_train)
dr_data_dict['lda'] = tuple(lda.transform(split) for split in (X_train, X_test))

### 3. Truncated SVD

In [11]:
from sklearn.decomposition import TruncatedSVD

# Two-component truncated SVD fitted on the training split. The default
# algorithm is randomized, so random_state is fixed to make the components
# reproducible from run to run.
truncated_svd = TruncatedSVD(n_components=2, random_state=42)
truncated_svd.fit(X_train)
dr_data_dict['truncated_svd'] = (truncated_svd.transform(X_train), truncated_svd.transform(X_test))

## II. Non-linear methods