In [None]:
import numpy as np
import os
import pandas as pd
from subprocess import call
import requests 
np.random.seed(0)
import urllib.request
import zipfile

In [None]:
# functions for loading and downloading the dataset

# load a single file as a numpy array
def load_file(filepath):
    """Read a header-less, whitespace-delimited text file into a numpy array.

    Parameters
    ----------
    filepath : str or path-like
        Location of the text file to parse.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per line of the file and one column per
        whitespace-separated field.
    """
    # `delim_whitespace=True` is deprecated since pandas 2.2; the regex
    # separator `\s+` is the documented replacement with the same behaviour
    # (runs of blanks count as one delimiter, leading blanks are ignored).
    dataframe = pd.read_csv(filepath, header=None, sep=r'\s+')
    return dataframe.values
 
# load several files and stack them depth-wise into one array
def load_group(filenames, prefix=''):
    """Load every file in `filenames` and stack the results along axis 2.

    Parameters
    ----------
    filenames : iterable of str
        File names to read, e.g. the x, y, z signals of one sensor.
    prefix : str, optional
        String prepended to each file name before it is passed to
        `load_file` (typically a directory path ending in a separator).

    Returns
    -------
    numpy.ndarray
        Result of `np.dstack` over the loaded arrays, so each file becomes
        one slice along the third dimension.
    """
    per_file_arrays = [load_file(prefix + fname) for fname in filenames]
    # each file contributes one feature channel on the 3rd axis
    return np.dstack(per_file_arrays)
 
# load the signals and labels of one dataset group ('train' or 'test')
def load_dataset(group, prefix=''):
    """Load the inertial signals and class labels for one dataset group.

    Parameters
    ----------
    group : str
        Group name used both as sub-directory and as file-name suffix,
        such as 'train' or 'test'.
    prefix : str, optional
        Path prefix of the dataset root (expected to end in a separator).

    Returns
    -------
    tuple
        `(X, y)` where `X` is the depth-stacked array returned by
        `load_group` for the six signal files (total acceleration x/y/z
        followed by body gyroscope x/y/z) and `y` is the array read from
        the group's `y_<group>.txt` label file.
    """
    signal_dir = prefix + group + '/Inertial Signals/'
    label_path = prefix + group + '/y_' + group + '.txt'

    # total acceleration first, then body gyroscope; x, y, z within each
    signal_files = [
        sensor + '_' + axis + '_' + group + '.txt'
        for sensor in ('total_acc', 'body_gyro')
        for axis in ('x', 'y', 'z')
    ]

    # inputs are loaded before the labels, as separate reads
    X = load_group(signal_files, signal_dir)
    y = load_file(label_path)
    return X, y

# Framing data by windows
def segmentData(accData,time_step,step):
    """Cut a 2-D signal into fixed-length windows taken every `step` rows.

    Parameters
    ----------
    accData : numpy.ndarray
        Array indexed as `accData[rows, columns]`; windows are taken along
        the first axis.
    time_step : int
        Number of rows in every window.
    step : int
        Distance, in rows, between the starts of consecutive windows.
        `range` raises ValueError if this is 0.

    Returns
    -------
    list of numpy.ndarray
        Windows `accData[i:i+time_step, :]` for each start `i`; every
        window has exactly `time_step` rows. Empty when the input has
        fewer than `time_step` rows.
    """
    # The last valid start index is `len - time_step`, so the exclusive stop
    # must be one past it; without the +1 the final full window is dropped.
    last_start_exclusive = accData.shape[0] - time_step + 1
    return [
        accData[start:start + time_step, :]
        for start in range(0, last_start_exclusive, step)
    ]

# download function for datasets
def download_url(url, save_path, chunk_size=128, timeout=60):
    """Stream the resource at `url` into the file `save_path`.

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET request.
    save_path : str or path-like
        Destination file; it only appears once the download has finished.
    chunk_size : int, optional
        Number of bytes requested per chunk from the response stream.
    timeout : float or tuple, optional
        Passed to `requests.get` so a stalled server cannot hang the
        notebook forever.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Without this check the
        error page would be written to disk as if it were the dataset.
    """
    # Write to a side file first: an interrupted download must not leave a
    # truncated file at `save_path`, because callers use its existence to
    # decide that the download is already complete. A leftover side file is
    # simply overwritten by the next attempt.
    partial_path = os.fspath(save_path) + '.part'
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(partial_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)
    os.replace(partial_path, save_path)

In [None]:
# download and unzipping dataset
os.makedirs('dataset',exist_ok=True)

# path of the zip archive on disk (despite the name, this is a file)
data_directory = os.path.abspath("dataset/UCI HAR Dataset.zip")
if not os.path.exists(data_directory):
    # announce the download only when one actually happens
    print("downloading...")
    download_url("https://archive.ics.uci.edu/ml/machine-learning-databases/00240/UCI HAR Dataset.zip",data_directory)
    print("download done")
else:
    print("dataset already downloaded")

# folder expected to exist once the archive has been unpacked
data_directory2 = os.path.abspath("dataset/UCI HAR Dataset")
if not os.path.exists(data_directory2):
    print("extracting data")
    with zipfile.ZipFile(data_directory, 'r') as zip_ref:
        zip_ref.extractall(os.path.abspath("dataset/"))
    print("data extracted in " + data_directory2)
else:
    print("Data already extracted in " + data_directory2)

In [None]:
# read the train and test splits from the extracted dataset folder
trainX, trainy = load_dataset('train', 'dataset/UCI HAR Dataset/')
testX, testy = load_dataset('test', 'dataset/UCI HAR Dataset/')

# shift every label down by one
# (presumably so that class ids start at 0 - confirm against the label files)
trainy = trainy - 1
testy = testy - 1

In [None]:
# Combining datasets: train rows first, test rows appended after them.
# Joining along axis 0 keeps the window and channel axes untouched, which is
# what the per-channel concatenate + dstack produced.
datasets = np.concatenate((trainX, testX), axis=0)

In [None]:
# Preparing and normalizing dataset
#
# Channels 0-2 are the accelerometer axes and channels 3-5 the gyroscope
# axes (order fixed by load_dataset). One scalar mean/std/var is computed per
# sensor over the combined data and applied to every split.
# NOTE(review): the statistics include the test split, so the normalized
# training data depends on test samples - confirm this is intended.

accAll = datasets[:, :, :3]
gyroAll = datasets[:, :, 3:]

# accelerometer statistics (scalars over all samples, time steps and axes)
meanAcc = accAll.mean()
stdAcc = accAll.std()
varAcc = accAll.var()
stackedFeaturesAcc = np.array([meanAcc, stdAcc, varAcc])

# gyroscope statistics
meanGyro = gyroAll.mean()
stdGyro = gyroAll.std()
varGyro = gyroAll.var()
stackedFeaturesGyro = np.array([meanGyro, stdGyro, varGyro])

# row 0: accelerometer [mean, std, var]; row 1: gyroscope [mean, std, var]
stackedFeatures = np.stack((stackedFeaturesAcc, stackedFeaturesGyro))

# standardize each split with the shared statistics
normalizedTrainAcc = (trainX[:, :, :3] - meanAcc) / stdAcc
normalizedTestAcc = (testX[:, :, :3] - meanAcc) / stdAcc
normalizedAllAcc = (accAll - meanAcc) / stdAcc

normalizedTrainGyro = (trainX[:, :, 3:] - meanGyro) / stdGyro
normalizedTestGyro = (testX[:, :, 3:] - meanGyro) / stdGyro
normalizedAllGyro = (gyroAll - meanGyro) / stdGyro

In [None]:
# stacking datasets: re-join accelerometer and gyroscope channels on axis 2
normalizedX = np.concatenate((normalizedTrainAcc, normalizedTrainGyro), axis=2)
normalizedEval = np.concatenate((normalizedTestAcc, normalizedTestGyro), axis=2)
normalizedAll = np.concatenate((normalizedAllAcc, normalizedAllGyro), axis=2)

In [None]:
# saving the UCI dataset
dataName = 'UCI'

# output file stem for each channel index along axis 2
channelNames = ('AccX', 'AccY', 'AccZ', 'GyroX', 'GyroY', 'GyroZ')

# `normalizedAll` holds the train rows followed by the test rows, so its
# labels must be joined in the same order. Writing only `testy` here left the
# label file with fewer rows than the signals it is supposed to describe.
allLabels = np.concatenate((trainy, testy), axis=0)

# (sub-folder, signals, labels) for every split written to disk
splitsToSave = (
    ('train', normalizedX, trainy),
    ('eval', normalizedEval, testy),
    ('all', normalizedAll, allLabels),
)

for splitName, splitSignals, splitLabels in splitsToSave:
    splitDir = 'datasetStandardized/' + dataName + '/' + splitName
    os.makedirs(splitDir, exist_ok=True)
    # one CSV per channel: rows are windows, columns are time steps
    for channelIdx, channelName in enumerate(channelNames):
        np.savetxt(splitDir + '/' + channelName + dataName + '.csv', splitSignals[:, :, channelIdx], delimiter=',')
    np.savetxt(splitDir + '/Label' + dataName + '.csv', splitLabels, delimiter=',')

# normalization statistics: one row per sensor, columns mean / std / var
os.makedirs('datasetStandardized/'+dataName+ '/features', exist_ok=True)
np.savetxt('datasetStandardized/'+dataName+ '/features/mean-std-var'+dataName+'.csv', stackedFeatures, delimiter=',')