In [1]:
from Chapter2.CreateDataset import CreateDataset
import pandas as pd
import matplotlib.pyplot as plt
import os
import copy
from util.VisualizeDataset import VisualizeDataset
from util import util

In [2]:
# Locations of the raw sensor exports and of the files this notebook writes.
# Both are relative to the notebook's working directory and keep a trailing
# slash because later cells build file names by plain string concatenation.
rawdatapath = "data/raw/"
outputpath = "data/intermediate/"

if not os.path.exists(outputpath):
    # exist_ok=True keeps this safe if the directory appears between the
    # existence check and the creation call.
    os.makedirs(outputpath, exist_ok=True)
    # Report success only after the directory has actually been created.
    print("Empty path, so creating..." + outputpath + "...done!")

In [4]:
# Load the raw accelerometer export.
accraw = pd.read_csv(rawdatapath + "Accelerometer.csv")

# Name of the timestamp column; the other sensor cells reuse this variable.
timecol = "Time (s)"

# Feature columns: every column except the timestamp and the
# absolute-acceleration column.
accfeatures = [
    name
    for name in accraw.columns
    if name not in (timecol, "Absolute acceleration (m/s^2)")
]
accfeatures

['Acceleration x (m/s^2)', 'Acceleration y (m/s^2)', 'Acceleration z (m/s^2)']

In [7]:
# Show the loaded accelerometer table as this cell's output.
accraw

Unnamed: 0,Time (s),Acceleration x (m/s^2),Acceleration y (m/s^2),Acceleration z (m/s^2),Absolute acceleration (m/s^2)
0,0.018694,-1.977070,7.584650,6.073283,9.915669
1,0.021144,-2.020140,7.582257,6.054141,9.910812
2,0.023594,-2.003391,7.591828,6.054141,9.914739
3,0.026045,-2.008176,7.579864,6.056533,9.908012
4,0.028495,-1.993819,7.591828,6.020641,9.892385
...,...,...,...,...,...
124856,300.012150,-2.254936,7.548757,5.733502,9.743794
124857,300.014552,-2.257329,7.560721,5.735895,9.755026
124858,300.016956,-2.235794,7.536793,5.757431,9.744231
124859,300.019358,-2.245365,7.541579,5.764609,9.754373


In [5]:
# Load the raw gyroscope export.
gyraw = pd.read_csv(rawdatapath + "Gyroscope.csv")

# Feature columns: every column except the timestamp and the
# absolute-value column.
gyfeatures = [
    name
    for name in gyraw.columns
    if name not in (timecol, "Absolute (rad/s)")
]
gyfeatures

['Gyroscope x (rad/s)', 'Gyroscope y (rad/s)', 'Gyroscope z (rad/s)']

In [8]:
# Show the loaded gyroscope table as this cell's output.
gyraw

Unnamed: 0,Time (s),Gyroscope x (rad/s),Gyroscope y (rad/s),Gyroscope z (rad/s),Absolute (rad/s)
0,0.017159,0.278546,0.088420,-0.123391,0.317225
1,0.019610,0.267551,0.084755,-0.125835,0.307573
2,0.022060,0.257778,0.082312,-0.129500,0.299991
3,0.024510,0.248004,0.079868,-0.131943,0.292051
4,0.026961,0.233344,0.081090,-0.136830,0.282396
...,...,...,...,...,...
124847,300.006209,-0.002596,0.007788,0.045203,0.045942
124848,300.008611,0.023059,-0.001985,0.041538,0.047551
124849,300.011015,0.046272,-0.006872,0.034207,0.057952
124850,300.013417,0.068262,-0.008094,0.026877,0.073808


In [6]:
# Load the raw light-sensor export.
lightraw = pd.read_csv(rawdatapath + "Light.csv")

# Feature columns: every column except the timestamp.
lightfeatures = [name for name in lightraw.columns if name != timecol]
lightfeatures

['Illuminance (lx)']

In [9]:
# Show the loaded light-sensor table as this cell's output.
lightraw

Unnamed: 0,Time (s),Illuminance (lx)
0,0.000000,10.5
1,0.091584,8.9
2,0.291686,8.3
3,0.491685,7.9
4,0.691617,6.8
...,...,...
1503,299.191585,4.0
1504,299.291687,3.1
1505,299.491589,3.3
1506,299.691685,2.7


In [11]:
# Load the raw location export.
locraw = pd.read_csv(rawdatapath + "Location.csv")

# Feature columns: every column except the timestamp and the satellite count.
locfeatures = [
    name
    for name in locraw.columns
    if name not in (timecol, "Satellites")
]
locfeatures

['Latitude (°)',
 'Longitude (°)',
 'Altitude (m)',
 'Altitude WGS84 (m)',
 'Speed (m/s)',
 'Direction (°)',
 'Distance (km)',
 'Horizontal Accuracy (m)',
 'Vertical Accuracy (m)']

In [12]:
# Load the raw magnetometer export.
magraw = pd.read_csv(rawdatapath + "Magnetometer.csv")

# Feature columns: every column except the timestamp and the
# absolute-field column.
magfeatures = [
    name
    for name in magraw.columns
    if name not in (timecol, "Absolute field (µT)")
]
magfeatures

['Magnetic Field x (µT)', 'Magnetic Field y (µT)', 'Magnetic Field z (µT)']

In [None]:
# Granularity values handed to CreateDataset, one aggregated dataset per entry.
# NOTE(review): the unit is defined by CreateDataset, which is not visible
# here - confirm before changing these values.
granularities = [60000, 250]

# Collects one aggregated table per granularity, in the order listed above.
datasets = list()

In [None]:
# Start from an empty result list so that re-running this cell does not append
# a second copy of every dataset (which would leave datasets[0] / datasets[1]
# pointing at stale results).
datasets = []

# One entry per sensor: (raw file name, feature columns, column prefix).
sensor_sources = [
    ("Accelerometer.csv", accfeatures, "acc_"),
    ("Gyroscope.csv", gyfeatures, "gyr_"),
    ("Light.csv", lightfeatures, "lux_"),
    ("Location.csv", locfeatures, "loc_"),
    ("Magnetometer.csv", magfeatures, "mag_"),
]
sensor_prefixes = [prefix for _, _, prefix in sensor_sources]

for g in granularities:
    # Build one combined table per granularity, adding every sensor with the
    # "avg" aggregation and its own column prefix.
    Dataset = CreateDataset(rawdatapath, g)
    for filename, features, prefix in sensor_sources:
        Dataset.add_numerical_dataset(filename, timecol, features, "avg", prefix)

    dataset = Dataset.data_table
    print(dataset)

    # A fresh visualizer is created for every granularity, as before; whether
    # it carries per-instance state is not visible from this notebook.
    Dataviz = VisualizeDataset()

    # Boxplot of the accelerometer axes; column names are derived from
    # accfeatures so they cannot drift from the columns that were added above.
    Dataviz.plot_dataset_boxplot(dataset, ["acc_" + feature for feature in accfeatures])

    # One line plot per sensor prefix.
    # NOTE(review): "like" presumably selects all columns whose name matches
    # the prefix - confirm in VisualizeDataset.plot_dataset.
    Dataviz.plot_dataset(dataset, sensor_prefixes,
                         ["like"] * len(sensor_prefixes),
                         ["line"] * len(sensor_prefixes))

    util.print_statistics(dataset)
    # Store an independent copy so later changes to `dataset` cannot alter
    # the collected result.
    datasets.append(copy.deepcopy(dataset))

In [None]:
# Compare the statistics of the first two collected datasets as a LaTeX table.
util.print_latex_table_statistics_two_datasets(datasets[0], datasets[1])

# Persist the most recently collected dataset. Reading it from `datasets`
# instead of the leaked loop variable `dataset` makes the dependency on the
# aggregation cell explicit.
datasets[-1].to_csv(os.path.join(outputpath, 'chapter2_result.csv'))