In [1]:
import os
import numpy as np

In [2]:
import pandas as pd
from sklearn import datasets, metrics, svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import tree

In [3]:
input_folder = "data"

In [4]:
activities = {}

In [5]:
subfolders = os.listdir(input_folder)

In [6]:
for folder in subfolders:
    test_path = os.path.join(input_folder, folder)
    if os.path.isdir(test_path):
        activities[folder] = pd.DataFrame()
        frames = []
        for file in os.listdir(test_path):
            test_file = os.path.join(test_path,file)
            if os.path.isfile(test_file):
                new_dataframe = pd.read_csv(test_file)
                frames.append(new_dataframe)
        activities[folder]=pd.concat(frames)

In [7]:
code = 0
for key in activities.keys():
    activities[key]["code"]=key
    code+=1

In [8]:
all_activities = pd.concat([activities[key] for key in activities.keys()])

In [9]:
clf = svm.SVC()

In [10]:
dtree_clf = tree.DecisionTreeClassifier(max_depth=5, random_state=42)

In [11]:
# Split data into 50% train and 50% test subsets
X_train, X_test, y_train, y_test = train_test_split(
    all_activities.iloc[:,:-1], all_activities["code"],test_size=0.5, shuffle=False
)

In [12]:
clf.fit(X_train, y_train)
dtree_clf.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=5,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=42,
            splitter='best')

In [13]:
clf_y = clf.predict(X_test)
dtree_clf_y = dtree_clf.predict(X_test)

In [14]:
clf_accuracy = accuracy_score(clf_y, y_test)
dtree_clf_accuracy = accuracy_score(dtree_clf_y, y_test)

In [15]:
print("svm clf accuracy: ",clf_accuracy)
print("decision tree clf accuracy: ", dtree_clf_accuracy)

svm clf accuracy:  0.460784313725
decision tree clf accuracy:  0.455882352941


In [16]:
print("decision tree clf accuracy: " + str(dtree_clf_accuracy))

decision tree clf accuracy: 0.455882352941


In [17]:
print("mean values:")
all_activities.groupby(by = 'code').mean()


mean values:


Unnamed: 0_level_0,accelerometer_X,accelerometer_Y,accelerometer_Z
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
idle,-0.052313,-0.108151,9.749668
running,7.141621,6.13602,3.571191
stairs,2.320608,-9.614764,-0.87532
walking,1.026634,-9.991548,-0.92358


In [18]:
print("max values:")
all_activities.groupby(by = 'code').max()


max values:


Unnamed: 0_level_0,accelerometer_X,accelerometer_Y,accelerometer_Z
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
idle,5.09965,4.616021,9.80665
running,26.499025,27.159824,25.775976
stairs,12.650962,2.973599,10.716447
walking,12.363658,4.668694,21.648373


In [19]:
print("min values:")
all_activities.groupby(by = 'code').min()

min values:


Unnamed: 0_level_0,accelerometer_X,accelerometer_Y,accelerometer_Z
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
idle,-0.909797,-0.282516,8.418014
running,-10.994174,-20.269312,-7.254431
stairs,-6.847417,-19.416977,-14.422671
walking,-9.504981,-23.376986,-26.3027


In [20]:
amount = all_activities.groupby(by = 'code').count()["accelerometer_X"].sum()

In [21]:
amount

2040

In [22]:
all_activities.groupby(by = 'code').count()["accelerometer_X"]

code
idle       360
running    390
stairs     780
walking    510
Name: accelerometer_X, dtype: int64

In [23]:
frequencies = all_activities.groupby(by = 'code').count()["accelerometer_X"]/amount

In [24]:
shennon_values = frequencies * np.log(frequencies)

In [26]:
entropy = shennon_values.sum()

In [28]:
print("entropy: ",entropy)

entropy:  -1.3365906716177216
