In [None]:
import sklearn as sk
import pandas as pd
import numpy as np
from sklearn.utils import shuffle

# Column names earmarked for removal from the raw recordings.
# NOTE(review): no code visible in this notebook reads TO_DROP;
# read_files_days hard-codes '#timestamp' instead. Confirm whether 'index'
# is still expected in the CSV files.
TO_DROP = ['#timestamp', 'index']

# Folder that holds the per-participant CSV files, named
# '<person prefix><day>.csv' (e.g. 'p1_d1.csv'). It lives on Google Drive, so
# the drive has to be mounted at /content/drive before any file is read.
DATASET_PATH = '/content/drive/MyDrive/data/'

# Meaning of the integer values stored in the 'label' column:
# 0: Not reading
# 1: Reading English
# 2: Reading Japanese horizontal
# 3: Reading Japanese vertical

In [None]:
from google.colab import drive
# Mount Google Drive so the dataset folder below /content/drive is readable.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from sklearn.preprocessing import StandardScaler

def _load_person_day(person, day):
    """Load and smooth the recording of one participant for one day.

    Reads ``DATASET_PATH + person + day + '.csv'``, discards rows whose
    ``label`` is 0 ("Not reading"), indexes the rows by ``date_time`` (cast to
    ``datetime64[s]``), drops the ``#timestamp`` column and replaces every
    remaining column by its mean over a time-based 30-second rolling window.

    NOTE(review): the rolling mean is applied to the ``label`` column as well,
    so a window that spans two different labels yields a fractional label
    (later truncated by the integer cast in the calling cells). Confirm whether
    that is intended before relying on labels near activity changes.
    """
    frame = pd.read_csv(DATASET_PATH + person + day + '.csv')
    # .copy() gives an independent frame, so the column assignment below does
    # not write into a slice of the frame returned by read_csv.
    frame = frame[frame['label'] != 0].copy()
    frame['date_time'] = frame['date_time'].astype('datetime64[s]')
    frame = frame.set_index('date_time').drop(columns='#timestamp')
    return frame.rolling('30s').mean()


def read_files_days(day):
    """Build the standardised dataset of all ten participants for one day.

    Parameters
    ----------
    day : str
        Day suffix used in the CSV file names, e.g. ``'d1'``.

    Returns
    -------
    pandas.DataFrame
        The smoothed recordings of participants ``p1`` ... ``p10`` stacked in
        that order and indexed by ``date_time``. Every column except the last
        one is standardised with a ``StandardScaler`` that is fit on this
        call's data only; the last column is returned unscaled.
    """
    persons = ['p1_', 'p2_', 'p3_', 'p4_', 'p5_', 'p6_', 'p7_', 'p8_', 'p9_', 'p10_']

    # DataFrame.append was removed in pandas 2.0; a single concat over all
    # participants produces the same stacked frame without repeated copying.
    df_final = pd.concat([_load_person_day(person, day) for person in persons])

    # Scale everything but the last column.
    # Assumes 'label' is the last column of the CSV layout - TODO confirm.
    scaler = StandardScaler()
    df_final.iloc[:, 0:-1] = scaler.fit_transform(df_final.iloc[:, 0:-1].to_numpy())
    return df_final

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

def eval(model, X_test, y_test):
    """Print classification metrics of a fitted model on a test set.

    NOTE: the name shadows Python's built-in ``eval``. It is kept because the
    later cells of this notebook call the function by this name.

    Parameters
    ----------
    model : fitted estimator exposing ``predict(X)``.
    X_test : feature matrix handed unchanged to ``model.predict``.
    y_test : true labels the predictions are compared against.

    Prints the row-normalised confusion matrix (each row, i.e. each true
    class, sums to 1, so the diagonal holds the per-class recall), the overall
    accuracy and the per-class F1 scores. Returns ``None``.
    """
    y_pred = model.predict(X_test)

    # np.float was removed in NumPy 1.24; the built-in float is equivalent.
    cm = confusion_matrix(y_test, y_pred).astype(float)
    # keepdims=True keeps the totals as a column vector, so row i is divided
    # by the total of row i. A flat vector would broadcast across columns and
    # divide column j by the total of row j instead.
    row_totals = cm.sum(axis=1, keepdims=True)
    # Rows without any true sample stay all-zero instead of becoming NaN.
    cm = np.divide(cm, row_totals, out=np.zeros_like(cm), where=row_totals != 0)
    print('Confusion Matrix:\n\t', cm)

    acc = accuracy_score(y_test, y_pred)
    print('accuracy:\n\t', acc)

    # average=None yields one F1 value per class instead of a single mean.
    score_f1 = f1_score(y_test, y_pred, average=None)
    print('f1 score:\n\t', score_f1)


In [None]:
# Day 'd1' supplies the training split.
df = read_files_days('d1')
print('pandas dataframe')
print(df.head())

# Targets: the 'label' column cast to integers.
y_train = df['label'].to_numpy().astype(int)

# Features: every other column, stored as half-precision floats.
# NOTE(review): float16 keeps only about three significant digits - confirm
# that this precision is sufficient for the models trained below.
X_train = df.drop(columns=['label']).to_numpy(dtype=np.float16)
print('numpy array (this will be used for training)')
print(X_train[:5])

pandas dataframe
                         left     right     acc_x     acc_y     acc_z  \
date_time                                                               
2019-06-06 09:45:04  2.343210  0.490600 -1.673025  2.032946  2.744057   
2019-06-06 09:45:04  2.371408  0.504542 -1.668741  2.018716  2.773732   
2019-06-06 09:45:04  2.352609  0.493698 -1.658338  2.009594  2.813629   
2019-06-06 09:45:04  2.360834  0.492924 -1.642937  2.000157  2.870547   
2019-06-06 09:45:04  2.377048  0.488741 -1.632146  1.983826  2.935165   

                          roll     pitch       yaw  label  
date_time                                                  
2019-06-06 09:45:04 -10.904685  3.449941  0.740608    2.0  
2019-06-06 09:45:04 -11.262083  3.130638  1.207241    2.0  
2019-06-06 09:45:04 -11.249254  2.746713  1.612476    2.0  
2019-06-06 09:45:04 -11.168610  2.523391  1.910261    2.0  
2019-06-06 09:45:04 -11.082834  2.405362  2.097528    2.0  
numpy array (this will be used for training)
[[  2.

In [None]:
# Day 'd2' supplies the evaluation split; its rows are shuffled after loading.
df = shuffle(read_files_days('d2'))

# Targets: the 'label' column cast to integers.
y_test = df['label'].to_numpy().astype(int)

# Features: every other column, stored as half-precision floats.
X_test = df.drop(columns=['label']).to_numpy(dtype=np.float16)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest classifier built from 100 decision trees. The fixed
# random_state makes the fitted forest - and therefore the scores reported
# for it - reproducible across kernel restarts.
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit on the training split (day 'd1').
random_forest.fit(X_train, y_train)

In [None]:
# Print confusion matrix, accuracy and per-class F1 of the random forest on the test split.
eval(random_forest, X_test, y_test)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with scikit-learn's default hyper-parameters, used as the
# baseline for the tuned variant. random_state is fixed so that repeated runs
# of the notebook produce the same model.
gd_boost_default = GradientBoostingClassifier(random_state=42)
gd_boost_default.fit(X_train, y_train)

In [None]:
# Print confusion matrix, accuracy and per-class F1 of the default gradient boosting model on the test split.
eval(gd_boost_default, X_test, y_test)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with hand-picked hyper-parameters: a larger learning rate
# with fewer trees, and stochastic boosting (each tree sees 80% of the rows
# and sqrt(n_features) candidate features per split). Both sources of
# randomness are pinned by random_state so the reported scores can be
# reproduced. warm_start=True only matters if fit() is called again on this
# same object; a fresh estimator is created every time this cell runs.
gd_boost_tweaked = GradientBoostingClassifier(learning_rate=0.2, n_estimators=50,
                                              max_features='sqrt', warm_start=True,
                                              subsample=0.8, random_state=42)
gd_boost_tweaked.fit(X_train, y_train)

In [None]:
# Print confusion matrix, accuracy and per-class F1 of the tuned gradient boosting model on the test split.
eval(gd_boost_tweaked, X_test, y_test)