# Experiment

In [2]:
%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')

import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import os
from tqdm import tqdm

import definitions
from modules.model.classifier import Classifier
from modules.model.evaluation import Evaluation
from modules.common import Feature

## Loading

In [3]:
labels = pd.read_csv(os.path.join(definitions.DATA_LABEL, os.listdir(definitions.DATA_LABEL)[0]))
labels.head()

Unnamed: 0,bookingID,label
0,111669149733,0
1,335007449205,1
2,171798691856,0
3,1520418422900,0
4,798863917116,0


In [4]:
for key, df in labels.groupby('bookingID'):
    if (len(df)>=2):
        print(key, df['label'].values)

13 [0 1]
154618822837 [1 0]
223338299461 [0 1]
395136991308 [1 0]
403726925929 [0 1]
455266533495 [1 0]
481036337234 [1 0]
515396075694 [0 1]
695784702084 [0 1]
919123001348 [1 0]
970662608932 [0 1]
1279900254294 [1 0]
1348619731077 [1 0]
1391569403991 [0 1]
1408749273124 [0 1]
1511828488211 [1 0]
1632087572573 [1 0]
1649267441751 [1 0]


In [5]:
data = pd.read_csv(os.path.join(definitions.DATA_AGGREGATED, os.listdir(definitions.DATA_AGGREGATED)[0]))
data.head()

Unnamed: 0,bookingID,mean_acceleration_x,mean_acceleration_y,mean_acceleration_z,mean_accuracy,mean_bearing,mean_gyro_x,mean_gyro_y,mean_gyro_z,mean_speed
0,0,-0.711264,-9.613822,-1.619658,10.165339,176.526099,0.003328,-0.006118,-0.004188,8.994822
1,1,-0.525406,9.532086,-2.198999,3.718763,124.19859,-0.002467,-0.00754,0.000405,7.881588
2,2,0.306786,9.843183,0.139347,3.930626,173.794872,0.006458,-0.012861,0.002597,3.157213
3,4,-0.365117,-9.406439,-2.613639,10.0,151.807013,-0.022884,0.023232,-0.000376,6.150996
4,6,0.490616,9.538043,2.355059,4.586721,197.812785,0.003877,0.000436,0.00293,4.628921


In [6]:
data_complete = pd.merge(data, labels, on=Feature.FEAT_booking_id, suffixes=(False, False)).drop_duplicates(subset='bookingID', keep=False)
data_complete.head()

Unnamed: 0,bookingID,mean_acceleration_x,mean_acceleration_y,mean_acceleration_z,mean_accuracy,mean_bearing,mean_gyro_x,mean_gyro_y,mean_gyro_z,mean_speed,label
0,0,-0.711264,-9.613822,-1.619658,10.165339,176.526099,0.003328,-0.006118,-0.004188,8.994822,0
1,1,-0.525406,9.532086,-2.198999,3.718763,124.19859,-0.002467,-0.00754,0.000405,7.881588,1
2,2,0.306786,9.843183,0.139347,3.930626,173.794872,0.006458,-0.012861,0.002597,3.157213,1
3,4,-0.365117,-9.406439,-2.613639,10.0,151.807013,-0.022884,0.023232,-0.000376,6.150996,1
4,6,0.490616,9.538043,2.355059,4.586721,197.812785,0.003877,0.000436,0.00293,4.628921,0


In [9]:
print('0 : ' + str(len(data_complete[data_complete['label']==0])))
print('1 : ' + str(len(data_complete[data_complete['label']==1])))
print('Total : ' + str(len(data_complete)))

0 : 14999
1 : 4983
Total : 19982


## Train

In [10]:
X = data_complete.drop('label', axis=1).values
y = data_complete['label'].values

In [12]:
model = Classifier()
scores = Evaluation.evaluate(model, X, y)

Fold 1: 0.6651823647294589
Fold 2: 0.645565798263193
Fold 3: 0.6594909819639279
Fold 4: 0.6384016064257028
Fold 5: 0.6623962516733601
Fold 6: 0.6548313253012048
Fold 7: 0.6400749665327978
Fold 8: 0.6487751004016065
Fold 9: 0.6533935742971888
Fold 10: 0.6429038368283005
MEAN: 0.6511015806416741
STDDEV: 0.00943704475399599
