In [19]:
import db.knowhere_db as kdb
import pipeline.pipeline as pipeline
import pandas as pd
import numpy as np
import pickle

In [20]:
# Shared database reader handle; CommuteLabeler.__init__ below looks this
# module-level name up when it loads its frame.
reader = kdb.Reader('knowhere')

In [21]:
# Fetch glen's pivoted commute readings from the 'iphone_test3' collection,
# bounded by the two date strings below.
glen_H_data_raw = reader.get_dataframe_pivoted(
    collection='iphone_test3',
    username='glen',
    commute=True,
    min_date='2017-3-24',
    max_date='2017-03-25',
)

In [22]:
# Discard rows in which all three accelerometer axes are missing; a row with
# at least one axis present is kept.
glen_H_data = glen_H_data_raw.dropna(
    how='all',
    subset=['Acceleration z', 'Acceleration x', 'Acceleration y'],
)

In [23]:
# Load the hand-written commute labels. The context manager closes the file
# handle as soon as the pickle has been read.
# NOTE(review): unpickling can execute arbitrary code, so only load label
# files that were produced locally.
with open("glen_commute_labels.p", "rb") as labels_file:
    glen_commute_labels = pickle.load(labels_file)
glen_commute_labels

{'2017-3-20': [('7:33:33', 'D'),
  ('7:41:55', 'W'),
  ('7:49:01', 'D'),
  ('7:52:45', 'W'),
  ('7:55:45', 'S_D'),
  ('7:55:53', 'P_W'),
  ('7:56:53', 'P_S'),
  ('8:00:23', 'T_B'),
  ('8:16:15', 'T_T'),
  ('8:21:05', 'T_B'),
  ('8:30:30', 'T_E'),
  ('8:31:15', 'S_U'),
  ('8:31:20', 'W'),
  ('8:32:05', 'S_U'),
  ('8:32:10', 'W'),
  ('8:32:35', 'S_U'),
  ('8:32:50', 'W'),
  ('8:36:40', 'E_U'),
  ('8:37:50', 'W'),
  ('8:30:10', 'W'),
  ('20:05:55', 'W'),
  ('20:06:30', 'E_D'),
  ('20:07:05', 'W'),
  ('20:13:00', 'S'),
  ('20:27:25', 'T_D'),
  ('20:36:50', 'T_S'),
  ('20:38:14', 'T_D'),
  ('20:44:10', 'T_S'),
  ('20:44:10', 'T_S'),
  ('20:45:15', 'T_D'),
  ('20:46:51', 'T_S'),
  ('20:47:50', 'T_D'),
  ('20:49:15', 'T_S'),
  ('20:50:50', 'T_D'),
  ('20:51:15', 'T_S'),
  ('20:54:04', 'T_D'),
  ('20:53:59', 'T_E'),
  ('20:54:50', 'S_U'),
  ('20:55:05', 'W'),
  ('20:57:05', 'D'),
  ('21:08:45', 'W')],
 '2017-3-21': [('7:33:10', 'W'),
  ('7:34:15', 'D'),
  ('7:48:15', 'W'),
  ('7:50:30', 'D'),


In [24]:

class CommuteLabeler(object):
    '''Attach commute event codes to one user's pivoted iPhone sensor readings.

    The sensor frame is loaded once in __init__; label_raw_iphone() writes the
    event codes into a 'classification' column and write_labels_to_files()
    additionally saves the labelled frame as a CSV.
    '''

    def __init__(self, username, events, min_date, max_date):
        '''Store the labelling parameters and load the sensor frame.

        username -- user whose readings are fetched; also used in the CSV name.
        events   -- dict mapping a date string to a list of
                    (time string, event code) tuples.
        min_date, max_date -- date strings bounding both the query and the
                    days of `events` that get applied.
        '''
        self.username = username
        self.events = events
        self.min_date = min_date
        self.max_date = max_date
        # Uses the module-level `reader`, which must exist before instantiation.
        raw = reader.get_dataframe_pivoted(collection='iphone_test3', username=username,
                                           commute=True, min_date=min_date, max_date=max_date)
        # Drop rows in which all three accelerometer axes are missing.
        self.df = raw.dropna(subset=['Acceleration z', 'Acceleration x', 'Acceleration y'],
                             how='all')

    def add_classifications(self):
        '''Write in-range event codes into 'classification', then forward-fill.

        Days of self.events that fall outside min_date..max_date are skipped.
        Expects self.df to have a datetime index and a 'classification' column
        already (label_raw_iphone sets both up). Returns "Finished!".
        '''
        # Built once instead of on every loop iteration.
        labelled_days = pd.date_range(start=self.min_date, end=self.max_date, freq='D')
        # Iterating the dict directly works on Python 2 and 3 (iterkeys is 2-only).
        for day in self.events:
            if pd.to_datetime(day) not in labelled_days:
                continue
            for time_of_day, code in self.events[day]:
                # NOTE(review): if no row carries this exact timestamp, .loc
                # presumably appends a new row instead of labelling an existing
                # one - confirm this is intended.
                self.df.loc[day + " " + time_of_day, 'classification'] = code
        # Forward-fills every column, not only 'classification'.
        self.df.ffill(inplace=True)
        return "Finished!"

    def label_raw_iphone(self):
        '''Convert the index to datetimes, reset the labels, apply events; return self.df.'''
        self.df.index = pd.to_datetime(self.df.index)
        self.df['classification'] = None
        self.add_classifications()
        return self.df

    def write_labels_to_files(self):
        '''Label the frame and save it as
        "Labeled_<username>_<min_date> to <max_date>.csv" in the working directory.
        Returns "Finished!".
        '''
        # Must be called on self; the bare name raised NameError.
        self.label_raw_iphone()
        self.df.to_csv("Labeled_" + self.username + "_" + self.min_date + " to " + self.max_date + ".csv")
        return "Finished!"

In [25]:
# Build the labeler for glen; the constructor loads the sensor frame for the
# given date bounds.
glen_as_a_class = CommuteLabeler(
    username='glen',
    events=glen_commute_labels,
    min_date='03-24-2017',
    max_date='03-25-2017',
)

In [26]:
# Label the frame in place and display the returned DataFrame.
glen_as_a_class.label_raw_iphone()

Unnamed: 0,GPS Horizontal Accuracy,GPS Longitude,GPS Vertical Accuracy,Magnetometer z,Altimeter (Barometer) Pressure,GPS Latitude,GPS Altitude,Magnetometer y,Magnetometer x,Altimeter (Barometer) Relative Altitude,...,Gyrometer y,Gyrometer z,Gyrometer x,Gravity z,Gravity y,Gravity x,Acceleration z,Acceleration x,Acceleration y,classification
2017-03-24 06:31:10,118.858780,-73.753457,10.0,-441.740631,,40.736523,49.432316,113.658890,-65.585632,,...,0.169663,-0.200276,0.195217,-0.999999,-0.000977,0.000850,0.000000,0.000000,0.000000,W
2017-03-24 06:31:15,10.000000,-73.753646,12.0,-445.337341,102.441696,40.736736,47.433746,121.556625,-66.959152,-0.200317,...,-0.160387,-0.636138,0.057329,-0.947613,-0.315372,-0.050694,0.066937,-0.023296,-0.018811,W
2017-03-24 06:31:19,10.000000,-73.753643,8.0,-445.337341,102.441505,40.736720,47.527802,121.556625,-66.959152,-0.185364,...,-0.160387,-0.636138,0.057329,-0.839087,-0.538034,0.080332,0.017675,-0.015955,0.008646,W
2017-03-24 06:31:24,10.000000,-73.753649,4.0,-445.337341,102.440567,40.736675,47.847198,121.556625,-66.959152,-0.107628,...,-0.160387,-0.636138,0.057329,-0.611503,-0.520208,-0.596195,-0.014916,-0.027630,-0.027705,W
2017-03-24 06:31:28,10.000000,-73.753649,4.0,-445.337341,102.443634,40.736627,48.188568,121.556625,-66.959152,-0.360764,...,-0.160387,-0.636138,0.057329,-0.684826,-0.697082,0.212342,-0.106723,0.080779,-0.055970,W
2017-03-24 06:31:33,10.000000,-73.753659,3.0,-445.337341,102.454315,40.736601,49.368378,121.556625,-66.959152,-1.241699,...,-0.160387,-0.636138,0.057329,-0.001917,-0.993844,0.110772,-0.190725,0.242347,0.272881,W
2017-03-24 06:31:37,10.000000,-73.753658,3.0,-445.337341,102.461502,40.736574,49.540436,121.556625,-66.959152,-1.834595,...,-0.160387,-0.636138,0.057329,-0.113701,-0.959559,0.257522,-0.137641,-0.007537,0.121181,W
2017-03-24 06:31:42,10.000000,-73.753630,3.0,-445.337341,102.468178,40.736531,48.258698,121.556625,-66.959152,-2.385605,...,-0.160387,-0.636138,0.057329,-0.216529,-0.971583,0.095613,0.077979,-0.460572,-0.016713,W
2017-03-24 06:31:46,10.000000,-73.753586,3.0,-445.337341,102.470299,40.736544,47.941010,121.556625,-66.959152,-2.559967,...,-0.160387,-0.636138,0.057329,-0.273305,-0.946756,0.170168,-0.279018,-0.029696,-0.123480,W
2017-03-24 06:31:51,10.000000,-73.753592,3.0,-445.337341,102.469666,40.736560,47.134979,121.556625,-66.959152,-2.508156,...,-0.160387,-0.636138,0.057329,-0.268927,-0.960738,0.068275,-0.053827,-0.035942,0.019347,W


In [27]:
# Row count per classification code, as a quick check on the labelling.
glen_as_a_class.df.groupby("classification").size()

classification
D      150
E_U      1
S        1
T_D      1
T_E      1
W      682
dtype: int64

In [31]:
# Save the labelled frame to a CSV in the working directory.
glen_as_a_class.df.to_csv('Labeled_Test_Data_Glen_03_25_2017.csv')