### follow the idea found in these articles
### doesn't use the number of satellites used
#### https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4046178/
#### https://pubs.rsc.org/en/content/articlelanding/2012/EM/c2em30190c#!divAbstract
#### The test dataset, due to a technical difficulty, has a 7-hour difference compared with the activity diary

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

## data was collected through the 'GPS tracker' App offered in iOS
#### Three sample days were collected by Lori and then processed using the following code
#### each day collected around 9 hrs of data

In [2]:
# Input CSV exports and the Excel workbook each one is written to.
# The two lists are paired by position (entry k of one belongs to entry k of the other).
filename = [
    'Test_1.csv',
    'Test_2.csv',
    'Test_3.csv',
]
outputname = [
    'sep_26_manual.xlsx',
    'sep_27_manual.xlsx',
    'sep_28_manual.xlsx',
]

#### Additional idea: Indoor status was confirmed only when the used-NSAT was maintained for at least 3 min. 
#### Speed was utilized to further classify outdoor points into transit or walking. Walking status was finalized only when the speed was maintained for at least 3 min. 
### my approach: assign 0 to indoor, 1 to outdoor, 2 to in transit

In [3]:
# Positional columns (counted after 'timestamp' has been removed) that drive the
# manual labelling.
# NOTE(review): presumably the accuracy and speed fields of the iOS export --
# confirm against the CSV header; names would be safer than positions.
ACCURACY_COL = 3
SPEED_COL = 5
ACCURACY_INDOOR_MIN = 50  # accuracy >= this => indoor (a user near a window may read ~10-30 and be missed)
SPEED_WALKING_MAX = 8     # normal walking speed is 3-6, running is 12-14; above this => in transit

# Labels written to 'manual_judgement': 0 = indoor, 1 = outdoor, 2 = in transit.
for src_path, out_path in zip(filename, outputname):
    GPS = pd.read_csv(src_path, delimiter=';', header=0)

    # convert from UTC to US/Pacific timezone, then strip the tz info
    GPS['new_time'] = (
        pd.to_datetime(GPS['timestamp'])
        .dt.tz_convert('US/Pacific')
        .dt.tz_localize(None)
    )
    GPS = GPS.drop(columns='timestamp')

    # One row per timestamp (first occurrence wins, which also removes fully
    # duplicated rows), then a regular 10-second grid filled with the most
    # recent observation.
    GPS = (
        GPS.drop_duplicates(subset='new_time')
        .set_index('new_time')
        .resample('10s')  # lowercase alias; '10S' is deprecated in recent pandas
        .ffill()
    )

    # The first grid point usually precedes the first sample and is therefore
    # empty; drop that row, but keep it when it actually carries data.
    if len(GPS) and GPS.iloc[0].isna().all():
        GPS = GPS.iloc[1:]

    # Earlier versions of this cell resampled inside groupby('dummy').apply(...),
    # which left a constant outer index level named 'dummy' in the exported
    # sheet. Re-create that level so the sheet layout stays the same. concat
    # also returns a fresh frame, so the column assignments below do not write
    # into a slice.
    GPS = pd.concat({0: GPS}, names=['dummy'])

    # manually assign label for different conditions
    GPS['manual_judgement'] = 0
    speed = GPS.iloc[:, SPEED_COL]
    accuracy = GPS.iloc[:, ACCURACY_COL]

    # Stationary and walking-speed points share one rule: accuracy decides
    # indoor (0) vs outdoor (1). Everything else is labelled in transit (2);
    # this mis-classifies a bus stopped in traffic, and rows whose speed is
    # missing also land here because comparisons with NaN are False.
    GPS['manual_judgement'] = np.where(
        speed <= SPEED_WALKING_MAX,
        np.where(accuracy >= ACCURACY_INDOOR_MIN, 0, 1),
        2,
    )

    GPS.to_excel(out_path)

In [2]:
# Load the Sep 26 and Sep 27 CSV files with pandas' default (comma-separated) parsing.
openmap_26 = pd.read_csv(filepath_or_buffer='sep_26.csv')
openmap_27 = pd.read_csv(filepath_or_buffer='sep_27.csv')

## Data was recorded through the Android app GPS Logger on Oct 7

In [85]:
# GPS Logger export to process and the Excel workbook it is written to
# (paired by position, one output per input).
filename = [
    'GPS_logger_Oct_7.txt',
]
outputname = [
    'logger_oct_7.xlsx',
]

In [86]:
# Convert each GPS Logger export into a 10-second averaged table and save it as Excel.
for src_path, out_path in zip(filename, outputname):
    logger = pd.read_csv(src_path, delimiter=',')
    # These columns are not used below; dropping them before the resample also
    # keeps them out of the mean.
    logger = logger.drop(columns=['type', 'name', 'desc', 'bearing(deg)'])

    # 'date time' is parsed as naive, declared to be UTC, converted to
    # US/Pacific, and finally made naive again.
    logger['new_time'] = (
        pd.to_datetime(logger['date time'])
        .dt.tz_localize('UTC')
        .dt.tz_convert('US/Pacific')
        .dt.tz_localize(None)
    )
    logger = logger.drop(columns='date time')

    # Average every column over 10-second bins; bins without samples come out as NaN.
    # Lowercase '10s' replaces the deprecated '10S' alias.
    logger = logger.resample('10s', on='new_time').mean()

    # Share of the satellites in view that were actually used for the fix
    # (true division already yields floats, no extra scaling needed).
    logger['sat_ratio'] = logger['sat_used'] / logger['sat_inview']

    # -1 marks missing values (empty bins, undefined ratios) in the exported sheet.
    logger = logger.fillna(-1)
    logger.to_excel(out_path)