# Curating the GPS Data
To help determine times when participants are home, we can provide Peter with the GPS data. However, we should include only the relevant data.

In [48]:
import warnings
# Ignore every warning raised for the rest of the session.
# NOTE(review): this blanket filter also hides pandas deprecation and
# SettingWithCopy warnings from the cells below -- consider narrowing it.
warnings.filterwarnings('ignore')

# Package Import

In [49]:
import sys
import os
sys.path.append('../')

from src.visualization import visualize

import pandas as pd
pd.set_option('display.max_columns', 200)
import numpy as np

from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Process
We are going to need to use the beacon data to whittle down the raw GPS data based on:
1. Participants who received BEVOs
2. Timeframe during which Participants had their BEVOs

## Beacon Data
We can just use the processed beacon data to find the participants and the first and last data points that were logged.

In [50]:
# Load the processed beacon measurements, parsing "timestamp" into datetimes,
# and preview the first few rows.
beacon = pd.read_csv(
    "../data/processed/beacon-ux_s20.csv",
    parse_dates=["timestamp"],
    infer_datetime_format=True,
)
beacon.head()

Unnamed: 0,timestamp,tvoc,lux,no2,co,co2,pm1_number,pm2p5_number,pm10_number,pm1_mass,pm2p5_mass,pm10_mass,temperature_c,rh,beacon,beiwe,redcap
0,2020-06-08 13:00:00,67.766667,3.61488,3.526111,13.922047,,12.081799,11.458559,11.201085,0.74428,0.429834,1.935866,27.383333,46.586667,1,kyj367pi,10
1,2020-06-08 13:02:00,67.9625,3.64395,3.526111,13.906931,,12.157965,11.542477,11.28288,0.750738,0.40297,1.940782,27.390625,46.58125,1,kyj367pi,10
2,2020-06-08 13:04:00,68.847059,3.63516,3.529306,13.893371,,12.044653,11.436841,11.182763,0.742682,0.482686,1.937115,27.397059,46.597059,1,kyj367pi,10
3,2020-06-08 13:06:00,69.788889,3.58734,3.529677,13.874056,,12.01994,11.401453,11.147062,0.739928,0.52202,1.933971,27.402778,46.619444,1,kyj367pi,10
4,2020-06-08 13:08:00,70.552632,3.582777,3.530139,13.862026,,12.040436,11.431434,11.17738,0.742263,0.494546,1.936699,27.407895,46.639474,1,kyj367pi,10


### Getting Participants and the Start and Stop Times

In [51]:
# Build one row per participant holding the first and last timestamp at which
# the beacon logged at least one of the key measurements (co2, tvoc, pm2p5_mass).
#
# The filter is applied once to a new frame rather than mutating per-participant
# slices in place, and min/max are used instead of sort + iloc so that:
#   * participants with no valid measurements (or a missing beiwe ID) are
#     skipped instead of raising IndexError,
#   * NaT timestamps cannot be picked up as a start/end.
beacon_valid = beacon.dropna(subset=["co2","tvoc","pm2p5_mass"],how="all")

deployment_df = (
    beacon_valid.groupby("beiwe",sort=False)["timestamp"] # sort=False keeps order of first appearance
    .agg(starts="min",ends="max")
    .reset_index()
    .rename(columns={"beiwe":"pts"})
) # columns: pts, starts, ends

# Same information as a dict of lists, kept for any code that still reads it.
deployment_info = deployment_df.to_dict("list")

## GPS Data
Now we import the GPS data for the relevant participants and keep only the important bits.

In [52]:
# Import the raw Beiwe GPS files for every participant that had a beacon, trim
# them to the beacon deployment window, save one interim file per participant,
# and finally save the combined frame.
# NOTE(review): absolute path to an external volume -- the cell only runs on a
# machine with this drive mounted.
data_dir='/Volumes/HEF_Dissertation_Research/utx000/data/raw/utx000/beiwe/gps/'
gps_frames = [] # per-participant frames; concatenated once after the loop
for pt in sorted(os.listdir(data_dir)): # sorted so the processing order is deterministic
    if pt not in deployment_df["pts"].values:
        continue # entry is not a participant with a beacon deployment

    print(f'Working for Participant: {pt}')
    hourly_frames = [] # one frame per raw file
    for file in sorted(os.listdir(f'{data_dir}{pt}/gps/')):
        if not file.endswith('.csv'): # only import csv files
            continue
        try:
            # Skip column 0; the retained columns include 'UTC time', which is used below.
            hourly_df = pd.read_csv(f'{data_dir}{pt}/gps/{file}',usecols=[1,2,3,4,5])
        except (KeyError, ValueError):
            # pandas' EmptyDataError/ParserError and bad-usecols errors are all ValueErrors.
            # Skip the file so a previous file's frame is never appended a second time.
            print(f'Problem with gps data for {file} for Participant {pt}')
            continue

        if len(hourly_df) > 0: # keep only files that actually contain data
            hourly_frames.append(hourly_df)

    if len(hourly_frames) == 0:
        # Nothing readable for this participant; there is no 'UTC time' column to process.
        print("\tNo GPS data found")
        continue

    pt_df = pd.concat(hourly_frames,ignore_index=True)
    print("\tImported Successfully")
    # converting utc to cdt (fixed 5 hour offset)
    pt_df['timestamp'] = pd.to_datetime(pt_df['UTC time']) - timedelta(hours=5)
    # Label-based slicing needs a monotonic index; a stable sort keeps the
    # relative order of rows that share a timestamp.
    pt_df = pt_df.set_index('timestamp').sort_index(kind='mergesort')
    # subsetting based on start and stop
    s = pd.to_datetime(deployment_df.loc[deployment_df["pts"] == pt,"starts"].iloc[0])
    e = pd.to_datetime(deployment_df.loc[deployment_df["pts"] == pt,"ends"].iloc[0])
    pt_df = pt_df.loc[s:e].copy() # copy so the assignments below do not write into a view
    # converting values to numeric and removing NaN datapoints
    pt_df.columns = ['utc','lat','long','altitude','accuracy']
    for col in ['lat','long','altitude','accuracy']:
        pt_df[col] = pd.to_numeric(pt_df[col],errors='coerce')

    pt_df = pt_df.dropna()
    if len(pt_df) > 0:
        pt_df["beiwe"] = pt
        pt_df.to_csv(f"../data/interim/gps_beacon/gps_{pt}.csv")
        print("\tSaved to file")
        gps_frames.append(pt_df)

# Single concat instead of repeated DataFrame.append (quadratic, removed in pandas 2.0).
gps_df = pd.concat(gps_frames) if len(gps_frames) > 0 else pd.DataFrame()

try:
    gps_df.to_csv('../data/processed/beiwe-gps_beacon_pts-ux_s20.csv')
    print("SUCCESS")
except OSError as err:
    # Report why the write failed rather than hiding every possible exception.
    print("FAILURE")
    print(f"\t{err}")

Working for Participant: 2xtqkfz1
	Imported Successfully
	Saved to file
Working for Participant: 4i7679py
	Imported Successfully
	Saved to file
Working for Participant: 745vq78e
	Imported Successfully
	Saved to file
Working for Participant: 9jtzsuu8
	Imported Successfully
	Saved to file
Working for Participant: 9xmhtq74
	Imported Successfully
	Saved to file
Working for Participant: awa8uces
	Imported Successfully
	Saved to file
Working for Participant: e73a1pd5
	Imported Successfully
	Saved to file
Working for Participant: hxj6brwj
	Imported Successfully
	Saved to file
Working for Participant: i31pt4b4
	Imported Successfully
	Saved to file
Working for Participant: i4w8dx6l
	Imported Successfully
Working for Participant: idbkjh8u
	Imported Successfully
	Saved to file
Working for Participant: itmylz3g
	Imported Successfully
	Saved to file
Working for Participant: kyj367pi
	Imported Successfully
	Saved to file
Working for Participant: lkkjddam
	Imported Successfully
	Saved to file
Working

In [53]:
gps_df

Unnamed: 0_level_0,utc,lat,long,altitude,accuracy,beiwe
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-06-22 23:56:05.453,2020-06-23T04:56:05.453,32.923093,-96.962686,151.085648,65.000000,2xtqkfz1
2020-06-22 23:56:06.028,2020-06-23T04:56:06.028,32.923058,-96.962664,151.277084,65.000000,2xtqkfz1
2020-06-22 23:56:06.028,2020-06-23T04:56:06.028,32.923033,-96.962649,151.328857,65.000000,2xtqkfz1
2020-06-22 23:56:06.435,2020-06-23T04:56:06.435,32.923057,-96.962649,151.253174,65.000000,2xtqkfz1
2020-06-22 23:56:07.554,2020-06-23T04:56:07.554,32.923093,-96.962674,153.706418,47.814749,2xtqkfz1
...,...,...,...,...,...,...
2020-08-31 20:45:00.309,2020-09-01T01:45:00.309,30.367271,-97.793406,249.349107,33.566909,zdpffrox
2020-08-31 20:45:01.309,2020-09-01T01:45:01.309,30.367265,-97.793413,249.318866,30.638139,zdpffrox
2020-08-31 20:45:02.309,2020-09-01T01:45:02.309,30.367276,-97.793418,249.361290,29.289974,zdpffrox
2020-08-31 20:45:03.309,2020-09-01T01:45:03.309,30.367285,-97.793421,249.399093,29.563116,zdpffrox
