In [3]:
from dateutil import tz
from datetime import datetime
import json
import pandas as pd
from gps_utils import find_closest_index, distance, to_date
import numpy as np

def to_datetime(timestamp):
    """
    Convert timestamp to datetime
    """
    datetime_timestamp = datetime.strptime(
        timestamp.rsplit(':', 1)[0], "%Y-%m-%dT%H:%M")
    return datetime_timestamp

def image_to_datetime(minute_id, image_id):
    utc_time = datetime.strptime(minute_id+"00", "%Y%m%d_%H%M%S")
    minute = int(image_id[11:13])
    second = int(image_id[13:15])
    image_time = utc_time.replace(minute=minute, second=second)
    return image_time

def find_closest_image(timestamp):
    #convert to datetime
    datetime_timestamp = datetime.strptime(
        timestamp.rsplit(':', 1)[0], "%Y-%m-%dT%H:%M")
    # find images
    i = find_closest_index(all_image_times, datetime_timestamp)
    return metadata.loc[i, "ImageID"]

google_maps = json.load(open("../../original_data/2019_JANUARY.json"))
metadata = pd.read_csv('../files/processed.csv', sep=',', decimal='.', nrows=100000)
all_image_times = [image_to_datetime(minute_id, image_id) for (minute_id, image_id) in zip(metadata["minute_id"], metadata["ImageID"])]
all_image_ids = list(metadata["ImageID"])

In [4]:
segment_rows = []
for segment in google_maps['timelineObjects']:
    if 'placeVisit' in segment:
        segment = segment['placeVisit']
        segment_rows.append({"stop": True,
                             "checkin": segment['location']['name'],
                             "first": segment['duration']['startTimestamp'],
                             "last": segment['duration']['endTimestamp'],
                             "lat": segment['location']["latitudeE7"] * 1e-7,
                             "lon": segment['location']["longitudeE7"] * 1e-7,
                             "end_latitude": "",
                             "end_longitude": "",
                             "points": []})
    else:
        segment = segment['activitySegment']
        points = []
        if "simplifiedRawPath" in segment:
            points = segment["simplifiedRawPath"]["points"]
        else:
            points = segment["waypointPath"]["waypoints"]
        segment_rows.append({"stop": False,
                             "checkin": segment['activityType'],
                             "first": segment['duration']['startTimestamp'],
                             "last": segment['duration']['endTimestamp'],
                             "lat": segment['startLocation']["latitudeE7"] * 1e-7,
                             "lon": segment['startLocation']["longitudeE7"] * 1e-7,
                             "end_latitude": segment['endLocation']["latitudeE7"] * 1e-7,
                             "end_longitude": segment['endLocation']["longitudeE7"] * 1e-7,
                             "points": points})

segment_df = pd.DataFrame.from_dict(segment_rows)

In [7]:
segment_df = segment_df[['stop', 'checkin', 'first', 'last', 'lat', 'lon', 'end_latitude', 'end_longitude', 'points']]
segment_df['duration'] = segment_df['last'].apply(to_datetime) - segment_df['first'].apply(to_datetime)
segment_df['duration'] = segment_df['duration'].apply(lambda x: x.seconds) / 60
segment_df["first"] = [find_closest_image(
    timestamp) for timestamp in segment_df["first"]]
segment_df["last"] = [find_closest_image(
    timestamp) for timestamp in segment_df["last"]]

In [8]:
from tqdm.notebook import tqdm
tqdm.pandas()
minute_ids = metadata['minute_id'].values.tolist()
minute_id_to_index = {minute_id: i for (i, minute_id) in enumerate(minute_ids)}
metadata["new_long"] = metadata["longitude"]
metadata["new_lat"] = metadata["latitude"]

for i, row in tqdm(segment_df.iterrows(), total=len(segment_df)):
    rows_to_change = range(minute_id_to_index[row["first"]], minute_id_to_index[row["end"]] + 1)
    if row["stop"]:
        for row_to_change in rows_to_change:
            prev_lat = None
            prev_lng = None
            if np.isnan(metadata.iloc[row_to_change]["new_long"]):
                if prev_lat:
                    if distance(prev_lat, prev_lng, row["latitude"], row["longitude"]) < 50:
                        metadata.loc[row_to_change, "new_lat"] = prev_lat
                        metadata.loc[row_to_change, "new_long"] = prev_lng
                    else:
                        metadata.loc[row_to_change, "new_lat"] = row["latitude"]
                        metadata.loc[row_to_change, "new_long"] = row["longitude"]
                prev_lat = metadata.loc[row_to_change, "new_lat"] 
                prev_lng = metadata.loc[row_to_change, "new_long"] 
    else:
        if 'timestamp' in row["points"][0]: # later
            prev_row_to_change = rows_to_change[0]
            for point in row["points"]:
                nearest_minute_id = find_closest_minute_id(point['timestamp'])
                if np.isnan(metadata.iloc[minute_id_to_index[nearest_minute_id]]["new_long"]):
                    metadata.loc[minute_id_to_index[nearest_minute_id], "new_lat"] = point["latE7"] * 1e-7
                    metadata.loc[minute_id_to_index[nearest_minute_id], "new_long"] = point["lngE7"] * 1e-7
                prev_row_to_change = minute_id_to_index[nearest_minute_id]
        else:
            num_segments = len(rows_to_change)
            for i, point in enumerate(row["points"]):
                index = rows_to_change[0] + i * num_segments // len(row["points"])
                metadata.loc[index, "new_lat"] = point["latE7"] * 1e-7
                metadata.loc[index, "new_long"] = point["lngE7"] * 1e-7
    metadata.loc[rows_to_change, ["new_lat", "new_long"]] = metadata.loc[rows_to_change, ["new_lat", "new_long"]].interpolate()            

  0%|          | 0/420 [00:00<?, ?it/s]

KeyError: '20190101_130254_000.jpg'

In [25]:
import bamboolib as bam
metadata["longitude"] = metadata["new_long"]
metadata["latitude"] = metadata["new_lat"]
metadata = metadata.drop(columns=['new_lat', 'new_long'])
metadata = metadata.loc[metadata['minute_id'].str.startswith('201901', na=False)]
metadata

Unnamed: 0.1,Unnamed: 0,minute_id,utc_time,local_time,latitude,longitude,altitude,semantic_name,time_zone,heart_rate(bpm),...,album name,sleep_level,awake,minutesToFallAsleep,minutesAsleep,minutesAwake,minutesAfterWakeup,timeInBed,sleep_efficiency,ImageID
0,0,20190101_0000,,,,,,,,85.0,...,,,,0.0,299.0,52.0,5.0,351.0,90.0,
1,1,20190101_0001,,,,,,,,88.0,...,Sleep: 111 Pieces Of Classical Music For Bedtime,,,0.0,299.0,52.0,5.0,351.0,90.0,
2,2,20190101_0002,,,,,,,,89.0,...,,,,0.0,299.0,52.0,5.0,351.0,90.0,
3,3,20190101_0003,,,,,,,,88.0,...,,,,0.0,299.0,52.0,5.0,351.0,90.0,
4,4,20190101_0004,,,,,,,,84.0,...,,,,0.0,299.0,52.0,5.0,351.0,90.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44635,44635,20190131_2355,,,53.389977,-6.145787,,,,73.0,...,,light,,,,,,,,
44636,44636,20190131_2356,,,53.389977,-6.145786,,,,72.0,...,,light,,,,,,,,
44637,44637,20190131_2357,,,53.389977,-6.145786,,,,72.0,...,,light,,,,,,,,
44638,44638,20190131_2358,,,53.389977,-6.145786,,,,72.0,...,,light,,,,,,,,


In [26]:
metadata.to_csv("temp/filled_metadata.csv")

In [20]:
segment_df

Unnamed: 0,type,name,start,end,latitude,longitude,end_latitude,end_longitude,points
0,moving,IN_PASSENGER_VEHICLE,20190101_1303,20190101_1341,53.389945,-6.146047,53.3424,-6.25993,"[{'latE7': 533887253, 'lngE7': -61469694, 'acc..."
1,stop,Brown Thomas,20190101_1341,20190101_1410,53.342429,-6.259931,,,[]
2,moving,WALKING,20190101_1410,20190101_1411,53.342429,-6.259931,53.341,-6.26048,"[{'latE7': 533424957, 'lngE7': -62602528, 'acc..."
3,stop,Dubray Books,20190101_1411,20190101_1523,53.340967,-6.260477,,,[]
4,moving,WALKING,20190101_1523,20190101_1546,53.341804,-6.260134,53.3426,-6.26216,"[{'latE7': 533418083, 'lngE7': -62601323}, {'l..."
...,...,...,...,...,...,...,...,...,...
415,moving,WALKING,20190131_2000,20190131_2004,53.385573,-6.258757,53.387,-6.25754,"[{'latE7': 533866986, 'lngE7': -62582949, 'acc..."
416,moving,IN_PASSENGER_VEHICLE,20190131_2004,20190131_2021,53.387026,-6.257541,53.3867,-6.14707,"[{'latE7': 533822153, 'lngE7': -62437424, 'acc..."
417,stop,Kilbarrack Dental Care,20190131_2021,20190131_2115,53.386613,-6.147290,,,[]
418,moving,IN_PASSENGER_VEHICLE,20190131_2115,20190131_2128,53.386613,-6.147290,53.39,-6.14581,"[{'latE7': 533880316, 'lngE7': -61503183, 'acc..."
