# Process Google Timeline Data

In [17]:
import os
from dotenv import load_dotenv

# VARIABLES
folder = "EZ_data" # ONLY need to change this value

json_file = folder + "/location-history.json"
cluster_counts_file_path = folder + "/cluster_counts.csv"
cluster_data_file_path = folder + "/cluster_means.csv"
places_data_file_path = folder + "/places_info.csv"

load_dotenv()
# Make sure to create a .env file and store your API KEY
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

In [18]:
import json
import pandas as pd
import numpy as np
import requests
from sklearn.cluster import DBSCAN
from geopy.distance import great_circle
from datetime import datetime
import matplotlib.pyplot as plt

### Load Data

In [19]:
def load_google_timeline(json_file):
    with open(json_file, "r", encoding="utf-8") as file:
        data = json.load(file)
    
    locations = []
    for entry in data:
        if "visit" in entry:
            visit_info = entry["visit"]
            top_candidate = visit_info.get("topCandidate", {})

            if "placeLocation" in top_candidate:
                latlon_str = top_candidate["placeLocation"].replace("geo:", "")
                lat, lon = map(float, latlon_str.split(",")) 
                
                start_time = entry.get("startTime", "")
                end_time = entry.get("endTime", "")

                locations.append((lat, lon, start_time, end_time))

    return pd.DataFrame(locations, columns=["latitude", "longitude", "start_time", "end_time"])


df = load_google_timeline(json_file)
df.head()

Unnamed: 0,latitude,longitude,start_time,end_time
0,38.028028,-78.508516,2025-02-12T17:56:54.000-05:00,2025-02-13T10:44:48.307-05:00
1,38.031619,-78.510846,2025-02-13T10:51:39.463-05:00,2025-02-13T12:31:04.514-05:00
2,38.031619,-78.510846,2025-02-13T10:51:39.463-05:00,2025-02-13T11:03:44.537-05:00
3,38.028028,-78.508516,2025-02-13T12:36:02.657-05:00,2025-02-13T16:04:27.150-05:00
4,38.052815,-78.501257,2025-02-13T16:15:59.999-05:00,2025-02-13T17:02:51.018-05:00


### Find Clusters based on Longitude and Latitude

In [20]:
def cluster_locations(df, eps=50, min_samples=2):
    coords = df[["latitude", "longitude"]].values
    db = DBSCAN(eps=eps / 6371000, min_samples=min_samples, metric="haversine").fit(np.radians(coords))
    
    df["cluster"] = db.labels_
    return df

df_clusters = cluster_locations(df)

In [21]:
df_clusters.to_csv(cluster_counts_file_path)

In [22]:
import folium

def plot_clusters_folium(df):
    # map_center = [df["latitude"].mean(), df["longitude"].mean()]
    m = folium.Map(location=[38.028028, -78.508516], zoom_start=14)

    # Define colors for clusters
    cluster_colors = {
        -1: "gray"  # Noise
    }
    unique_clusters = df["cluster"].unique()
    cmap = plt.cm.get_cmap("tab10", len(unique_clusters))

    for i, cluster in enumerate(unique_clusters):
        if cluster not in cluster_colors:
            cluster_colors[cluster] = f"#{''.join(np.random.choice(list('0123456789ABCDEF'), 6))}" 
    
    # adding points to a map
    for _, row in df.iterrows():
        folium.CircleMarker(
            location=[row["latitude"], row["longitude"]],
            radius=5,
            color=cluster_colors[row["cluster"]],
            fill=True,
            fill_color=cluster_colors[row["cluster"]],
            fill_opacity=0.6,
        ).add_to(m)

    return m

plot_clusters_folium(df_clusters)


  cmap = plt.cm.get_cmap("tab10", len(unique_clusters))


In [23]:
print(df_clusters['cluster'].nunique())

34


In [24]:
df_filtered = df_clusters[df_clusters["cluster"] != -1]
cluster_means = df_filtered.groupby("cluster")[["latitude", "longitude"]].mean().reset_index()
cluster_means.to_csv(cluster_data_file_path, index=False)

In [25]:
# plot ONLY the cluster means on the map
plot_clusters_folium(cluster_means)

  cmap = plt.cm.get_cmap("tab10", len(unique_clusters))


### Use Google Places API to find Location Names

In [26]:
import requests

# Define the API endpoint
url = "https://places.googleapis.com/v1/places:searchNearby"

# Define the headers
headers = {
    "Content-Type": "application/json",
    "X-Goog-Api-Key": GOOGLE_API_KEY,  # Replace with your actual API key
    "X-Goog-FieldMask": "places.displayName,places.formattedAddress,places.types,places.id"
}

# Define the payload
data = {
    "maxResultCount": 1,
    "locationRestriction": {
        "circle": {
            "center": {
                "latitude": 38.904423,
                "longitude": -77.2036175000000
            },
            "radius": 20.0
        }
    }
}
# Make the POST request
response = requests.post(url, headers=headers, json=data)
print(response.json())

{'places': [{'id': 'ChIJHX0M4tdKtokRu7PPetQjRTY', 'types': ['grocery_store', 'supermarket', 'liquor_store', 'market', 'food_store', 'health', 'store', 'food', 'point_of_interest', 'establishment'], 'formattedAddress': '7514 Leesburg Pike, Falls Church, VA 22043, USA', 'displayName': {'text': "Trader Joe's", 'languageCode': 'en'}}]}


In [27]:
def get_place_info(lat, lon):
    url = "https://places.googleapis.com/v1/places:searchNearby"

    # Define the headers
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": GOOGLE_API_KEY,
        "X-Goog-FieldMask": "places.displayName,places.formattedAddress,places.types,places.id"
    }

    # Define the payload
    data = {
        "maxResultCount": 1,
        "locationRestriction": {
            "circle": {
                "center": {
                    "latitude": lat,
                    "longitude": lon
                },
                "radius": 20.0
            }
        }
    }
    response = requests.post(url, headers=headers, json=data)
    return response.json()


In [28]:
cluster_means = pd.read_csv(cluster_data_file_path)
results = []

for index, row in cluster_means.iterrows():
    lat = row['latitude']
    lon = row['longitude']
    cluster_id = row['cluster']
    
    place_data = get_place_info(lat, lon)
    
    places_info = place_data.get('places', [])
    
    # Loop through the places and store the relevant info
    for place in places_info:
        results.append({
            'cluster_id': cluster_id,
            'latitude': lat,
            'longitude': lon,
            'place_id': place.get('id', ''),
            'place_types': place.get('types', []),
            'place_address': place.get('formattedAddress', ''),
            'place_name': place.get('displayName', {}).get('text', '')
        })


place_info_df = pd.DataFrame(results)
place_info_df.head()

Unnamed: 0,cluster_id,latitude,longitude,place_id,place_types,place_address,place_name
0,0.0,38.028028,-78.508516,ChIJwfbNe1OHs4kRs0PzrATWiPQ,"[apartment_complex, point_of_interest, establi...","1800 Jefferson Park Ave, Charlottesville, VA 2...",1800 JPA
1,1.0,38.031619,-78.510846,ChIJB1s6kd6Hs4kRJhuNmO73ZLk,"[point_of_interest, establishment]","85 Engineer's Way, Charlottesville, VA 22903, USA",Rice Hall
2,2.0,38.052815,-78.501257,ChIJI5T2nbeHs4kRuLewu_1gO8g,"[shopping_mall, point_of_interest, establishment]","1117 Emmet St N, Charlottesville, VA 22903, USA",Barracks Road Shopping Center
3,3.0,38.127095,-78.444288,ChIJCf9IHQl3tIkR9yV1GWU27zw,"[shopping_mall, grocery_store, food_store, sto...","163 Community St, Charlottesville, VA 22911, USA",Hollymead Town Center
4,4.0,38.129001,-78.440783,ChIJ_5xbqgh3tIkRC_j6tDv7bLs,"[department_store, shoe_store, electronics_sto...","312 Connor Dr, Charlottesville, VA 22911, USA",Target


In [29]:
place_info_df.to_csv(places_data_file_path)