In [109]:
import json
import os
import requests
import pandas as pd
from datetime import datetime
from pathlib import Path

In [None]:
# Read the Google Maps API key from the process environment so it is never
# hardcoded in the notebook; stop immediately when it is missing or empty.
GOOGLE_MAPS_API_KEY = os.getenv("GOOGLE_MAPS_API_KEY")
if GOOGLE_MAPS_API_KEY is None or GOOGLE_MAPS_API_KEY == "":
    raise ValueError("GOOGLE_MAPS_API_KEY environment variable not set")

## Extract the visited place IDs from the Google Timeline data

In [66]:
# Path of the timeline export, relative to the notebook's working directory.
GOOGLE_TAKEOUT_DATA_FILE = Path("Timeline.json")

# Parse the export and keep its "semanticSegments" list.
# The file is decoded explicitly as UTF-8 so the result does not depend on the
# platform's default locale encoding (e.g. cp1252 on Windows).
with open(GOOGLE_TAKEOUT_DATA_FILE, "r", encoding="utf-8") as f:
    data: dict = json.load(f)

semantic_location_history: list[dict] = data["semanticSegments"]

In [None]:
# Collect the distinct place IDs found under visit -> topCandidate -> placeId
# across all timeline segments.
unique_visited_places_ids = set()

for segment in semantic_location_history:
    # `or {}` covers both a missing key and an explicit null at each level;
    # a plain chained `.get()` would raise AttributeError on a visit that has
    # no "topCandidate" entry.
    top_candidate = (segment.get("visit") or {}).get("topCandidate") or {}
    place_id = top_candidate.get("placeId")
    if place_id:
        unique_visited_places_ids.add(place_id)

print("Number of all-time unique places visited:")
print(len(unique_visited_places_ids))

## Get the place names by querying the Google Places API

In [None]:
# On-disk cache of place_id -> place name, reused across runs so that each
# place ID is requested from the API at most once.
PLACES_NAMES_CACHE_FILE = "corresponding_place_names.json"
PLACE_DETAILS_URL = "https://maps.googleapis.com/maps/api/place/details/json"
# Upper bound (seconds) on a single HTTP request so a stalled connection
# raises instead of blocking the cell forever.
REQUEST_TIMEOUT_SECONDS = 30

# Maps place_id -> place name; None is stored when the API reports
# NOT_FOUND / INVALID_REQUEST or returns a result without a name.
places_names = {}

# Load existing data if present
try:
    with open(PLACES_NAMES_CACHE_FILE, "r", encoding="utf-8") as f:
        places_names = json.load(f)
except FileNotFoundError:
    # No cache yet (first run): start from an empty mapping.
    places_names = {}
except (OSError, ValueError) as exc:
    # Unreadable or corrupt cache: still start from scratch, but say so
    # instead of silently discarding the problem.
    print(f"Could not load {PLACES_NAMES_CACHE_FILE} ({exc}); starting with an empty cache.")
    places_names = {}

for place_id in unique_visited_places_ids:
    if place_id in places_names:
        continue  # already resolved in a previous run

    # `params=` lets requests URL-encode the place ID and the key.
    response = requests.get(
        PLACE_DETAILS_URL,
        params={"place_id": place_id, "key": GOOGLE_MAPS_API_KEY},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Named `payload` (not `data`) so the timeline dict loaded earlier in the
    # notebook is not overwritten by an API response.
    payload = response.json()
    status = payload.get("status")
    # See https://developers.google.com/maps/reporting-and-monitoring/reporting#response-code-graphs
    if status == "NOT_FOUND":
        places_names[place_id] = None
    elif status == "INVALID_REQUEST":
        print(f"Invalid place_id {place_id}, skipping.")
        places_names[place_id] = None
    elif status == "OK":
        place_name = payload.get("result", {}).get("name")
        # TODO: we should also get the address, the short name and the long name
        places_names[place_id] = place_name
    else:
        print(f"Error for place_id {place_id}: {status}")
        print("Full response:", payload)
        break  # Stop the loop if there's an error (e.g., quota exceeded)

    # Write after each addition (less efficient, but safer as a long job).
    # ensure_ascii=False emits non-ASCII names verbatim, hence explicit UTF-8.
    with open(PLACES_NAMES_CACHE_FILE, "w", encoding="utf-8") as f:
        json.dump(places_names, f, indent=2, ensure_ascii=False)


## Build the conbini visits list

In [99]:
# Case-insensitive substrings; a place whose name contains any of them is
# treated as a conbini (convenience store).
SEARCH_STRINGS = ["7-eleven", "familymart", "lawson", "convenience"]

# Timeline segments whose top-candidate place name matches SEARCH_STRINGS.
conbinis_visits: list[dict] = []

# Re-read the timeline so this cell works on freshly parsed segments; matching
# segments are annotated in place with the resolved "placeName".
with open(GOOGLE_TAKEOUT_DATA_FILE, "r", encoding="utf-8") as f:
    data: dict = json.load(f)

semantic_location_history: list[dict] = data["semanticSegments"]
for d in semantic_location_history:
    # `or {}` tolerates a missing or null "visit" / "topCandidate".
    top_candidate: dict = (d.get("visit") or {}).get("topCandidate") or {}
    place_id: str | None = top_candidate.get("placeId")
    place_name = places_names.get(place_id) if place_id else None
    if place_name is None:
        continue  # not a visit, or the place name could not be resolved

    lowered_name = place_name.lower()
    # any() records each visit at most once, even when the name matches
    # several search strings (e.g. both "lawson" and "convenience").
    if any(s.lower() in lowered_name for s in SEARCH_STRINGS):
        top_candidate["placeName"] = place_name
        conbinis_visits.append(d)

### Save as JSON file

In [100]:
# Prefix the output name with the current local time (second resolution).
now: str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
filename: str = f"{now}_conbinis-visits.json"

# ensure_ascii=False writes non-ASCII place names verbatim, so the file is
# opened explicitly as UTF-8 instead of the platform's default encoding.
with open(filename, "w", encoding="utf-8") as f:
    json.dump(conbinis_visits, f, indent=2, ensure_ascii=False)

## Use the JSON to analyze data

### Load the JSON file as a pandas DataFrame

In [None]:
# Read back the visits file written by the previous step; decode explicitly as
# UTF-8 so the result does not depend on the platform's default encoding.
with open(filename, "r", encoding="utf-8") as f:
    data = json.load(f)

# Flatten the nested records into dotted column names
# (e.g. "visit.topCandidate.placeId").
df = pd.json_normalize(data)
# NOTE(review): if the timestamps carry different UTC offsets, to_datetime may
# not yield a single datetime64 dtype without `utc=True` - confirm on real data.
df["startDatetime"] = pd.to_datetime(df["startTime"])
df["endDatetime"] = pd.to_datetime(df["endTime"])
df.info()

### Number of conbini visits

In [None]:
# One row per recorded visit, so the row count is the number of visits.
df.shape[0]

### Number of unique conbinis visited

In [None]:
# Count the distinct place IDs among the visits (missing IDs are not counted).
df["visit.topCandidate.placeId"].nunique(dropna=True)

### Earliest conbini visit

In [None]:
# idxmin() returns an index *label*, so select the row with .loc (label-based)
# rather than .iloc (position-based); the two only coincide while the frame
# keeps its default 0..n-1 index.
df.loc[df["startDatetime"].idxmin(), ["visit.topCandidate.placeName", "startDatetime"]]

### Top 3 most visited conbinis

In [None]:
# One row per distinct place (keeping the name from its first occurrence),
# with the number of visits to that place, ordered from most to least visited.
df2 = (
    df[["visit.topCandidate.placeId", "visit.topCandidate.placeName"]]
    .drop_duplicates(subset=["visit.topCandidate.placeId"], keep="first")
    .assign(
        visitsNb=lambda places: places["visit.topCandidate.placeId"].map(
            df["visit.topCandidate.placeId"].value_counts()
        )
    )
    .sort_values(by="visitsNb", ascending=False)
)

# Show only the three most visited places.
df2.head(3)[["visit.topCandidate.placeName", "visitsNb"]]