In [None]:
!pip install pandas requests

In [2]:
import json
import os
import requests
import pandas as pd
from datetime import datetime
from pathlib import Path

In [None]:
# Read the Google Maps API key from the environment and fail fast, with an
# explicit error, when it is missing or empty.
GOOGLE_MAPS_API_KEY = os.getenv("GOOGLE_MAPS_API_KEY")
if GOOGLE_MAPS_API_KEY is None or GOOGLE_MAPS_API_KEY == "":
    raise ValueError("GOOGLE_MAPS_API_KEY environment variable not set")

## Extract the visited place IDs from the Google Timeline data

In [None]:
# Location of the Google Takeout "Timeline.json" export.
# Set the GOOGLE_TAKEOUT_DATA_FILE environment variable to point at your own
# export; the original hard-coded location is kept as the fallback so existing
# setups keep working unchanged.
GOOGLE_TAKEOUT_DATA_FILE = Path(
    os.environ.get(
        "GOOGLE_TAKEOUT_DATA_FILE",
        "/Users/bolino/Library/CloudStorage/GoogleDrive-me@adriencarpentier.com/My Drive/Timeline.json",
    )
)

# Decode explicitly as UTF-8 instead of relying on the platform's default
# text encoding.
with open(GOOGLE_TAKEOUT_DATA_FILE, "r", encoding="utf-8") as f:
    data: dict = json.load(f)

# List of timeline segments (dicts); later cells look for a "visit" entry in
# each of them.
semantic_location_history: list[dict] = data["semanticSegments"]

In [None]:
# Collect the distinct place IDs of every visit found in the timeline.
# A segment only contributes when it has a "visit" entry whose "topCandidate"
# carries a non-empty "placeId". Missing or null intermediate keys are treated
# as "no place" instead of raising AttributeError on a None value.
unique_visited_places_ids: set[str] = set()

for segment in semantic_location_history:
    top_candidate = (segment.get("visit") or {}).get("topCandidate") or {}
    visited_place_id = top_candidate.get("placeId")
    if visited_place_id:
        unique_visited_places_ids.add(visited_place_id)

print("Number of all-time unique places visited:")
print(len(unique_visited_places_ids))

## Build the place details file by querying the Google Places API

In [14]:
# Fetch name/address details for every visited place from the Google Places
# "Place Details" endpoint. Results are cached in a JSON file and saved after
# every new place, so this long job can be interrupted and resumed without
# re-requesting places that are already known.
PLACES_DETAILS_FILE = "places_details.json"
PLACE_DETAILS_URL = "https://maps.googleapis.com/maps/api/place/details/json"
REQUEST_TIMEOUT_SECONDS = 30  # never hang forever on a stalled connection


def _summarize_place(result: dict) -> dict:
    """Reduce a Place Details ``result`` payload to the fields kept in the cache.

    Returns a dict with the place ``name``, its ``formattedAddress`` and
    ``addressComponents``: a mapping from each component type to the
    ``long_name``/``short_name`` of the first component carrying that type.
    """
    components: dict[str, dict] = {}
    for component in result.get("address_components", []):
        for component_type in component.get("types", []):
            # setdefault keeps the first component seen for a given type.
            components.setdefault(
                component_type,
                {
                    "long_name": component.get("long_name"),
                    "short_name": component.get("short_name"),
                },
            )
    return {
        "name": result.get("name"),
        "formattedAddress": result.get("formatted_address"),
        "addressComponents": components,
    }


def _save_places_details(details: dict) -> None:
    """Write the cache to a temporary file, then swap it into place.

    Replacing the file in one step means an interruption during the write
    cannot leave a truncated ``places_details.json`` behind.
    """
    tmp_file = f"{PLACES_DETAILS_FILE}.tmp"
    with open(tmp_file, "w", encoding="utf-8") as f:
        json.dump(details, f, indent=2, ensure_ascii=False)
    os.replace(tmp_file, PLACES_DETAILS_FILE)


# Load existing data if present
places_details: dict = {}
try:
    with open(PLACES_DETAILS_FILE, "r", encoding="utf-8") as f:
        places_details = json.load(f)
except FileNotFoundError:
    # First run: nothing has been cached yet.
    pass
except ValueError as exc:
    # Covers both invalid JSON and undecodable bytes. Keep the best-effort
    # behaviour (start over), but say so instead of hiding the problem.
    print(f"Ignoring unreadable {PLACES_DETAILS_FILE} ({exc}); starting from an empty cache.")
    places_details = {}

for place_id in unique_visited_places_ids:
    if place_id in places_details:
        continue  # already resolved (or recorded as unresolvable) earlier

    response = requests.get(
        PLACE_DETAILS_URL,
        # Let requests build and URL-encode the query string.
        params={"place_id": place_id, "key": GOOGLE_MAPS_API_KEY},
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    payload = response.json()
    status = payload.get("status")
    # See https://developers.google.com/maps/reporting-and-monitoring/reporting#response-code-graphs
    if status == "OK":
        places_details[place_id] = _summarize_place(payload.get("result", {}))
    elif status == "NOT_FOUND":
        # Remember the miss so the place is not requested again.
        places_details[place_id] = None
    elif status == "INVALID_REQUEST":
        print(f"Invalid place_id {place_id}, skipping.")
        places_details[place_id] = None
    else:
        print(f"Error for place_id {place_id}: {status}")
        print("Full response:", payload)
        break  # Stop the loop if there's an error (e.g., quota exceeded)

    # Write after each addition (less efficient, but safer as a long job)
    _save_places_details(places_details)

## Build the conbini visits list

In [138]:
# Substrings that identify a convenience store ("conbini") by its place name.
# Matching is case-insensitive.
SEARCH_STRINGS = [
    "7-eleven",
    "familymart",
    "lawson",
    "convenience",
    "seven-eleven",
    "7 eleven",
    "seven eleven",
    "7-11",
    "ministop",
]

conbinis_visits: list[dict] = []

# Reload the export so the annotations added below are written onto freshly
# loaded segment dicts.
with open(GOOGLE_TAKEOUT_DATA_FILE, "r", encoding="utf-8") as f:
    data: dict = json.load(f)

semantic_location_history: list[dict] = data["semanticSegments"]
for d in semantic_location_history:
    top_candidate: dict = (d.get("visit") or {}).get("topCandidate") or {}
    place_id: str | None = top_candidate.get("placeId")
    # Skip segments without a place, and places that are not cached or are
    # cached as None (not found / invalid).
    details = places_details.get(place_id) if place_id else None
    if details is None:
        continue

    # A cached value may be null, so fall back to "" before calling .lower().
    place_name: str = details.get("name") or ""
    # The cache stores the address under "formattedAddress".
    place_address: str = details.get("formattedAddress") or ""

    normalized_name = place_name.lower()
    if any(s.lower() in normalized_name for s in SEARCH_STRINGS):
        top_candidate["placeName"] = place_name
        top_candidate["placeAddress"] = place_address
        conbinis_visits.append(d)

### Save as JSON file

In [None]:
# Prefix the export with the current timestamp (second resolution) so each run
# writes its own file.
now: str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
filename: str = f"{now}_conbinis-visits.json"

# ensure_ascii=False writes non-ASCII characters verbatim, so pin the file
# encoding to UTF-8 instead of depending on the platform default.
with open(filename, "w", encoding="utf-8") as f:
    json.dump(conbinis_visits, f, indent=2, ensure_ascii=False)

## Build the specific place visits list

In [39]:
# Set this to the placeId of a specific place to count how many times you have been there
SPECIFIC_PLACE_ID = "ChIJVVVyN2DzGGARHqP7hMzXU4w"

In [40]:
tengu_shokudo_visits: list[dict] = []

# Look the place up once. It may be missing from the cache, or cached as None
# (not found / invalid); in both cases fall back to empty name and address
# instead of raising.
specific_place_details: dict = places_details.get(SPECIFIC_PLACE_ID) or {}
specific_place_name: str = specific_place_details.get("name") or ""
# The cache stores the address under "formattedAddress".
specific_place_address: str = specific_place_details.get("formattedAddress") or ""

# Reload the export so the annotations added below are written onto freshly
# loaded segment dicts.
with open(GOOGLE_TAKEOUT_DATA_FILE, "r", encoding="utf-8") as f:
    data: dict = json.load(f)

semantic_location_history: list[dict] = data["semanticSegments"]
for d in semantic_location_history:
    top_candidate: dict = (d.get("visit") or {}).get("topCandidate") or {}
    place_id: str | None = top_candidate.get("placeId")
    if place_id and place_id == SPECIFIC_PLACE_ID:
        top_candidate["placeName"] = specific_place_name
        top_candidate["placeAddress"] = specific_place_address
        tengu_shokudo_visits.append(d)

In [22]:
# Prefix the export with the current timestamp (second resolution) so each run
# writes its own file.
now: str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
filename: str = f"{now}_tengu-shokudo-visits.json"

# ensure_ascii=False writes non-ASCII characters verbatim, so pin the file
# encoding to UTF-8 instead of depending on the platform default.
with open(filename, "w", encoding="utf-8") as f:
    json.dump(tengu_shokudo_visits, f, indent=2, ensure_ascii=False)

## Use the JSON to analyze data

### Load the JSON file as Pandas dataframe

In [None]:
# Analyse the most recent conbini export.
# The file is found by name pattern rather than through the `filename`
# variable: `filename` is reassigned by every "save" cell, so it points at
# whichever export happened to be written last (not necessarily the conbini
# one). Change the pattern to analyse a different export.
VISITS_FILE_PATTERN = "*_conbinis-visits.json"

# Export names start with a zero-padded timestamp, so the last name in sorted
# order is the newest export.
visits_files = sorted(Path(".").glob(VISITS_FILE_PATTERN))
if not visits_files:
    raise FileNotFoundError(
        f"No file matching {VISITS_FILE_PATTERN!r} in {Path.cwd()}; "
        "run the 'Save as JSON file' cell first."
    )
visits_file = visits_files[-1]
print(f"Loading {visits_file}")

with open(visits_file, "r", encoding="utf-8") as f:
    data: list[dict] = json.load(f)

# Flatten the nested segments: nested keys become dotted column names such as
# "visit.topCandidate.placeId".
df = pd.json_normalize(data)
df["startDatetime"] = pd.to_datetime(df["startTime"])
df["endDatetime"] = pd.to_datetime(df["endTime"])
df.info()

### Number of conbinis visits

In [None]:
# Each row of df is one visit, so the row count is the number of visits.
df.shape[0]

### Number of unique conbinis visited

In [None]:
# Number of distinct place IDs among the visits (missing values are not counted).
visited_place_ids = df["visit.topCandidate.placeId"]
visited_place_ids.nunique()

### Earliest conbini visit

In [None]:
# idxmin() returns an index *label*, so the row must be selected with the
# label-based .loc. Positional .iloc only gives the same row while df keeps
# its default 0..n-1 index.
earliest_visit_label = df["startDatetime"].idxmin()
df.loc[
    earliest_visit_label,
    ["visit.topCandidate.placeName", "visit.topCandidate.placeAddress", "startDatetime"],
]

### Top 3 most visited conbinis

In [None]:
# One row per distinct place (first occurrence kept), annotated with how many
# visits df contains for that place, ordered from most to least visited.
place_id_column = "visit.topCandidate.placeId"
place_columns = [
    place_id_column,
    "visit.topCandidate.placeName",
    "visit.topCandidate.placeAddress",
]
visits_per_place = df[place_id_column].value_counts()

df2 = (
    df[place_columns]
    .drop_duplicates(subset=[place_id_column], keep="first")
    .assign(visitsNb=lambda places: places[place_id_column].map(visits_per_place))
    .sort_values(by="visitsNb", ascending=False)
)

# Show the three most visited places.
df2.head(3)[["visit.topCandidate.placeName", "visit.topCandidate.placeAddress", "visitsNb"]]

### Longest visits

In [None]:
# Duration of every visit, stored on df as a new "visitDuration" column.
df["visitDuration"] = df["endDatetime"].sub(df["startDatetime"])

# Columns shown for the longest visits.
duration_columns = ["startDatetime", "endDatetime", "visitDuration"]

# The three visits with the largest duration.
longest_visits = df.nlargest(3, "visitDuration")[duration_columns]

# Display those rows, looked up in df by their index labels.
df.loc[longest_visits.index, duration_columns]

