# Caution!

The Google Maps API is expensive, and this notebook calls it for geocoding, driving directions, and nearby-place searches, so take care before running any cell.

TODO: use GIS data instead of the Google API.

In [1]:
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime

import googlemaps
import pymongo

from domus_analytica.config import DomusSettings

# Project settings, constructed with ".env.local" passed as the env file.
config = DomusSettings(_env_file=".env.local")
# MongoDB database handle, then the collection every later cell reads and updates.
domus_db = pymongo.MongoClient(config.mongo_uri).get_database(config.mongo_db_name)
suumo_details = domus_db.get_collection("suumo_details")
# Google Maps client shared by the geocoding, directions and nearby-search cells.
gmaps = googlemaps.Client(key=config.google_api_key)

# Fixed timestamp used as the `search_time` filter in the queries of the cells below.
search_time = datetime(2024, 4, 1, 14, 46, 31, 449000)

In [2]:
# Filter for documents of the selected search run that have no "gps" field yet.
cond = {
    "search_time": search_time,
    "gps": {"$exists": False},
}
# Report how many documents match before any geocoding request is made.
print(
    f"There's {suumo_details.count_documents(cond)} documents need to update GPS info."
)

There's 0 documents need to update GPS info.


In [45]:
# Geocode the address of every document matched by `cond` and store the
# coordinates in its "gps" field. Each geocode call is a paid API request.
for doc in suumo_details.find(cond):
    if "gps" in doc:
        # Already geocoded; never spend an API call on it again.
        continue
    content_details = {
        d["type"]: d["content"] for d in doc["content_details"]
    }
    # "住所" is the address entry; only the text before the first newline is geocoded.
    address = content_details["住所"].split("\n")[0]
    # `region` expects a ccTLD country code ("jp" for Japan); "ja" is only
    # valid as a language code.
    geocode_result = gmaps.geocode(address, region="jp", language="ja")
    if not geocode_result:
        # No match: leave the document without "gps" so a later run picks it
        # up again, instead of aborting the whole loop with an IndexError.
        print(f"No geocoding result for address {address}, skipped.")
        continue
    location = geocode_result[0]["geometry"]["location"]
    gps = {
        "latitude": location["lat"],
        "longitude": location["lng"],
    }
    suumo_details.update_one({"_id": doc["_id"]}, {"$set": {"gps": gps}})
    print(f"Update address {address}'s GPS: {gps}")

In [46]:
# Destinations for the driving-route lookups.
# Each entry is ((latitude, longitude), name); the name becomes the suffix of
# the "drive_routes_to_<name>" field written by the routing loop.
KEY_POINTS = [
    ((33.59118086094799, 130.398581611983), "tenjin"),
    ((33.5873955705478, 130.41968891935684), "hakata"),
    ((33.59030230439562, 130.37888950301377), "ohori_park"),
]
# Store the driving route from every listing to each key point under
# "drive_routes_to_<name>". One paid directions request is made per
# (document, key point) pair.
# Only documents that already carry coordinates can be routed, so the query
# skips those without a "gps" field instead of failing on them.
for doc in suumo_details.find({'search_time': search_time, "gps": {"$exists": True}}):
    # The origin is (latitude, longitude); the original passed latitude twice,
    # which routed from a wrong point.
    origin = (doc["gps"]["latitude"], doc["gps"]["longitude"])
    routes = {
        f"drive_routes_to_{target_name}": gmaps.directions(
            origin=origin,
            destination=target_gps,
            mode="driving",
            language="ja",
            units="metric",
            # `region` expects a ccTLD country code ("jp"), not the language code "ja".
            region="jp",
        ) for target_gps, target_name in KEY_POINTS
    }
    suumo_details.update_one({"_id": doc["_id"]}, {"$set": routes})

In [130]:
def places_nearby(**kwargs):
    """Yield the places returned by a Google "nearby search".

    All keyword arguments are forwarded unchanged to ``gmaps.places_nearby``.
    The first page of results is always yielded. Further pages are fetched
    only when a ``radius`` was given; a search without one (e.g.
    ``rank_by="distance"``) stops after the first page.

    The Places API documents a short delay before a freshly issued
    ``next_page_token`` becomes usable; until then the request fails with
    ``INVALID_REQUEST``. Such failures are retried after one second, up to a
    fixed number of consecutive attempts, so a token that never becomes valid
    cannot stall the notebook forever.

    Raises:
        Exception: any error from the client that is not an
            ``INVALID_REQUEST``, or the last ``INVALID_REQUEST`` once the
            retry budget for a page is used up.
    """
    max_token_retries = 10  # consecutive INVALID_REQUEST retries allowed per page

    result = gmaps.places_nearby(**kwargs)
    yield from result["results"]
    if kwargs.get("radius") is None:
        return

    retries = 0
    while "next_page_token" in result:
        try:
            next_result = gmaps.places_nearby(page_token=result["next_page_token"])
        except Exception as e:
            if "INVALID_REQUEST" not in str(e) or retries >= max_token_retries:
                raise
            # Token not usable yet: wait and ask for the same page again.
            retries += 1
            time.sleep(1.0)
            continue
        retries = 0
        result = next_result
        yield from result["results"]


# Smoke test: one distance-ranked station search around a single coordinate.
# This issues a real (paid) API request.
test_result = [
    *places_nearby(
        rank_by="distance",
        type=["train_station", "light_rail_station", "subway_station", "transit_station"],
        location=('33.5762809282905', '130.251347894427'),
    )
]
test_result

[{'business_status': 'OPERATIONAL',
  'geometry': {'location': {'lat': 33.5706786, 'lng': 130.2460898},
   'viewport': {'northeast': {'lat': 33.5721407302915,
     'lng': 130.2472077802915},
    'southwest': {'lat': 33.5694427697085, 'lng': 130.2445098197085}}},
  'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/train-71.png',
  'icon_background_color': '#10BDFF',
  'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/train_rail_1_pinlet',
  'name': 'Susenji Station',
  'photos': [{'height': 3024,
    'html_attributions': ['<a href="https://maps.google.com/maps/contrib/117633406339242227669">TAKESHI M</a>'],
    'photo_reference': 'ATplDJba8EYQLtVJqn-b8AJFNWkrGE5oZ33LkrFv3kRh1M5ZNVBeyqGrx031iLlrBJLSeultqT7ZKyAvrcrTmZ4iwsy8GAB3BZsLtdlrdWD1_pbFUSHD66z0yEJ9RJQ_kPpy8mkroII85V77eTnxJofX0gPjS3gVK7vdKe7LoHu9fcsSJmQu',
    'width': 4032}],
  'place_id': 'ChIJNxjQssnrQTUR25oaQYi9no8',
  'plus_code': {'compound_code': 'H6CW+7C Fukuoka, Japan',
   'globa

In [137]:
# Nearby-search presets. Each entry is the keyword-argument set for one
# search; the first element of "type" names the "nearby_<type>" field the
# results are stored under.
# The entries below the station preset are currently disabled; the saved
# output of this cell lists all nine categories, so they were enabled for
# the recorded run.
NEARBY_TYPES = [
    {
        "type": ["train_station", "light_rail_station", "subway_station", "transit_station"],
        "rank_by": "distance",
    },
    # {"type": ["restaurant"], "radius": 1000},
    # {"type": ["supermarket"], "radius": 1000},
    # {"type": ["convenience_store"], "radius": 1000},
    # {"type": ["drugstore"], "radius": 1000},
    # {"type": ["park"], "radius": 1000},
    # {"type": ["primary_school"], "radius": 1000},
    # {"type": ["cafe"], "radius": 1000},
    # {"type": ["bus_station"], "radius": 500},
]

# For every preset in NEARBY_TYPES, run the nearby search for each document
# that does not have the corresponding "nearby_<type>" field yet and store
# the results. Every search is a paid API request.
for nearby_kwargs in NEARBY_TYPES:
    key = "nearby_{}".format(nearby_kwargs["type"][0])
    condition = {
        'search_time': search_time,
        key: {"$exists": False},
        # Documents without coordinates cannot be searched; skipping them here
        # avoids a KeyError inside a worker while the others keep spending calls.
        "gps": {"$exists": True},
    }


    def _update_doc(_doc, _key=key, _kwargs=nearby_kwargs):
        """Run the nearby search for one document and store it under ``_key``.

        ``_key`` and ``_kwargs`` are bound as defaults when the function is
        defined, so each worker uses the preset of the iteration that created
        it rather than whatever the loop variables hold when it runs.

        Returns the ``acknowledged`` flag of the MongoDB update.
        """
        return suumo_details.update_one({"_id": _doc["_id"]}, {"$set": {
            _key: {
                "search_kwargs": _kwargs,
                "results": list(places_nearby(
                    location=(_doc["gps"]["latitude"], _doc["gps"]["longitude"]),
                    language="ja",
                    **_kwargs
                ))
            }
        }}).acknowledged


    print(f"Updating {key}")
    # Only "_id" and "gps" are needed by the worker, so project just those.
    docs = list(suumo_details.find(condition, {"_id": 1, "gps": 1}))
    with ThreadPoolExecutor(max_workers=64) as tpe:
        futures = [tpe.submit(_update_doc, doc) for doc in docs]
        # f.result() re-raises any worker exception; every write must be acknowledged.
        assert all([f.result() for f in futures])


Updating nearby_train_station
Updating nearby_restaurant
Updating nearby_supermarket
Updating nearby_convenience_store
Updating nearby_drugstore
Updating nearby_park
Updating nearby_primary_school
Updating nearby_cafe
Updating nearby_bus_station


In [None]:
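# Reset helper: uncomment to remove the stored nearby-search fields (one per
# entry currently listed in NEARBY_TYPES) from every document in the collection.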
# suumo_details.update_many({}, {"$unset": {
#     "nearby_{}".format(nearby_kwargs["type"][0]): "" for nearby_kwargs in NEARBY_TYPES
# }}).modified_count