In [1]:
import requests
from tqdm import tqdm_notebook, tqdm
import pycountry
import pandas as pd

def coordinates_from_city(city: str, country: str, zip:str) -> dict:
    country = pycountry.countries.get(alpha_3=country).alpha_2

    #public.opendatasoft doesn't have very small cities
    #however it makes sense to get coordinates based on city names and not on zip codes, because some cities have more than one zip code
    #so we try to get as much coordinates from city names and the rest from zip code names

    try:
        req = f"https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/" \
                        f"geonames-all-cities-with-a-population-1000/records?" \
                        f"coordinates&where=%22{city}%22%20in%20alternate_names%20AND%20country_code%20%3D%20%22{country}%22&limit=20"
        response = requests.get(req)
        ret = response.json()
        lat = ret["results"][0]["coordinates"]["lat"]
        lng = ret["results"][0]["coordinates"]["lon"]
        city = city

    except:
        req = f"https://zip-api.eu/api/v1/info/{country}-{zip}"
        response = requests.get(req)
        ret = response.json()
        lat = ret["lat"]
        lng = ret["lng"]
        city = ret["place_name"]
    
    return pd.Series([float(lng), float(lat), city])

def round_bin(x, prec=2, base=.5):
  return round(base * round(float(x)/base),prec)




In [2]:
file = r"C:\Users\REH\Meine Ablage\Mamore\PROMO\Vertrieb\versand\merch_orders_2023-12-07_all_hermes.csv"
df = pd.read_csv(file, encoding="utf-16", index_col=False, header=None, sep=";", dtype=str)

tqdm_notebook().pandas()
df[["lon", "lat", "place_name"]] = df.iloc[:, [6, 8, 5]].progress_apply(
        lambda x: coordinates_from_city(x[6], x[8], x[5]),
        axis=1
    )

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tqdm_notebook().pandas()


0it [00:00, ?it/s]

  0%|          | 0/94 [00:00<?, ?it/s]

In [12]:
import plotly.express as px
import pandas as pd

df_count = df.copy()
df_count.lon = df_count.lon.apply(lambda x: round_bin(x, base=.01))
df_count.lat = df_count.lat.apply(lambda x: round_bin(x, base=.01))
df_count = df_count.groupby(by=["lon", "lat", "place_name"]).size().reset_index(name="counts")

df_drop = df_count[df_count.lat != "unknown"]

color_scale = [(0, 'blue'), (1,'red')]

fig = px.scatter_mapbox(df_drop, 
                        lat="lat", 
                        lon="lon", 
                        color_continuous_scale=color_scale,
                        hover_name="place_name", 
                        hover_data=["place_name"],
                        zoom=5, 
                        size="counts",
                        color="counts",
                        height=800,
                        width=800,
                        title=f"{df_drop.counts.sum()} data points")

fig.update_layout(mapbox_style="carto-positron")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0}, title_x=0.5, title_y=0.98,)
fig.show()

94