In [None]:
from datetime import datetime

import contextily as ctx
import geopandas as gpd
import h3
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
def plot_df(df, column=None, ax=None, add_basemap=True):
    """Render the `geometry` column of a GeoPandas dataframe on a matplotlib axis.

    Parameters
    ----------
    df : GeoDataFrame to draw. It is reprojected on a copy, so the caller's
        frame is not modified.
    column : optional column name forwarded to `GeoDataFrame.plot` with
        `categorical=True`.
    ax : axis to draw on; a new 8x8 inch figure is created when omitted.
    add_basemap : when true, CartoDB Positron tiles are added to the axis.
    """
    # Web mercator (EPSG:3857); the same CRS is handed to the basemap below.
    projected = df.copy().to_crs(epsg=3857)

    if ax is None:
        ax = plt.subplots(figsize=(8, 8))[1]

    # Hide the tick axes on both sides.
    for axis in (ax.get_xaxis(), ax.get_yaxis()):
        axis.set_visible(False)

    projected.plot(
        column=column,
        categorical=True,
        ax=ax,
        alpha=0.25,
        edgecolor='k',
        legend=True,
        legend_kwds={'loc': 'upper left'},
    )

    if not add_basemap:
        return
    ctx.add_basemap(ax, crs=projected.crs, source=ctx.providers.CartoDB.Positron)
def plot_shape(shape, ax=None, add_basemap=True):
    """Plot a single shape by wrapping it in a one-row GeoDataFrame.

    The shape is declared as EPSG:4326 and passed on to `plot_df` together
    with `ax` and `add_basemap`.
    """
    single_row = gpd.GeoDataFrame({'geometry': [shape]}, crs='EPSG:4326')
    plot_df(single_row, ax=ax, add_basemap=add_basemap)
def plot_cell(cell, ax=None):
    """Plot one H3 cell: convert it to an H3 shape and draw it via `plot_shape`."""
    plot_shape(h3.cells_to_h3shape([cell]), ax=ax)
    
def plot_cells(cells, ax=None):
    """Plot a collection of H3 cells: their combined shape plus each cell on top.

    Parameters
    ----------
    cells : iterable of H3 cell ids.
    ax : matplotlib axis to draw on. A new 8x8 inch figure is created only
        when this is omitted, so a caller-supplied axis is actually used.
    """
    # Materialise once so any iterable can be both combined and looped over.
    cells = list(cells)

    if ax is None:
        _, ax = plt.subplots(figsize=(8, 8))

    # The combined shape goes first and is the only layer that requests the
    # basemap; the per-cell layers below are drawn without one.
    plot_shape(h3.cells_to_h3shape(cells), ax=ax, add_basemap=True)

    for single_cell in cells:
        plot_shape(h3.cells_to_h3shape([single_cell]), ax=ax, add_basemap=False)


In [None]:
# Load the two CSV exports whose `hex_id` values serve as the allow-lists
# of hexagons (one for DD, one for FB).
filename_DD_allowed_hexagons = '../../data/df_DD_for_SARIMAX_2025-04-08_14-28-37.csv'
df_DD_allowed_hexagons = pd.read_csv(filename_DD_allowed_hexagons)

filename_FB_allowed_hexagons = '../../data/df_FB_for_SARIMAX_2025-04-08_14-28-37.csv'
df_FB_allowed_hexagons = pd.read_csv(filename_FB_allowed_hexagons)

In [None]:
# Distinct hexagon ids of each dataset as plain Python lists; they feed the
# `isin` masks applied to the trip frames.
allowed_hex_id_DD = df_DD_allowed_hexagons['hex_id'].unique().tolist()
allowed_hex_id_FB = df_FB_allowed_hexagons['hex_id'].unique().tolist()


In [None]:
# Number of distinct allowed hexagon ids for DD.
len(allowed_hex_id_DD)

In [None]:
# Load the pickled trip tables (Dresden -> df_DD, Freiburg -> df_FB).
# NOTE: unpickling can execute arbitrary code, so these files must come from
# a trusted source.
filename_DD = '../../data/nextbike/trips_Dresden with hexagons 2025-03-17_12-01-41.pkl'
df_DD = pd.read_pickle(filename_DD)

filename_FB = '../../data/nextbike/trips_Freiburg with hexagons 2025-03-17_12-01-41.pkl'
df_FB = pd.read_pickle(filename_FB)

In [None]:
# Display the Dresden trips frame as loaded.
df_DD

In [None]:
# H3 resolution passed to `h3.latlng_to_cell` when computing the
# `small_hex_id_*` columns.
SMALL_RESOLUTION = 10

In [None]:
# Map every return coordinate to its H3 cell at SMALL_RESOLUTION.
# Zipping the two columns avoids `DataFrame.apply(axis=1)`, which builds a
# Series object for every row and is slow on large trip tables; the
# resulting values are the same and are assigned positionally.
df_DD['small_hex_id_return'] = [
    h3.latlng_to_cell(lat, lng, SMALL_RESOLUTION)
    for lat, lng in zip(df_DD['lat_return'], df_DD['lng_return'])
]

In [None]:
# A trip is kept only when both its rent and its return hexagon are allowed;
# every other row has its small return cell blanked out.
both_ends_allowed_DD = (
    df_DD['hex_id_return'].isin(allowed_hex_id_DD)
    & df_DD['hex_id_rent'].isin(allowed_hex_id_DD)
)
flt = ~both_ends_allowed_DD
df_DD.loc[flt, "small_hex_id_return"] = None


In [None]:
# Rows per small return cell; rows blanked to None are left out because
# `value_counts` drops missing values by default.
df_DD['small_hex_id_return'].value_counts()

In [None]:
# len(df_DD['small_hex_id_return'].unique())

In [None]:
# len(df_DD['small_hex_id_rent'].unique())

In [None]:
# Distinct small return cells; the None placeholder shows up as one of the
# values if any rows were blanked.
df_DD['small_hex_id_return'].unique()

In [None]:
# Sample for plotting: the last ten distinct (non-missing) return cell ids
# in sorted order.
distinct_return_cells_DD = df_DD['small_hex_id_return'].dropna().unique()
small_hex_id_sample = sorted(distinct_return_cells_DD)[-10:]

In [None]:
# The ten small return cells with the highest row counts.
return_counts_DD = df_DD['small_hex_id_return'].value_counts()
largest_hex_ids = return_counts_DD.nlargest(10).index.tolist()

In [None]:
# Plot the ten cells taken from the end of the sorted id list.
plot_cells(small_hex_id_sample)

In [None]:
# Plot the ten cells with the highest row counts in `small_hex_id_return`.
plot_cells(largest_hex_ids)

In [None]:
# plot_cells(df_DD['small_hex_id_return'].dropna().unique())

In [None]:

def _latlng_to_small_hex(lats, lngs):
    """Return the H3 cell id at SMALL_RESOLUTION for each (lat, lng) pair.

    Iterating the two columns in lockstep avoids `DataFrame.apply(axis=1)`,
    which builds a Series object for every row.
    """
    return [h3.latlng_to_cell(lat, lng, SMALL_RESOLUTION) for lat, lng in zip(lats, lngs)]


# Remaining small-cell columns: FB return, DD rent and FB rent.
df_FB['small_hex_id_return'] = _latlng_to_small_hex(df_FB['lat_return'], df_FB['lng_return'])
df_DD['small_hex_id_rent'] = _latlng_to_small_hex(df_DD['lat_rent'], df_DD['lng_rent'])
df_FB['small_hex_id_rent'] = _latlng_to_small_hex(df_FB['lat_rent'], df_FB['lng_rent'])

In [None]:
# Blank both small-cell columns for DD trips whose rent or return hexagon is
# not in the allow-list.
both_ends_allowed_DD = (
    df_DD['hex_id_return'].isin(allowed_hex_id_DD)
    & df_DD['hex_id_rent'].isin(allowed_hex_id_DD)
)
flt = ~both_ends_allowed_DD
df_DD.loc[flt, "small_hex_id_return"] = None
df_DD.loc[flt, "small_hex_id_rent"] = None

In [None]:
# Distinct small rent cells in FB before the allow-list mask is applied
# (missing values, if any, count as one value).
df_FB['small_hex_id_rent'].nunique(dropna=False)

In [None]:
# Distinct small return cells in FB before the allow-list mask is applied
# (missing values, if any, count as one value).
df_FB['small_hex_id_return'].nunique(dropna=False)

In [None]:
# Blank both small-cell columns for FB trips whose rent or return hexagon is
# not in the allow-list.
both_ends_allowed_FB = (
    df_FB['hex_id_return'].isin(allowed_hex_id_FB)
    & df_FB['hex_id_rent'].isin(allowed_hex_id_FB)
)
flt = ~both_ends_allowed_FB
df_FB.loc[flt, "small_hex_id_return"] = None
df_FB.loc[flt, "small_hex_id_rent"] = None

In [None]:
# Distinct small rent cells in FB after masking; the None placeholder counts
# as one value when any row was blanked.
df_FB['small_hex_id_rent'].nunique(dropna=False)

In [None]:
# Distinct small return cells in FB after masking; the None placeholder
# counts as one value when any row was blanked.
df_FB['small_hex_id_return'].nunique(dropna=False)

In [None]:
# One timestamp shared by both exports so the two files can be paired by name.
# (`datetime` is imported in the notebook's import cell. The variable name
# `time` is kept unchanged in case other cells refer to it.)
time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# The resolution in the file name is taken from SMALL_RESOLUTION instead of a
# hard-coded "10", so the name cannot drift from the data it describes.
df_DD.to_csv(f'../../data/nextbike/trips_DD_with_small_hexids_res{SMALL_RESOLUTION}_{time}.csv', index=False)
df_FB.to_csv(f'../../data/nextbike/trips_FB_with_small_hexids_res{SMALL_RESOLUTION}_{time}.csv', index=False)