In [None]:
import pandas as pd
import pyreadr
import geopy.distance
from tqdm.notebook import tqdm
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point
import contextily as ctx
import os


In [None]:
# Pickled rent/return tables, one file per city (DD = Dresden, FB = Freiburg).
filename_DD, filename_FB = (
    '../data/nextbike/rents_returns_by_bike_Dresden_01-03_09-10.24.pkl',
    '../data/nextbike/rents_returns_by_bike_Freiburg_06-07.23_09-10.24.pkl',
)
# Load both tables in the same order as the paths above.
df_DD, df_FB = (pd.read_pickle(pickle_path) for pickle_path in (filename_DD, filename_FB))

In [None]:
# Pickled events table (the loaded frame is filtered by its `City` column below).
events_path = "../data/events/df_events will all coordinates 2025-01-16_18-03-13.pkl"
df_events = pd.read_pickle(filepath_or_buffer=events_path)

In [None]:
# Number of events per city.
df_events["City"].value_counts()

In [None]:
# Keep only the events located in the two study cities.
df_events = df_events.loc[df_events["City"].isin(['Dresden', 'Freiburg'])]

In [None]:
# # other cities near Dresden are too far -- exclude them
# df_events.to_pickle(events_path)

In [None]:
# df_events_other_cities =events.loc[~events.City.isin(['Dresden', 'Freiburg']), ['lat', 'lon']]

In [None]:
# Split the events into one frame per city.
df_events_DD = df_events.loc[df_events["City"].eq('Dresden')]
df_events_FB = df_events.loc[df_events["City"].eq('Freiburg')]

In [None]:
# Summary statistics of the Dresden events frame.
df_events_DD.describe()

In [None]:
# Southernmost latitude in the Dresden bike data.
df_DD["lat"].min()

In [None]:
# Southernmost latitude among the Dresden events (compare with the bike data).
df_events_DD["lat"].min()

In [None]:
# Peek at the first two Dresden events.
df_events_DD.head(n=2)

In [None]:
# Peek at the first two Dresden bike records.
df_DD.head(n=2)

In [None]:
# Notebook display settings: show every column, up to 100 rows, and long cell contents.
pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = 400

In [None]:
# Number of distinct bikes seen in Freiburg in October (a missing id counts as one value).
df_FB.loc[df_FB["datetime"].dt.month.eq(10), "bike_number"].nunique(dropna=False)

In [None]:
# Number of distinct bikes seen in Dresden in October (a missing id counts as one value).
df_DD.loc[df_DD["datetime"].dt.month.eq(10), "bike_number"].nunique(dropna=False)

In [None]:
# Calendar month (as a monthly Period) of every Dresden record.
df_DD['month'] = df_DD['datetime'].dt.to_period(freq='M')

In [None]:
# Calendar month (as a monthly Period) of every Freiburg record.
df_FB['month'] = df_FB['datetime'].dt.to_period(freq='M')

In [None]:
# A record is flagged as dockless when it carries no station number.
df_DD['is_dockless'] = df_DD['station_number'].isnull()
df_FB['is_dockless'] = df_FB['station_number'].isnull()

In [None]:
# df_DD[df_DD.rent_or_return=='returns'].groupby(['month', 'is_dockless']).size()

In [None]:
# df_FB[df_FB.rent_or_return=='rent'].groupby(['month', 'is_dockless']).size()

In [None]:
# Share of dockless vs. station records among Dresden rows tagged 'rent'.
df_DD.loc[df_DD['rent_or_return'].eq('rent'), 'is_dockless'].value_counts(normalize=True)

In [None]:
# Share of dockless vs. station records among Dresden rows tagged 'returns'.
df_DD.loc[df_DD['rent_or_return'].eq('returns'), 'is_dockless'].value_counts(normalize=True)

In [None]:
# Share of dockless vs. station records among Freiburg rows tagged 'rent'.
df_FB.loc[df_FB['rent_or_return'].eq('rent'), 'is_dockless'].value_counts(normalize=True)

In [None]:
# df_DD[df_DD.rent_or_return=='returns'].groupby("month")['is_dockless'].value_counts(normalize=True)

In [None]:
# df_FB[df_FB.rent_or_return=='returns'].groupby("month")['is_dockless'].value_counts(normalize=True)

In [None]:
# df_FB[df_FB.rent_or_return=='rent'].groupby("month")['is_dockless'].value_counts(normalize=True)

In [None]:
# Timestamps of all Dresden records that fall in June (any year).
df_DD.loc[df_DD['datetime'].dt.month.eq(6), 'datetime']

In [None]:
def round_up_to_30_minutes(dt):
    """Round a timestamp up to the next half-hour boundary.

    A timestamp that already lies exactly on ``:00`` or ``:30`` is returned
    unchanged. Sub-second components (microseconds and, where the object has
    them, nanoseconds) are part of the remainder, so the result always lies
    exactly on a half-hour mark.

    Parameters
    ----------
    dt : pandas.Timestamp or datetime.datetime
        The timestamp to round. Missing values (``NaT`` / ``None``) are
        returned unchanged.

    Returns
    -------
    The input moved forward to the next ``:00`` / ``:30`` mark, or the input
    itself when it is already on one (or is missing).
    """
    # Missing timestamps have no minute/second fields to work with.
    if pd.isna(dt):
        return dt

    # Time elapsed since the previous half-hour mark, down to the finest unit available.
    remainder = pd.Timedelta(
        minutes=dt.minute % 30,
        seconds=dt.second,
        microseconds=dt.microsecond,
        nanoseconds=getattr(dt, "nanosecond", 0),  # plain datetime has no nanoseconds
    )
    if remainder > pd.Timedelta(0):
        dt += pd.Timedelta(minutes=30) - remainder
    return dt

In [None]:
# Assign every Dresden record to the half-hour slot that ends at or after its timestamp.
df_DD['datetime_rounded'] = df_DD['datetime'].apply(func=round_up_to_30_minutes)

In [None]:
# Assign every Freiburg record to the half-hour slot that ends at or after its timestamp.
df_FB['datetime_rounded'] = df_FB['datetime'].apply(func=round_up_to_30_minutes)

In [None]:
# Number of distinct half-hour slots present in the Dresden data.
df_DD['datetime_rounded'].nunique(dropna=False)

In [None]:
# Number of distinct half-hour slots present in the Freiburg data.
df_FB['datetime_rounded'].nunique(dropna=False)

In [None]:
# Display all Dresden rows in random order; no random_state is set, so the order differs on every run.
df_DD.sample(frac=1)

In [None]:
# Load a single raw snapshot file (2024-01-31 08:15:00, Dresden folder) from the local data directory.
# NOTE(review): absolute, machine-specific path; this cell only runs where that folder exists.
result = pyreadr.read_r('/Users/v.sinichenko/Library/Mobile Documents/com~apple~CloudDocs/MyFiles/Python/nextbike_original_data/Dresden_01-03.24/2024-01-31/2024-01-31-08-15-00.rds') # Wednesday
# read_r returns a dict-like result; the data frame of this .rds file is stored under the key None.
df_rushhour = result[None]

In [None]:
# Last five rows of the loaded snapshot.
df_rushhour.tail(n=5)

In [None]:
# (lat, lng) pairs of two consecutive rows of the Dresden table.
coords_1 = tuple(df_DD.loc[259996, ["lat", "lng"]])
coords_2 = tuple(df_DD.loc[259997, ["lat", "lng"]])
# Geodesic distance between them in metres.
# Author's observation: the distance fluctuates (45 m, 24 m, ...).
geopy.distance.geodesic(coords_1, coords_2).meters

In [None]:
# Raw latitude and longitude differences between the two positions, one per line.
print(coords_1[0] - coords_2[0], coords_1[1] - coords_2[1], sep="\n")


In [None]:
# Show the current value of coords_1.
coords_1

In [None]:
# Show the current value of coords_2.
coords_2

In [None]:
# Share of dockless vs. station records among Dresden rows tagged 'returns'.
df_DD.loc[df_DD['rent_or_return'].eq('returns'), 'is_dockless'].value_counts(normalize=True)

In [None]:
# Last five rows of the loaded snapshot.
df_rushhour.tail(n=5)

In [None]:
# Bike that is traced through the snapshot and the event table in the cells below.
BIKE_ID = 930_609
# Rows of the snapshot that belong to this bike.
df_rushhour.loc[df_rushhour["bike_number"].eq(BIKE_ID)]

In [None]:
# Latitude/longitude stored in the Dresden table under index label 259988.
df_DD.loc[259988, ["lat", "lng"]]

In [None]:
# Position of the traced bike in the snapshot (first matching row) ...
coords_1 = df_rushhour.loc[df_rushhour["bike_number"].eq(BIKE_ID), ["lat", "lng"]].iloc[0]
# ... versus the position stored in the Dresden table under index label 259988.
coords_2 = df_DD.loc[259988, ["lat", "lng"]]
# Geodesic distance between the two positions in metres.
geopy.distance.geodesic(coords_1, coords_2).meters

In [None]:
# Show the current value of coords_1.
coords_1

In [None]:
# Full record stored under index label 259988, as a column -> value dict.
df_DD.loc[259988].to_dict()

In [None]:
# All Dresden records of the traced bike on 2024-01-31, in chronological order.
df_DD.loc[
    df_DD["bike_number"].eq(BIKE_ID)
    & df_DD["date"].eq(pd.to_datetime("2024-01-31").date())
].sort_values(by="datetime")

In [None]:
# Read every snapshot file of this day and keep only the rows of the traced bike (BIKE_ID).

In [None]:
# Scan every snapshot file of one day and collect the records of the traced bike.
# Each element appended to `rows` is a list of record dicts (empty when the bike
# is absent from that snapshot).
rows = []
folder = "/Users/v.sinichenko/Library/Mobile Documents/com~apple~CloudDocs/MyFiles/Python/nextbike_original_data/Dresden_01-03.24/2024-01-31"
# os.listdir returns every directory entry in arbitrary order. Keep only the
# .rds snapshots so stray files (e.g. .DS_Store) are not handed to pyreadr,
# and sort them so the scan order is deterministic.
snapshot_files = sorted(
    entry for entry in os.listdir(folder) if entry.lower().endswith(".rds")
)
for filename in tqdm(snapshot_files):
    file_path = os.path.join(folder, filename)
    result = pyreadr.read_r(file_path)
    # The data frame of an .rds file is stored under the key None.
    df_tmp = result[None]
    row = df_tmp[df_tmp.bike_number==BIKE_ID].to_dict(orient='records')
    rows.append(row)

In [None]:
# Flatten the scan result: keep the first matching record of every snapshot and
# drop snapshots without a match. Entries that are already plain records are
# passed through, so re-running this cell does not fail on `row[0]`.
rows = [row[0] if isinstance(row, list) else row for row in rows if row]

In [None]:
# One row per snapshot in which the traced bike was found.
df_bike = pd.DataFrame(data=rows)

In [None]:
# Order the bike's snapshots by their time_stamp column.
df_bike = df_bike.sort_values(by="time_stamp")

In [None]:
# Display the collected snapshots of the traced bike.
df_bike

In [None]:
# df_rushhour.state.value_counts()

In [None]:
# df_rushhour.no_registration.value_counts()

In [None]:
# Display the loaded snapshot (long output is limited by the pandas display options).
df_rushhour

In [None]:
# First five rows of the loaded snapshot.
df_rushhour.head(n=5)

In [None]:
# Parameters of the snapshot to plot: date, time of day (HH-MM-SS) and city.
# They also end up in the file name of the figure saved further below.
date_chosen = "2024-09-04"
time_chosen = "08-15-00"
city_chosen = "Freiburg"
# Load that single snapshot; this replaces the snapshot previously held in df_rushhour.
# NOTE(review): absolute, machine-specific path; this cell only runs where that folder exists.
result = pyreadr.read_r(f'/Users/v.sinichenko/Library/Mobile Documents/com~apple~CloudDocs/MyFiles/Python/nextbike_original_data/{city_chosen}_09-10.24/{date_chosen}/{date_chosen}-{time_chosen}.rds') # Wednesday
# The data frame of an .rds file is stored under the key None.
df_rushhour = result[None]

In [None]:
# Drop every row located at the minimum latitude of the snapshot
# (presumably an outlier position; TODO confirm).
# NOTE: this overwrites df_rushhour, so each re-run of this cell removes the
# next-lowest latitude as well. Reload the snapshot before re-running.
df_rushhour = df_rushhour.loc[df_rushhour["lat"].ne(df_rushhour["lat"].min())]

In [None]:
# Frequency of each value in the snapshot's `bike` column.
df_rushhour["bike"].value_counts()

In [None]:
# df_DD["is_dockless"]

In [None]:
# df_DD[df_DD.datetime.dt.date==pd.to_datetime(date_chosen).date()]

In [None]:
# Display the filtered snapshot that is plotted below.
df_rushhour

In [None]:
# Map of all bikes in the chosen snapshot, coloured by whether they stand at a station.

# Build point geometries from the lng/lat columns (x = longitude, y = latitude, EPSG:4326).
geometry = gpd.points_from_xy(df_rushhour.lng, df_rushhour.lat)
gdf = gpd.GeoDataFrame(df_rushhour, geometry=geometry, crs="EPSG:4326")

# Reproject to Web Mercator (EPSG:3857) so the points line up with the basemap tiles.
gdf = gdf.to_crs(epsg=3857)

# In the raw snapshot a station_number of 0 is treated as "not at a station".
# NOTE(review): rows with a missing station_number would land in the "stationed" group; confirm.
gdf_dockless = gdf[gdf['station_number'] == 0]
gdf_stationed = gdf[gdf['station_number'] != 0]

fig, ax = plt.subplots(figsize=(10, 10))

gdf_dockless.plot(ax=ax, marker="o", color='cornflowerblue', markersize=15, label='Free-floating bikes')
gdf_stationed.plot(ax=ax, marker='o', color='darkred', markersize=15, label='Bikes parked at a station')

# Background tiles (CartoDB Positron).
ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)

# The axes are hidden and their units are projected metres rather than degrees,
# so no longitude/latitude axis labels are set.
ax.axis('off')
ax.legend(loc="upper left")

# Save next to the other plots; create the target folder first so savefig cannot
# fail on a missing directory.
plot_path = f'/Users/v.sinichenko/Downloads/plots/{city_chosen}_bikes_parked_on_rushhour_{date_chosen}-{time_chosen}.png'
os.makedirs(os.path.dirname(plot_path), exist_ok=True)
fig.savefig(plot_path, format='png', bbox_inches='tight')
plt.show()


In [None]:
# df_events_other_cities.drop_duplicates()

In [None]:
#### This plot led to the surrounding cities being excluded
# # Sample list of coordinates (latitude, longitude) in a city (e.g., New York)

# # Convert to GeoPandas DataFrame
# geometry = [Point(xy) for xy in zip(df_events_other_cities.lon, df_events_other_cities.lat)]
# gdf = gpd.GeoDataFrame(df_events_other_cities, geometry=geometry, crs="EPSG:4326")  

# # Reproject to a metric CRS for plotting with basemap
# gdf = gdf.to_crs(epsg=3857)


# # Plot the points on a map
# fig, ax = plt.subplots(figsize=(10, 10))

# gdf.plot(ax=ax, marker=0, color='blue', markersize=20, label='Free-floating bikes')

# # Add a basemap (e.g., OpenStreetMap)
# ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)

# # Add labels for each point
# # for x, y, coord in zip(gdf.geometry.x, gdf.geometry.y, coordinates):
# #      plt.text(x, y, f'{coord}', fontsize=8, ha='left')
# ax.axis('off')
# # Customize and display the map
# plt.xlabel("Longitude")
# plt.ylabel("Latitude")
# plt.legend()
# #plt.savefig('../data/plots/FB_station_dynamics.png', format='png')
# plt.show()
