In [None]:
import polars as pl
# Column expressions are named up front so the pipeline below reads as a list of steps.
parsed_query_time = pl.col("query_time").str.strptime(pl.Datetime, "%Y-%m-%dT%H:%M:%S%.f")
quarter_hour = pl.col("query_time").dt.round("15m").alias("rounded_time")
# Fraction of docks that are not empty.
# NOTE(review): rows with docks == 0 are not guarded against — confirm none exist.
occupancy = (pl.col("docks") - pl.col("empty_docks")) / pl.col("docks")

# Read every CSV matching the glob, parse the timestamp, bucket it to 15 minutes,
# then narrow the coordinate and count columns and add the occupancy ratio.
df = (
    pl.read_csv("../temp_data/weekly-london-cycles-db/data/*.csv")
    .with_columns(parsed_query_time)
    # Rounding needs the parsed datetime, so it runs in its own step.
    .with_columns(quarter_hour)
    .with_columns(
        pl.col(["lat", "lon"]).cast(pl.Float32),
        pl.col(["bikes", "docks", "empty_docks"]).cast(pl.Int32),
        occupancy.alias("occupancy_ratio"),
    )
)

In [None]:
# Collapse the observations to one row per distinct (place_id, lat, lon) combination.
station_key = ["place_id", "lat", "lon"]
stations = df.unique(subset=station_key)

In [None]:
# Keep only the stations located north of 41.5 degrees latitude.
# NOTE(review): presumably this removes records with placeholder or invalid
# coordinates — confirm the intent of the threshold.
min_valid_lat = 41.5
stat = stations.filter(pl.col("lat") > min_valid_lat)

In [None]:
import matplotlib.pyplot as plt

# Scatter every retained station at its (longitude, latitude) position.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(stat['lon'], stat['lat'], c='blue', marker='o', label='Stations')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Geographical Distribution of Stations')
ax.legend()
plt.show()

In [None]:
# Bounding box of the retained stations.
lon_values, lat_values = stat['lon'], stat['lat']
min_lon = lon_values.min()
max_lon = lon_values.max()
min_lat = lat_values.min()
max_lat = lat_values.max()

print(f"Longitude range: {min_lon} to {max_lon}")
print(f"Latitude range: {min_lat} to {max_lat}")

In [None]:
# Divide the area into a grid
import numpy as np
# Pad the bounding box by 2% of its extent on each side so the outermost
# stations fall strictly inside the grid.
buffer_fraction = 0.02
lon_buffer = (max_lon - min_lon) * buffer_fraction
lat_buffer = (max_lat - min_lat) * buffer_fraction

# n_lon / n_lat are the number of grid *edges* per axis: 4 edges -> 3 cells.
n_lon = 4
n_lat = 4
lon_bins = np.linspace(start=min_lon - lon_buffer, stop=max_lon + lon_buffer, num=n_lon)
lat_bins = np.linspace(start=min_lat - lat_buffer, stop=max_lat + lat_buffer, num=n_lat)

for edges in (lon_bins, lat_bins):
    print(edges)

In [None]:
# Plot the stations together with the grid defined by lon_bins / lat_bins.

plt.figure(figsize=(10, 6))
plt.scatter(stat['lon'], stat['lat'], c='blue', marker='o', label='Stations')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Geographical Distribution of Stations')
plt.legend()

# Draw one dashed line per grid edge so the cell boundaries are visible on top
# of the stations (the lines carry no label, so the legend is unaffected).
for lon_edge in lon_bins:
    plt.axvline(lon_edge, color='black', linestyle='--', lw=1, alpha=0.5)
for lat_edge in lat_bins:
    plt.axhline(lat_edge, color='black', linestyle='--', lw=1, alpha=0.5)

plt.show()

In [None]:
# Display the longitude and latitude grid edges as this cell's output.
lon_bins, lat_bins

In [None]:
import matplotlib.pyplot as plt
import polars as pl
import numpy as np

# Consecutive pairs of edges give the (lower, upper) bounds of each grid cell.
lon_intervals = list(zip(lon_bins[:-1], lon_bins[1:]))
lat_intervals = list(zip(lat_bins[:-1], lat_bins[1:]))

# Plot the grid with stations. For every non-empty cell the figure shows the
# cell's stations, the mean position of those stations, and the station that
# lies closest to that mean.
plt.figure(figsize=(10, 6), dpi=150)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Geographical Distribution of Stations')

# Legend handles and labels; filled once, from the first non-empty cell.
legend_elements = []
legend_labels = []

# NOTE: the direction labels are hard-coded for a 3x3 grid. zip() stops at the
# shorter sequence, so a finer grid would silently leave cells unprocessed.
for lat_interval, name in zip(lat_intervals, ['south', '', 'north']):
    lat_low, lat_high = lat_interval
    for lon_interval, name2 in zip(lon_intervals, ['west', '', 'east']):
        lon_low, lon_high = lon_interval

        # Label the cell at its geometric centre (the middle cell gets an empty label).
        plt.text(
            np.mean(lon_interval), np.mean(lat_interval), f"{name}{name2}",
            horizontalalignment='center', verticalalignment='center',
            fontsize=12, fontweight='bold')

        # Half-open membership [low, high): a station lying exactly on an
        # interior grid edge belongs to exactly one cell instead of being
        # dropped from all of them, as strict comparisons on both sides would do.
        filtered = stat.filter(
            (pl.col("lon") >= lon_low) & (pl.col("lon") < lon_high)
            & (pl.col("lat") >= lat_low) & (pl.col("lat") < lat_high)
        )
        if len(filtered) == 0:
            continue

        mean_lon = filtered['lon'].mean()
        mean_lat = filtered['lat'].mean()

        # Squared distance to the mean, measured directly in degrees.
        # NOTE(review): longitude and latitude degrees are treated as equal
        # lengths here — confirm that approximation is acceptable.
        distances = filtered.with_columns(
            ((pl.col("lon") - mean_lon)**2 + (pl.col("lat") - mean_lat)**2).alias("distance")
        )

        # First row after sorting by distance = the station nearest the mean.
        closest, closest_lon, closest_lat = (
            distances.sort("distance").select(pl.first("place_id", "lon", "lat")).row(0)
        )

        # Draw order is kept as-is: closest marker, mean marker, then the stations.
        closest_scatter = plt.scatter(closest_lon, closest_lat, marker='o', color='black', s=70)
        mean_scatter = plt.scatter(mean_lon, mean_lat, marker='x', color='blue', s=70)
        scatter = plt.scatter(filtered['lon'], filtered['lat'], marker='.', s=50)

        # All three legend entries are registered together, exactly once.
        if not legend_elements:
            legend_elements.extend([scatter, closest_scatter, mean_scatter])
            legend_labels.extend(['Stations', 'Closest to the mean', 'Mean point'])


# Dashed lines mark the grid edges.
for lon in lon_bins:
    plt.axvline(lon, color='black', linestyle='--', lw=1, alpha=0.5)
for lat in lat_bins:
    plt.axhline(lat, color='black', linestyle='--', lw=1, alpha=0.5)

plt.legend(legend_elements, legend_labels, loc=4)
plt.tight_layout()
# Save before show(): some backends clear the figure once it has been shown.
plt.savefig('geo-models.png')
plt.show()
