# Config

In [41]:
import pandas as pd
import geopandas as gpd
import json
import os
from shapely.geometry import Point, LineString

In [42]:
# Notebook configuration: every location the later cells read from or write to.
output = "../data/cities/"   # folder that receives the exported CSV files
path = "../data/cities.csv"  # input CSV; point this at your own file

# Read data

In [43]:
# 1. Load the raw table
df = pd.read_csv(path)

# Coordinates can be written with a decimal comma ("41,36"); swap it for a
# point before casting so both columns end up as floats.
for coord in ('lat', 'lon'):
    df[coord] = df[coord].astype(str).str.replace(',', '.').astype(float)

# 2. Create GeoDataFrame: one point per row, x = longitude, y = latitude
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['lon'], df['lat']),
    crs="EPSG:4326",
)


In [44]:
# 3. Split into per-role GeoDataFrames (independent copies, so later edits
#    to one subset never touch `gdf` or the other subsets).
gdf_lead = gdf.loc[gdf['Role'].eq('Lead')].copy()
gdf_follower = gdf.loc[gdf['Role'].eq('Follower')].copy()
gdf_cascade = gdf.loc[gdf['Role'].eq('Cascade')].copy()

# Everything that is not a cascade city.
gdf_lead_follower = gdf.loc[gdf['Role'].ne('Cascade')].copy()

# Displayed as the cell output: number of rows per role.
gdf['Role'].value_counts()

Role
Cascade     11
Lead         5
Follower     5
Name: count, dtype: int64

In [45]:
# 4. Build Kepler arc data: one row per follower city, linked to the lead city
#    that carries the same 'Connection' id.
arc_columns = ['Origin', 'Destination', 'Connection', 'lat0', 'lng0', 'lat1', 'lng1']
arc_rows = []

for _, follower in gdf_follower.iterrows():
    conn_id = follower['Connection']
    lead_match = gdf_lead[gdf_lead['Connection'] == conn_id]
    if lead_match.empty:
        # No lead shares this connection id: there is no arc to draw.
        continue

    # When several leads share the id, only the first one is used.
    lead = lead_match.iloc[0]
    arc_rows.append({
        'Origin': lead['City'],
        'Destination': follower['City'],
        'Connection': conn_id,
        'lat0': lead['lat'],
        'lng0': lead['lon'],
        'lat1': follower['lat'],
        'lng1': follower['lon']
    })

# 5. Convert to DataFrame for Kepler. The explicit column list keeps the
#    schema (and therefore the CSV header) even when no follower matched a
#    lead; with at least one row the result is the same as before.
df_arcs = pd.DataFrame(arc_rows, columns=arc_columns)
df_arcs.head()

Unnamed: 0,Origin,Destination,Connection,lat0,lng0,lat1,lng1
0,Barcelona,Bologna,1,41.368775,2.161384,44.498955,11.327591
1,Gothenburg,Tampere,3,57.70887,11.97456,61.500755,23.747862
2,Heidelberg,Utrecht,4,49.37875,8.652434,52.0919,5.123
3,Lyon,Warsaw,5,45.754628,4.81238,52.2297,21.0122
4,Budapest,Zagreb,2,47.487913,19.060236,45.815,15.9819


# Save data

In [46]:
# Create the target folder first: to_csv does not create missing directories.
os.makedirs(output, exist_ok=True)

# File name -> frame to export. The index is still written as the first
# column, so the files keep the same layout as before.
exports = {
    "lead.csv": gdf_lead,
    "follower.csv": gdf_follower,
    "cascade.csv": gdf_cascade,
    "od_points.csv": gdf_lead_follower,
    "arcs.csv": df_arcs,
}

for filename, frame in exports.items():
    # os.path.join works whether or not `output` ends with a path separator.
    frame.to_csv(os.path.join(output, filename))