In [1]:
import sys
import altair as alt
import networkx as nx
import numpy as np
import pandas as pd
from IPython.display import display

# Put the parent directory on the import path (once) before importing ``alph``
# below -- presumably that is where the local ``alph`` package lives.
if ".." not in sys.path:
    sys.path.append("..")
from alph import alph, layout

In [2]:
# Data source: https://openflights.org/data.html
# Related analysis: https://www.kaggle.com/code/divyanshrai/airport-route-analysis
# Altair row limit: https://altair-viz.github.io/user_guide/faq.html#maxrowserror-how-can-i-plot-large-datasets

routes_url = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat"
airports_url = "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat"

# Column names are supplied explicitly via ``names=`` below; they are the
# human-readable labels converted to snake_case.
airports_cols = [
    label.lower().replace(" ", "_")
    for label in (
        "Airport ID", "Name", "City", "Country", "IATA",
        "ICAO", "Latitude", "Longitude", "Altitude",
        "Timezone", "DST", "Tz database time zone", "Type", "Source",
    )
]
routes_cols = [
    label.lower().replace(" ", "_")
    for label in (
        "Airline", "Airline ID",
        "Source airport", "Source airport ID",
        "Destination airport", "Destination airport ID",
        "Codeshare", "Stops", "Equipment",
    )
]

airports = pd.read_csv(airports_url, names=airports_cols)

routes = (
    pd.read_csv(routes_url, names=routes_cols)
    # Drop rows whose airport id is the literal "\N" (presumably the dataset's
    # null marker) so that both id columns can be cast to int.
    .loc[
        lambda d: d["source_airport_id"].ne("\\N")
        & d["destination_airport_id"].ne("\\N")
    ]
    .astype({"source_airport_id": int, "destination_airport_id": int})
    # Keep only routes whose two endpoints both appear in the airports table.
    .loc[
        lambda d: d["source_airport_id"].isin(airports["airport_id"])
        & d["destination_airport_id"].isin(airports["airport_id"])
    ]
)

print(len(airports), len(routes))
display(routes.head(2))
display(airports.head(2))

7698 66771


Unnamed: 0,airline,airline_id,source_airport,source_airport_id,destination_airport,destination_airport_id,codeshare,stops,equipment
0,2B,410,AER,2965,KZN,2990,,0,CR2
1,2B,410,ASF,2966,KZN,2990,,0,CR2


Unnamed: 0,airport_id,name,city,country,iata,icao,latitude,longitude,altitude,timezone,dst,tz_database_time_zone,type,source
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.08169,145.391998,5282,10,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.20708,145.789001,20,10,U,Pacific/Port_Moresby,airport,OurAirports


In [42]:
# Collapse the directed route list into undirected airport pairs with a
# route-count weight. Every route is used, whatever its ``stops`` value.
route_edges = (
    routes
    .assign(
        # Order each pair so that a_id <= b_id; A->B and B->A then share one key.
        a_id=lambda d: np.minimum(d["source_airport_id"], d["destination_airport_id"]),
        b_id=lambda d: np.maximum(d["source_airport_id"], d["destination_airport_id"]),
    )
    # Drop self-loops (routes whose source and destination are the same airport).
    .loc[lambda d: d["a_id"] != d["b_id"]]
    .groupby(["a_id", "b_id"])
    # size() counts rows per pair. Counting a specific column (e.g. airline_id)
    # would skip rows where that column is null and under-report the weight.
    .size()
    .rename("weight")
    .reset_index()
    # Weight rescaled so that the busiest pair has weight_norm == 1.
    .assign(weight_norm=lambda d: d["weight"] / d["weight"].max())
)
route_edges.sort_values(by="weight", ascending=False).head()

Unnamed: 0,a_id,b_id,weight,weight_norm
17137,3682,3830,39,1.0
12958,3077,3885,24,0.615385
16444,3576,3682,24,0.615385
3623,507,3797,24,0.615385
13247,3179,3885,23,0.589744


In [60]:
# Descriptive attributes per airport, keyed by airport id, to attach to nodes.
airports_by_id = (
    airports
    .set_index("airport_id")
    .loc[:, ["name", "city", "country", "iata", "icao"]]
    .to_dict(orient="index")
)

# One edge per airport pair from ``route_edges``, carrying the route count as
# the ``weight`` edge attribute.
G = nx.from_pandas_edgelist(route_edges, source="a_id", target="b_id", edge_attr="weight")
nx.set_node_attributes(G, {node_id: airports_by_id[node_id] for node_id in G})
nx.set_node_attributes(G, values=nx.degree_centrality(G), name="degree_centrality")

print("nodes:", G.number_of_nodes(), "edges:", G.number_of_edges())
print(
    "max deg centrality:",
    max(score for _, score in G.nodes(data="degree_centrality")),
)

nodes: 3214 edges: 18858
max deg centrality: 0.0771864301276066


In [73]:
def alph_call(layout_fn=None):
    """Render the route graph ``G`` with ``alph`` using this notebook's styling.

    Parameters
    ----------
    layout_fn : callable, optional
        Single-argument callable forwarded to ``alph`` as ``layout_fn``. When
        omitted, ``layout.force_atlas`` is used with the ``weight`` edge
        attribute and a fixed seed.

    Returns
    -------
    The object returned by ``alph`` (presumably an Altair chart) after
    ``configure_view(strokeWidth=0)`` and a 1000x800 size are applied.
    """
    def _default_layout(g):
        return layout.force_atlas(
            g,
            weight_attr="weight",
            strongGravityMode=False,
            edgeWeightInfluence=1,
            gravity=80,
            seed=123,
        )

    # An alternative layout that was tried here:
    #   lambda g: nx.spring_layout(g, weight="weight", k=1, iterations=1000, seed=123)
    chosen_layout = _default_layout if layout_fn is None else layout_fn

    # Node area scales with degree centrality; hovering shows the airport.
    node_args = {
        "size": alt.Size(
            "degree_centrality",
            scale=alt.Scale(domain=[0, 0.1], range=[2**2, 8**2]),
            legend=None,
        ),
        "fill": alt.value("#4b6f91"),
        "halo_offset": None,
        "tooltip_attrs": ["iata", "city", "country"],
    }

    # NOTE(review): both edge scales declare domain [0, 1], but the graph is
    # built with the raw ``weight`` count (values above 1), not ``weight_norm``.
    # Confirm whether the normalised weight was intended here.
    edge_args = {
        "color": "#aaa",
        "opacity": alt.Size(
            "weight:Q",
            scale=alt.Scale(domain=[0, 1], range=[0.0, 0.5]),
            legend=None,
        ),
        "strokeWidth": alt.Size(
            "weight:Q",
            scale=alt.Scale(domain=[0, 1], range=[0, 0.1]),
            legend=None,
        ),
    }

    chart = alph(
        G,
        layout_fn=chosen_layout,
        node_args=node_args,
        edge_args=edge_args,
    )
    return chart.configure_view(strokeWidth=0).properties(width=1000, height=800)

#alt.data_transformers.enable('json')
#alph_call()

In [74]:
# %reload_ext line_profiler
# %lprun -f alph alph_call()

In [75]:
# Fixed node positions keyed by airport id. Note the tuple order is
# (longitude, latitude), i.e. (x, y), despite the variable name.
pos_lat_long = {
    airport_id: (lon, lat)
    for airport_id, lon, lat in (
        airports
        # verify_integrity keeps the "ids must be unique" failure mode.
        .set_index("airport_id", verify_integrity=True)
        .loc[:, ["longitude", "latitude"]]
        .itertuples(name=None)
    )
}
list(pos_lat_long.items())[0]

(1, (145.391998291, -6.081689834590001))

In [76]:
# Geographic rendering: the layout callable ignores its argument and returns
# the precomputed (longitude, latitude) positions instead of computing a layout.
alph_call(layout_fn=lambda _: pos_lat_long)