In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

# Global plotting defaults for the notebook: seaborn theme first, then the
# default figure size (same order as before, so the size override is applied last).
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (12, 7)})

In [None]:
DATA_PATH = "/content/processed_flight_records.csv"

# Load the processed flight records into a DataFrame.
df = pd.read_csv(DATA_PATH)

# Identify key columns by case-insensitive substring match on the header names.
# str() guards against non-string column labels (e.g. integer headers).
origin_cols = [c for c in df.columns if 'origin' in str(c).lower()]

# 'dest' already matches 'destination', so a single test is sufficient.
dest_cols = [c for c in df.columns if 'dest' in str(c).lower()]

# Prefer columns whose name contains 'date'; columns that only contain 'time'
# (which may be durations or time-of-day fields rather than dates) are kept as
# lower-priority fallbacks so they are not chosen just because they appear first.
date_named = [c for c in df.columns if 'date' in str(c).lower()]
time_named = [
    c for c in df.columns
    if 'time' in str(c).lower() and 'date' not in str(c).lower()
]
date_cols = date_named + time_named

# First match wins; None signals "no such column found" to the later cells.
origin_col = next(iter(origin_cols), None)
dest_col = next(iter(dest_cols), None)
date_col = next(iter(date_cols), None)

print("Using columns:")
print("Origin:", origin_col)
print("Destination:", dest_col)
print("Date:", date_col)

# Convert the chosen date column to datetime so the `.dt` accessor works later.
# NOTE(review): this raises if the column holds unparseable values - confirm
# that the detected column really is a date before relying on it.
if date_col:
    df[date_col] = pd.to_datetime(df[date_col])

In [None]:
# Fail with a clear message here: every later cell needs `route_counts`, and
# silently skipping its creation only defers the failure to a NameError.
if not (origin_col and dest_col):
    raise ValueError(
        "Could not detect origin/destination columns in the data; "
        f"found origin={origin_col!r}, destination={dest_col!r}."
    )

# Number of flights per (origin, destination) pair, busiest routes first.
route_counts = (
    df.groupby([origin_col, dest_col])
    .size()
    .reset_index(name='flight_count')
    .sort_values('flight_count', ascending=False)
)

# Kept as the last top-level expression so the notebook actually renders it
# (an expression nested inside an `if` block is evaluated but never displayed).
route_counts.head(10)

In [None]:
# Label each bar with the full route; using only the origin column would give
# identical labels to different routes that share an origin airport.
top_routes = route_counts.head(15).copy()
top_routes["route_label"] = (
    top_routes[origin_col].astype(str) + " → " + top_routes[dest_col].astype(str)
)

# Create the axes explicitly and hand them to pandas. Without `ax=`,
# DataFrame.plot opens its own figure, leaving the figure created beforehand
# empty and ignoring the requested size.
fig, ax = plt.subplots(figsize=(12, 6))
top_routes.plot(
    kind='barh',
    x='route_label',
    y='flight_count',
    legend=False,
    ax=ax,
)
ax.set_title("Top 15 Routes by Flight Volume")
ax.set_xlabel("Number of Flights")
ax.set_ylabel("Route")
# barh draws the first row at the bottom; invert so the busiest route is on top.
ax.invert_yaxis()
plt.show()


In [None]:
# Directed flight network: one node per airport, one edge per route, with the
# route's flight count stored in the edge attribute 'weight'.
G = nx.DiGraph()

for _, route in route_counts.iterrows():
    source_airport = route[origin_col]
    target_airport = route[dest_col]
    G.add_edge(source_airport, target_airport, weight=route['flight_count'])

n_airports = G.number_of_nodes()
n_routes = G.number_of_edges()
print("Number of airports (nodes):", n_airports)
print("Number of routes (edges):", n_routes)

In [None]:
# Degree centrality: fraction of other airports each airport is directly
# connected to (in- plus out-edges for a directed graph).
degree_centrality = nx.degree_centrality(G)

# networkx interprets the betweenness `weight` attribute as a *distance* when
# computing shortest paths. The 'weight' stored on each edge is a flight count,
# so using it directly would treat the busiest routes as the longest ones.
# Store the reciprocal as a separate 'distance' attribute (busier = closer).
# Edges without a 'weight' fall back to 1, i.e. unit distance.
for _, _, edge_attrs in G.edges(data=True):
    edge_attrs['distance'] = 1.0 / edge_attrs.get('weight', 1)

# Betweenness centrality (may take time for large graphs)
betweenness_centrality = nx.betweenness_centrality(G, weight='distance')

# One row per airport, keeping both measures aligned on the same key order.
centrality_df = pd.DataFrame({
    'airport': list(degree_centrality),
    'degree_centrality': list(degree_centrality.values()),
    'betweenness_centrality': [betweenness_centrality[a] for a in degree_centrality],
})

centrality_df = centrality_df.sort_values('degree_centrality', ascending=False)
centrality_df.head(10)

In [None]:
# Create the axes explicitly and pass them to pandas. Without `ax=`,
# DataFrame.plot opens a new figure of its own, which leaves the figure created
# beforehand empty and ignores the requested size.
fig, ax = plt.subplots(figsize=(12, 6))
centrality_df.head(15).plot(
    kind='bar',
    x='airport',
    y='degree_centrality',
    legend=False,
    ax=ax,
)
ax.set_title("Top 15 Hub Airports by Degree Centrality")
ax.set_ylabel("Degree Centrality")
ax.set_xlabel("Airport")
plt.show()

In [None]:
# Plot subgraph of top hubs for clarity
top_airports = centrality_df.head(10)['airport'].tolist()
subG = G.subgraph(top_airports)

# Fixed seed keeps the layout reproducible between runs.
pos = nx.spring_layout(subG, k=0.5, seed=42)

# Shared by the node and edge drawing calls: the edge drawer uses the node size
# to decide where arrows end, so it must match or the arrowheads are hidden
# underneath the (larger) node markers.
NODE_SIZE = 800

fig, ax = plt.subplots(figsize=(10, 10))
nx.draw_networkx_nodes(subG, pos, node_size=NODE_SIZE, ax=ax)
nx.draw_networkx_edges(
    subG,
    pos,
    node_size=NODE_SIZE,
    arrowstyle='->',
    arrowsize=15,
    ax=ax,
)
nx.draw_networkx_labels(subG, pos, ax=ax)

ax.set_title("Flight Network (Top Hub Airports)")
ax.set_axis_off()
plt.show()

In [None]:
# Flights per route per calendar year. This needs the date column as well as
# both route columns; if any is missing, fall back to an empty table so the
# cell still runs and `yearly_routes` is always defined.
if date_col and origin_col and dest_col:
    df['year'] = df[date_col].dt.year
    yearly_routes = (
        df.groupby(['year', origin_col, dest_col])
        .size()
        .reset_index(name='flights')
    )
else:
    print("Date, origin or destination column not detected; skipping yearly route breakdown.")
    route_key_cols = [c for c in (origin_col, dest_col) if c]
    yearly_routes = pd.DataFrame(columns=['year', *route_key_cols, 'flights'])

# Kept as the last top-level expression so the notebook actually renders it
# (an expression nested inside an `if` block is evaluated but never displayed).
yearly_routes.head()

In [None]:
# Persist the two result tables as CSV files (row index omitted).
CENTRALITY_CSV = "/content/airport_network_centrality.csv"
ROUTE_VOLUME_CSV = "/content/route_volume_matrix.csv"

centrality_df.to_csv(CENTRALITY_CSV, index=False)
route_counts.to_csv(ROUTE_VOLUME_CSV, index=False)

print("Route and network analysis outputs saved.")