In [1]:
import os

import pandas as pd
import altair as alt

# Lift Altair's row limit so the whole route table can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# The CSV files are read by relative path, so switch the working directory
# to the folder that holds them.
os.chdir(os.path.expanduser("~/Downloads"))
print("Now looking in:", os.getcwd())

# Load data; airports without coordinates cannot be placed on the map.
routes_df = pd.read_csv("Final_Asia-Only_Flight_Routes.csv")
coords_df = pd.read_csv("airport_coordinates.csv").dropna(subset=["latitude", "longitude"])

# Only the IATA code and the position are needed for the joins below.
airport_coords = coords_df[['iata', 'latitude', 'longitude']]

# Attach origin coordinates. Renaming the lookup table *before* merging lets us
# join on a single key and avoids leftover 'iata' / 'iata_x' / 'iata_y' columns.
routes_with_origin = routes_df.merge(
    airport_coords.rename(columns={
        'iata': 'Origin IATA',
        'latitude': 'origin_lat',
        'longitude': 'origin_lon',
    }),
    on='Origin IATA', how='inner'
)

# Attach destination coordinates the same way. Inner joins drop any route whose
# origin or destination airport has no known coordinates.
routes_full = routes_with_origin.merge(
    airport_coords.rename(columns={
        'iata': 'Destination',
        'latitude': 'dest_lat',
        'longitude': 'dest_lon',
    }),
    on='Destination', how='inner'
)

# Space-free aliases for the origin city/country columns.
routes_full = routes_full.rename(columns={
    "Origin City": "OriginCity",
    "Origin Country": "OriginCountry"
})

# One projection shared by every layer so the routes line up with the map.
route_map_projection = dict(
    type='equalEarth',
    scale=250,
    translate=[400, 250],
    center=[90, 30]
)

# Background map with a modern look
world_map = alt.topo_feature('https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json', 'countries')
background = alt.Chart(world_map).mark_geoshape(
    fill='#f7f4ed',       # soft beige land
    stroke='white'
).project(**route_map_projection).properties(width=800, height=500)

# One segment per route, colored by airline.
# mark_rule (not mark_line) is required here: the secondary channels
# longitude2/latitude2 are only honoured by ranged marks, whereas a line mark
# would ignore them and connect successive origin points instead.
routes_chart = alt.Chart(routes_full).mark_rule(opacity=0.4).encode(
    longitude='origin_lon:Q',
    latitude='origin_lat:Q',
    longitude2='dest_lon:Q',
    latitude2='dest_lat:Q',
    color=alt.Color('Airlines:N', title='Airline', legend=None),
    tooltip=[
        alt.Tooltip('Origin IATA:N', title='Origin'),
        alt.Tooltip('Destination:N', title='Destination'),
        alt.Tooltip('Flight Distance (km):Q', title='Distance (km)'),
        alt.Tooltip('Flight Time (min):Q', title='Flight Time (min)'),
        alt.Tooltip('Airlines:N', title='Airlines')
    ]
).project(**route_map_projection)

# Combine the layers
chart = alt.layer(background, routes_chart).properties(
    title=alt.TitleParams(
        text='Asian Flight Routes',
        subtitle='Colored by Airline | Hover for Details',
        fontSize=20,
        subtitleFontSize=14,
        anchor='start'
    )
)

chart


Now looking in: C:\Users\ianme\Downloads


In [2]:
import pandas as pd
import altair as alt

# --- Load: read the routes file and trim stray whitespace from the headers ---
df = (
    pd.read_csv('Final_Asia-Only_Flight_Routes.csv', encoding='utf-8')
      .rename(columns=str.strip)
)

# --- Aggregate: rows per (origin airport, origin country), busy airports only ---
airport_summary = (
    df.groupby(['Origin', 'Origin Country'], as_index=False)
      .size()
      .rename(columns={'size': 'Flight Count'})
      .loc[lambda counts: counts['Flight Count'] >= 100]   # keep airports with >= 100 flights
)

# --- Plot: bubble chart, one bubble per airport ---
busiest_first = alt.SortField(field='Flight Count', order='descending')

x_airport = alt.X('Origin:N', title='Origin Airport', sort=busiest_first)
y_flights = alt.Y('Flight Count:Q', title='Number of Flights')
bubble_size = alt.Size(
    'Flight Count:Q',
    scale=alt.Scale(range=[100, 1000]),
    title='Flight Count',
)
country_color = alt.Color('Origin Country:N', title='Origin Country')

chart = (
    alt.Chart(airport_summary)
    .mark_circle()
    .encode(
        x=x_airport,
        y=y_flights,
        size=bubble_size,
        color=country_color,
        tooltip=['Origin', 'Origin Country', 'Flight Count'],
    )
    .properties(
        width=600,
        height=400,
        title='Flight Count by Origin Airport (>=100 Flights, Colored by Country)',
    )
)

chart

In [3]:
import pandas as pd
import altair as alt

# --- Load: read the routes file, trim header whitespace and country-name whitespace ---
df = (
    pd.read_csv('Final_Asia-Only_Flight_Routes.csv', encoding='utf-8')
      .rename(columns=str.strip)
      .assign(**{'Origin Country': lambda routes: routes['Origin Country'].str.strip()})
)

# --- Aggregate per origin country ---
by_country = df.groupby('Origin Country', as_index=False)

# Number of route rows for each origin country.
flight_counts = by_country.size().rename(columns={'size': 'Flight Count'})

# Mean flight time for each origin country, with the row count attached.
country_stats = (
    by_country['Flight Time (min)']
    .mean()
    .rename(columns={'Flight Time (min)': 'Average Flight Time (min)'})
    .merge(flight_counts, on='Origin Country', how='left')
)

# --- Plot: one bar per country, longest average flight time first ---
longest_first = alt.SortField(field='Average Flight Time (min)', order='descending')

chart = (
    alt.Chart(country_stats)
    .mark_bar()
    .encode(
        x=alt.X('Origin Country:N', title='Origin Country', sort=longest_first),
        y=alt.Y('Average Flight Time (min):Q', title='Average Flight Time (min)'),
        color=alt.Color('Origin Country:N', legend=None),  # one color per country, no legend
        tooltip=['Origin Country', 'Average Flight Time (min)', 'Flight Count'],
    )
    .properties(
        width=800,
        height=400,
        title='Average Flight Time by Origin Country',
    )
)

chart


In [21]:
import pandas as pd
import altair as alt

# Load data.
# NOTE: `routes_df` and `coords_df` deliberately keep their original column
# names. Renaming them in place here broke every cell that reads
# routes_df['Destination'] / 'Origin IATA' when the notebook is run top to
# bottom; derived data therefore lives in separately named frames below.
routes_df = pd.read_csv("Final_Asia-Only_Flight_Routes.csv")
coords_df = pd.read_csv("airport_coordinates.csv").dropna(subset=["latitude", "longitude"])

# Working copy of the coordinates keyed by 'airport' (rename returns a new frame).
airport_coords = coords_df.rename(columns={'iata': 'airport'})

# Fix bad coordinates for DEL
airport_coords.loc[airport_coords['airport'] == 'DEL', ['latitude', 'longitude']] = [28.5562, 77.1000]

# Remove rows whose latitude/longitude fall outside the valid geographic range.
airport_coords = airport_coords[
    airport_coords['latitude'].between(-90, 90) &
    airport_coords['longitude'].between(-180, 180)
]

# Count how often each airport appears as an origin and as a destination.
# rename_axis + reset_index(name=...) yields ['airport', 'count'] regardless of
# how the installed pandas version names the value_counts() output.
source_counts = (
    routes_df['Origin IATA'].value_counts()
    .rename_axis('airport')
    .reset_index(name='count')
)
destination_counts = (
    routes_df['Destination'].value_counts()
    .rename_axis('airport')
    .reset_index(name='count')
)

# Total activity per airport = appearances as origin + appearances as destination.
flight_counts = (
    pd.concat([source_counts, destination_counts])
    .groupby('airport', as_index=False)['count']
    .sum()
)

# Merge flight counts with coordinates (inner join: airports need both).
heatmap_df = airport_coords.merge(flight_counts, on='airport')

# Filter to a bounding box around Asia: latitude -10..60, longitude 60..150.
heatmap_df = heatmap_df[
    heatmap_df['latitude'].between(-10, 60) &
    heatmap_df['longitude'].between(60, 150)
]

# Load map
world = alt.topo_feature('https://cdn.jsdelivr.net/npm/world-atlas@2/countries-50m.json', 'countries')

# Both layers must use exactly the same projection for the dots to sit on the map.
asia_projection = dict(type='mercator', center=[100, 30], scale=600)

# Base map of Asia
base = alt.Chart(world).mark_geoshape(
    fill='lightgray',
    stroke='white'
).project(**asia_projection).properties(
    width=800,
    height=500,
    title='Heatmap of Flight Activity at Asian Airports'
)

# Airport dots: size and color both encode the total flight count.
# NOTE(review): the tooltip assumes airport_coordinates.csv provides
# 'city_name' and 'country' columns - confirm against the file.
airport_dots = alt.Chart(heatmap_df).mark_circle(opacity=0.8).encode(
    longitude='longitude:Q',
    latitude='latitude:Q',
    size=alt.Size('count:Q', scale=alt.Scale(range=[10, 500]), legend=None),
    color=alt.Color('count:Q', scale=alt.Scale(scheme='orangered'), legend=alt.Legend(title='Flight Count')),
    tooltip=['airport', 'city_name', 'country', 'count']
).project(**asia_projection)

# Combine and show
(base + airport_dots).configure_view(stroke=None)


In [4]:
import altair as alt
import pandas as pd

# The 20 most frequent destination codes in `routes_df`, as a two-column frame.
top_dest = (
    routes_df['Destination']
    .value_counts()
    .head(20)
    .rename_axis('Destination')
    .reset_index(name='Flight Count')
)

# Horizontal bars, longest (busiest destination) at the top.
flights_axis = alt.X('Flight Count:Q', title='Number of Flights')
airport_axis = alt.Y('Destination:N', sort='-x', title='Destination Airport')

bar_chart = (
    alt.Chart(top_dest)
    .mark_bar(color='#4682B4')
    .encode(
        x=flights_axis,
        y=airport_axis,
        tooltip=['Destination', 'Flight Count'],
    )
    .properties(
        title='Top 20 Destination Airports in Asia',
        width=600,
        height=400,
    )
)

bar_chart


In [5]:
# Mean flight distance for each origin country (one row per country).
avg_distance = routes_df.groupby('Origin Country', as_index=False)['Flight Distance (km)'].mean()

# Horizontal bars ordered by average distance, longest first.
distance_axis = alt.X('Flight Distance (km):Q', title='Average Flight Distance (km)')
country_axis = alt.Y('Origin Country:N', sort='-x', title='Country')

avg_chart = (
    alt.Chart(avg_distance)
    .mark_bar(color='#66C2A5')
    .encode(
        x=distance_axis,
        y=country_axis,
        tooltip=['Origin Country', 'Flight Distance (km):Q'],
    )
    .properties(
        title='Average Flight Distance by Origin Country',
        width=700,
        height=500,
    )
)

avg_chart

In [6]:
# Shared foundation for both layers: only the x/y encodings and the chart
# properties. Keeping color and tooltip out of this base matters because the
# regression transform outputs only the two regression fields.
time_vs_distance = alt.Chart(routes_df).encode(
    x='Flight Distance (km):Q',
    y='Flight Time (min):Q'
).properties(width=600, height=400, title='Flight Time vs. Distance')

# One translucent point per route, colored by airline (legend suppressed).
scatter = time_vs_distance.mark_circle(size=40, opacity=0.3).encode(
    tooltip=['Origin IATA', 'Destination', 'Airlines'],
    color=alt.Color('Airlines:N', legend=None)
)

# Linear fit of flight time on distance.
# Built from the x/y-only base rather than from `scatter`: inheriting the
# 'Airlines' color encoding would override mark_line(color='red') and would,
# like the tooltip, reference fields absent from the regression output.
trend = time_vs_distance.transform_regression(
    'Flight Distance (km)', 'Flight Time (min)', method='linear'
).mark_line(color='red')

scatter + trend
