In [1]:
import pandas as pd

# Read in the CSV file
df = pd.read_csv('Final_Asia-Only_Flight_Routes.csv')

# Assuming the CSV has columns 'Origin' and 'Destination'
# Count inbound flights (each time an airport appears as a destination)
inbound = df['Destination'].value_counts().reset_index()
inbound.columns = ['Airport', 'Inbound']

# Count outbound flights (each time an airport appears as an origin)
outbound = df['Origin'].value_counts().reset_index()
outbound.columns = ['Airport', 'Outbound']

# Merge the two counts on the airport name, filling missing values with 0
airport_counts = pd.merge(inbound, outbound, on='Airport', how='outer').fillna(0)

# Calculate total flights per airport
airport_counts['Total'] = airport_counts['Inbound'] + airport_counts['Outbound']

# (Optional) Sort by total flights for clarity
airport_counts = airport_counts.sort_values(by='Total', ascending=False)

print(airport_counts.head())

    Airport  Inbound  Outbound  Total
706     PVG      184     189.0  373.0
819     TFU      182     180.0  362.0
120     CAN      168     169.0  337.0
680     PEK      161     160.0  321.0
947     XIY      159     160.0  319.0


In [10]:
import pandas as pd
import altair as alt

# -------------------------------
# Step 1: Load and Clean the Data
# -------------------------------

# Read the CSV file (ensure the file name matches your CSV file)
df = pd.read_csv('Final_Asia-Only_Flight_Routes.csv', encoding='utf-8')

# Clean up column names (if necessary)
df.columns = df.columns.str.strip()

# -------------------------------
# Step 2: Aggregate Flight Data
# -------------------------------

# Group by 'Origin' and 'Origin Country' to count the number of flights per airport
airport_summary = df.groupby(['Origin', 'Origin Country'], as_index=False).size()
airport_summary.rename(columns={'size': 'Flight Count'}, inplace=True)

# Filter out airports with fewer than 100 flights
airport_summary = airport_summary[airport_summary['Flight Count'] >= 100]

# -------------------------------
# Step 3: Create the Bubble Chart
# -------------------------------

chart = alt.Chart(airport_summary).mark_circle().encode(
    x=alt.X('Origin:N', 
            title='Origin Airport', 
            sort=alt.SortField(field='Flight Count', order='descending')),
    y=alt.Y('Flight Count:Q', title='Number of Flights'),
    size=alt.Size('Flight Count:Q', scale=alt.Scale(range=[100, 1000]), title='Flight Count'),
    color=alt.Color('Origin Country:N', title='Origin Country'),
    tooltip=['Origin', 'Origin Country', 'Flight Count']
).properties(
    width=600,
    height=400,
    title='Flight Count by Origin Airport (>=100 Flights, Colored by Country)'
)

chart


In [12]:
import pandas as pd
import altair as alt

# -------------------------------
# Step 1: Load and Clean the Data
# -------------------------------

# Load the CSV file (adjust the file name/path as necessary)
df = pd.read_csv('Final_Asia-Only_Flight_Routes.csv', encoding='utf-8')

# Clean up column names and values (remove extra spaces)
df.columns = df.columns.str.strip()
df['Origin Country'] = df['Origin Country'].str.strip()

# -------------------------------
# Step 2: Aggregate Flight Time Data by Origin Country
# -------------------------------

# Calculate the average flight time per origin country
country_stats = df.groupby('Origin Country', as_index=False)['Flight Time (min)'].mean()
country_stats.rename(columns={'Flight Time (min)': 'Average Flight Time (min)'}, inplace=True)

# Also count the number of flights for each origin country
flight_counts = df.groupby('Origin Country', as_index=False).size()
flight_counts.rename(columns={'size': 'Flight Count'}, inplace=True)

# Merge the average flight time with the flight counts
country_stats = pd.merge(country_stats, flight_counts, on='Origin Country', how='left')

# -------------------------------
# Step 3: Create the Bar Chart
# -------------------------------

chart = alt.Chart(country_stats).mark_bar().encode(
    x=alt.X('Origin Country:N', 
            title='Origin Country', 
            sort=alt.SortField(field='Average Flight Time (min)', order='descending')),
    y=alt.Y('Average Flight Time (min):Q', title='Average Flight Time (min)'),
    color=alt.Color('Origin Country:N', legend=None),  # Color by country
    tooltip=['Origin Country', 'Average Flight Time (min)', 'Flight Count']
).properties(
    width=800,
    height=400,
    title='Average Flight Time by Origin Country'
)

chart
