In [14]:
import altair as alt
import pandas as pd

# Read the Boston flights table from the CSV in the working directory.
flights = pd.read_csv(filepath_or_buffer='boston_flights.csv')
# Preview the first five rows (the same count head() uses by default).
flights.head(n=5)

Unnamed: 0,Year,quarter,citymarketid_1,citymarketid_2,city1,city2,airportid_1,airportid_2,airport_1,airport_2,...,fare,carrier_lg,large_ms,fare_lg,carrier_low,lf_ms,fare_low,Geocoded_City1,Geocoded_City2,carrier_name
0,2018,1,30721,30693,"Boston, MA (Metropolitan Area)","Nashville, TN",10721,10693,BOS,BNA,...,147.04,WN,0.3854,141.85,B6,0.3487,136.92,"Boston, MA (Metropolitan Area)\n(42.358894, -7...","Nashville, TN\n(36.166687, -86.779932)",Southwest
1,2018,1,30721,30792,"Boston, MA (Metropolitan Area)","Buffalo, NY",10721,10792,BOS,BUF,...,126.14,B6,0.7186,125.05,B6,0.7186,125.05,"Boston, MA (Metropolitan Area)\n(42.358894, -7...","Buffalo, NY\n(39.945422, -78.64671)",JetBlue
2,2018,1,30721,32575,"Boston, MA (Metropolitan Area)","Los Angeles, CA (Metropolitan Area)",10721,10800,BOS,BUR,...,298.76,UA,0.4017,330.14,B6,0.1282,231.77,"Boston, MA (Metropolitan Area)\n(42.358894, -7...","Los Angeles, CA (Metropolitan Area)\n(34.05223...",United
3,2018,1,30721,30852,"Boston, MA (Metropolitan Area)","Washington, DC (Metropolitan Area)",10721,10821,BOS,BWI,...,119.44,WN,0.6038,121.73,B6,0.349,119.8,"Boston, MA (Metropolitan Area)\n(42.358894, -7...","Washington, DC (Metropolitan Area)\n(38.892062...",Southwest
4,2018,1,30721,30647,"Boston, MA (Metropolitan Area)","Cleveland, OH (Metropolitan Area)",10721,10874,BOS,CAK,...,174.14,AA,0.4867,168.39,UA,0.1947,150.5,"Boston, MA (Metropolitan Area)\n(42.358894, -7...","Cleveland, OH (Metropolitan Area)\n(41.505546,...",American


In [15]:
# Combine Year and quarter into one text label of the form "<Year>-Q<quarter>".
flights['year_quarter'] = flights['Year'].astype(str).str.cat(
    flights['quarter'].astype(str), sep='-Q'
)

# Mean fare for every (carrier, year-quarter) pair, with the grouping keys
# returned as ordinary columns.
aggregated_data = (
    flights
    .groupby(['carrier_name', 'year_quarter'])['fare']
    .mean()
    .reset_index()
)

# Dropdown widget listing each distinct carrier found in the aggregated table.
carrier_dropdown = alt.binding_select(
    name="Carrier Options: ",
    options=aggregated_data['carrier_name'].unique().tolist(),
)
# Point selection on carrier_name whose value is driven by the dropdown above.
carrier_select = alt.selection_point(
    bind=carrier_dropdown,
    fields=['carrier_name'],
)

# Line chart of the mean fare per carrier across year-quarters. The dropdown
# selection is attached as a parameter and also used to filter the plotted rows.
chart1 = alt.Chart(aggregated_data).mark_line(
    strokeWidth = 3,
    interpolate = 'monotone'
).encode(
    alt.X('year_quarter:N', title = 'Year-Quarter', axis = alt.Axis(labelAngle = 45)),
    alt.Y('fare:Q', title = 'Average Fare'),
    alt.Color('carrier_name:N', legend = alt.Legend(title='Carrier')),
).add_params(carrier_select).transform_filter(carrier_select).properties(
    title='Line Plot of Average Fares by Each Carrier Over Quarters and Years', width = 800, height = 400
)

# Export the chart spec and a static image.
# NOTE: PNG export relies on an image-export backend being installed
# (Altair documents vl-convert-python for this).
chart1.save('lineplot_fares_years.json')
chart1.save('lineplot_fares_years.png')

# Jupyter only renders the value of a cell's LAST expression, so the chart must
# come after the save() calls (which return None) to actually be displayed.
chart1

In [16]:
# Bar chart counting the rows of `flights` for each carrier, one colour per
# carrier with the (redundant) colour legend hidden.
# NOTE(review): 'city2' in the tooltip is not aggregated, so it presumably acts
# as an additional grouping field and each bar/tooltip count is broken down per
# destination city — confirm this is the intended behaviour.
bar1 = alt.Chart(flights).mark_bar().encode(
    alt.X('carrier_name:N', title='Carrier Name'),
    alt.Y('count()', title='Number of Flights'),
    alt.Color('carrier_name:N', title='Carrier', legend=None),
    alt.Tooltip(['carrier_name', 'city2', 'count()'])
).properties(
    title='Bar Chart of Number of Flights by Carrier',
    width=500,
    height=500
)

# Export the chart spec and a static image.
bar1.save('barchart_flights_carriers.json')
bar1.save('barchart_flights_carriers.png')

# Jupyter only renders the value of a cell's LAST expression, so the chart must
# come after the save() calls (which return None) to actually be displayed.
bar1

In [17]:
# Point selection on destination city (city2), driven by a dropdown that lists
# every distinct city2 value present in `flights`.
selection = alt.selection_point(
    bind=alt.binding_select(
        options=list(flights['city2'].unique()),
        name="Select City:",
    ),
    fields=['city2'],
)

# Scatter of passengers (x) against fare_lg (y), filtered to the rows matching
# the city selection. Selected points are coloured by carrier and fully opaque;
# the fallback styling is light grey at 10% opacity.
scatter_plot = alt.Chart(flights).mark_circle().encode(
    alt.X('passengers:Q', title = 'Passengers'),
    alt.Y('fare_lg:Q', title = 'Average Fare'),
    color = alt.condition(
        selection,
        alt.Color('carrier_name:N', scale = alt.Scale(scheme = 'magma')),
        alt.value('lightgrey')
    ),
    opacity = alt.condition(selection, alt.value(1), alt.value(0.1)),
    # The tooltip shows large_ms, the same market-share column that the
    # "Average Market Share" bar chart in this cell averages. The earlier
    # 'average_ms' field is not among the previewed columns of `flights`, and an
    # explicitly typed field that is missing would just render an empty value.
    # NOTE(review): confirm large_ms is the intended market-share field.
    tooltip = ['carrier_name:N', 'fare_lg:Q', 'passengers:Q', 'large_ms:Q']
).transform_filter(selection).properties(
    title = 'Scatter Plot of Average Fare vs. Passengers by City',
    height = 400,
    width = 400
)

# Bars of the mean large_ms value per carrier, restricted to the rows that
# match the city selection.
bar_chart = (
    alt.Chart(flights)
    .mark_bar()
    .encode(
        x=alt.X('carrier_name:N', title='Carrier'),
        y=alt.Y('mean(large_ms):Q', title='Average Market Share'),
        tooltip=['carrier_name:N', 'mean(large_ms):Q'],
    )
    .transform_filter(selection)
    .properties(
        title='Bar Chart of Average Market Share by Carrier',
        width=300,
        height=300,
    )
)

# Place the scatter plot and the bar chart side by side, attaching the shared
# city selection to both, and put the legend on the left.
final_chart = (scatter_plot.add_params(selection) | bar_chart.add_params(selection)).configure_legend(orient = 'left')

# Export the combined chart spec and a static image.
final_chart.save('scatter_bar_fare_by_passengers_by_ms.json')
final_chart.save('scatter_bar_fare_by_passengers_by_ms.png')

# Jupyter only renders the value of a cell's LAST expression, so the chart must
# come after the save() calls (which return None) to actually be displayed.
final_chart

In [18]:
# Interval (brush) selection; it is attached to the scatter plot below.
selection = alt.selection_interval()

# Scatter of flight miles (x) against passengers (y), coloured by fare on the
# 'plasma' scheme, with the interval selection registered on the chart.
chart2 = (
    alt.Chart(flights)
    .mark_circle()
    .encode(
        x=alt.X('nsmiles:Q', title='Flight Miles'),
        y=alt.Y('passengers:Q', title='Number of Passengers'),
        color=alt.Color(
            'fare:Q',
            legend=alt.Legend(title="Fare", orient="right"),
            scale=alt.Scale(scheme='plasma'),
        ),
    )
    .properties(
        title='Scatter Plot of Miles Traveled vs. Passengers by Fare',
        height=500,
        width=500,
    )
    .add_params(selection)
)

# Stacked bar chart of row counts per destination city, coloured by carrier and
# limited to the rows inside the current `selection`.
bar2 = alt.Chart(flights).mark_bar().encode(
    # Order the city bars by descending count. The sort has to live on the
    # discrete X channel and reference the Y encoding ('-y'); a sort placed on
    # the quantitative Y channel does not reorder the bars.
    alt.X('city2:N', title = 'Destination City', sort = '-y'),
    alt.Y('count():Q', title = 'Number of Flights'),
    alt.Color('carrier_name:N', title = 'Carrier'),
).transform_filter(
    selection
).properties(
    title = 'Bar Chart of Flights to Destination Cities By Carrier',
    width = 500, height = 500
)

# Put the brushable scatter plot and the filtered bar chart side by side.
visual4 = chart2 | bar2

# Export the combined chart spec and a static image.
visual4.save('scatterplot_barchart_miles_passengers.json')
visual4.save('scatterplot_barchart_miles_passengers.png')

# End the cell with the chart so the notebook renders it; save() returns None,
# so without this final expression nothing would be displayed.
visual4