## Bokeh Setup

In [31]:
from bokeh.io import output_notebook
# Route Bokeh output to the notebook so that later show() calls render inline in the cell output.
output_notebook()

## Import data
Source (BTS TranStats, T-100 Domestic Market, U.S. carriers only): https://transtats.bts.gov/DL_SelectFields.asp?gnoyr_VQ=FIL&QO_fu146_anzr=Nv4%20Pn44vr45

> This table contains domestic market data reported by U.S. air carriers, including carrier, origin, destination, and service class for enplaned passengers, freight and mail when both origin and destination airports are located within the boundaries of the United States and its territories.

In [32]:
import pandas as pd

# Load the T-100 domestic market extract directly from the downloaded zip archive.
# The four measurement columns are forced to int64 so they can be summed and
# formatted as whole numbers later on.
domestic_carriers_df = (
    pd.read_csv(
        "../data/1035327695_T_T100D_MARKET_US_CARRIER_ONLY.zip",
        compression="zip",
        dtype={
            "PASSENGERS": "int64",
            "FREIGHT": "int64",
            "MAIL": "int64",
            "DISTANCE": "int64",
        },
    )
    # "Unnamed: 22" is the name pandas gives to a header-less 23rd column.
    # NOTE(review): presumably caused by a trailing comma at the end of every
    # CSV row in the export -- TODO confirm against the raw file.
    # errors="ignore" keeps this cell runnable if a future export no longer
    # contains that column (a plain drop would raise KeyError).
    .drop(columns="Unnamed: 22", errors="ignore")
)
domestic_carriers_df.head()

Unnamed: 0,PASSENGERS,FREIGHT,MAIL,DISTANCE,UNIQUE_CARRIER,UNIQUE_CARRIER_NAME,CARRIER,CARRIER_NAME,CARRIER_GROUP_NEW,ORIGIN_AIRPORT_ID,...,ORIGIN_STATE_ABR,ORIGIN_STATE_NM,DEST_AIRPORT_ID,DEST,DEST_CITY_NAME,DEST_STATE_ABR,DEST_STATE_NM,YEAR,QUARTER,MONTH
0,0,0,0,1180,AMQ,Ameristar Air Cargo,AMQ,Ameristar Air Cargo,4,10171,...,TX,Texas,14711,SCE,"State College, PA",PA,Pennsylvania,2021,1,1
1,0,0,0,166,AMQ,Ameristar Air Cargo,AMQ,Ameristar Air Cargo,4,11049,...,TX,Texas,10171,ADS,"Dallas, TX",TX,Texas,2021,1,1
2,0,0,0,751,AMQ,Ameristar Air Cargo,AMQ,Ameristar Air Cargo,4,12878,...,IN,Indiana,10171,ADS,"Dallas, TX",TX,Texas,2021,1,1
3,0,0,0,1073,AMQ,Ameristar Air Cargo,AMQ,Ameristar Air Cargo,4,16091,...,MI,Michigan,11049,CLL,"College Station/Bryan, TX",TX,Texas,2021,1,1
4,0,0,0,563,AMQ,Ameristar Air Cargo,AMQ,Ameristar Air Cargo,4,10171,...,TX,Texas,11540,ELP,"El Paso, TX",TX,Texas,2021,1,1


In [33]:
# Print column names, non-null counts and dtypes to check the result of the load step.
domestic_carriers_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 176531 entries, 0 to 176530
Data columns (total 22 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   PASSENGERS           176531 non-null  int64 
 1   FREIGHT              176531 non-null  int64 
 2   MAIL                 176531 non-null  int64 
 3   DISTANCE             176531 non-null  int64 
 4   UNIQUE_CARRIER       176531 non-null  object
 5   UNIQUE_CARRIER_NAME  176531 non-null  object
 6   CARRIER              176531 non-null  object
 7   CARRIER_NAME         176531 non-null  object
 8   CARRIER_GROUP_NEW    176531 non-null  int64 
 9   ORIGIN_AIRPORT_ID    176531 non-null  int64 
 10  ORIGIN               176531 non-null  object
 11  ORIGIN_CITY_NAME     176531 non-null  object
 12  ORIGIN_STATE_ABR     176531 non-null  object
 13  ORIGIN_STATE_NM      176531 non-null  object
 14  DEST_AIRPORT_ID      176531 non-null  int64 
 15  DEST                 176531 non-nu

## Set constants (to be dynamically set by widgets in the future)

In [34]:
# Measurements the carriers can be ranked by. The lower-case name is upper-cased
# further down to obtain the matching DataFrame column (e.g. "mail" -> "MAIL").
MEASUREMENTS = ["passengers", "freight", "mail"]
MEASUREMENT = MEASUREMENTS[2]  # currently "mail"; only one measurement is analysed at a time so far
NUMBER_OF_AIRLINES_TO_CONSIDER = 15  # the N in "N largest carriers": rows kept in the ranking / bars in the chart

## Find N largest carriers by measurement


In [35]:
# Column of the raw data that holds the selected measurement (e.g. "MAIL").
measurement_column = MEASUREMENT.upper()

# Total of the selected measurement per carrier, one row per carrier name.
carrier_totals = (
    domestic_carriers_df
    .groupby(["UNIQUE_CARRIER_NAME"])[measurement_column]
    .sum()
    .reset_index()
)

# Keep the N carriers with the largest totals, renumber the rows from 0 and
# attach a 1-based POSITION column that the plot tooltip can display.
biggest_carriers = (
    carrier_totals
    .sort_values(by=measurement_column, ascending=False)
    .head(NUMBER_OF_AIRLINES_TO_CONSIDER)
    .reset_index(drop=True)
    .assign(POSITION=lambda top: range(1, len(top) + 1))
)
biggest_carriers

Unnamed: 0,UNIQUE_CARRIER_NAME,MAIL,POSITION
0,United Parcel Service,353580883,1
1,Delta Air Lines Inc.,132699832,2
2,United Air Lines Inc.,106646731,3
3,American Airlines Inc.,105405252,4
4,Kalitta Air LLC,68185665,5
5,Alaska Airlines Inc.,33110304,6
6,Tatonduk Outfitters Limited d/b/a Everts Air A...,30473255,7
7,Northern Air Cargo Inc.,24341767,8
8,Aloha Air Cargo,20362697,9
9,Lynden Air Cargo Airlines,16312265,10


## Plot the N largest carriers by measurement

In [36]:
from bokeh.models import ColumnDataSource, NumeralTickFormatter
from bokeh.plotting import figure, show

measurement_column = MEASUREMENT.upper()      # column of biggest_carriers holding the values
measurement_label = MEASUREMENT.capitalize()  # human-readable name for legend and tooltip

# Use the real number of rows so the title stays correct even when the data
# contains fewer carriers than NUMBER_OF_AIRLINES_TO_CONSIDER.
plot_title = f"Top {len(biggest_carriers)} carriers by domestic {MEASUREMENT}"

# Wrapping the whole frame in a ColumnDataSource makes every column
# (UNIQUE_CARRIER_NAME, the measurement column, POSITION) addressable from the
# hover tooltips via "@COLUMN". Passing raw Series to vbar() instead would only
# expose the glyph's own fields ("x", "top"), and any other "@..." reference
# would render as "???".
carriers_source = ColumnDataSource(biggest_carriers)

##############
# An example of what a tooltip should look like:
#
# Position: 1
# Carrier: United Parcel Service
# Mail: 353,580,883
#
##############
TOOLTIPS = [
    ("Position", "@POSITION"),
    ("Carrier", "@UNIQUE_CARRIER_NAME"),
    # "{0,0}" formats the value with thousands separators; the doubled braces
    # are f-string escapes, the resulting spec is e.g. "@MAIL{0,0}".
    (measurement_label, f"@{measurement_column}{{0,0}}"),
]

p = figure(
    # A plain list of category names makes the x axis categorical, one slot per carrier.
    x_range=biggest_carriers["UNIQUE_CARRIER_NAME"].tolist(),
    title=plot_title,
    height=300,
    sizing_mode="stretch_width",
    tooltips=TOOLTIPS,
)
p.vbar(
    x="UNIQUE_CARRIER_NAME",
    top=measurement_column,
    source=carriers_source,
    legend_label=measurement_label,
    width=0.6,
)

p.xgrid.grid_line_color = None
p.yaxis.formatter = NumeralTickFormatter(format="0,0")
# If the carrier names overlap on the x axis, rotate the labels:
# from math import pi
# p.xaxis.major_label_orientation = pi/4  # rotate labels

show(p)