In [9]:
import pandas as pd
from bokeh.io import output_notebook

# path to the zipped CSV that this notebook analyses
FILE = "../data/T_T100D_MARKET_US_CARRIER_ONLY_20220506_205137.zip"

# send Bokeh output to the notebook
output_notebook()

domestic_carriers_df = pd.read_csv(
    FILE,
    compression="zip",
    # only the first 22 columns of the file are read
    usecols=range(22),
    # read the numeric measures as 64-bit integers
    dtype=dict.fromkeys(["PASSENGERS", "FREIGHT", "MAIL", "DISTANCE"], "int64"),
)
# lower-case every column name
domestic_carriers_df.columns = [name.lower() for name in domestic_carriers_df.columns]
domestic_carriers_df.head()

Unnamed: 0,passengers,freight,mail,distance,unique_carrier,unique_carrier_name,carrier,carrier_name,carrier_group_new,origin_airport_id,...,origin_state_abr,origin_state_nm,dest_airport_id,dest,dest_city_name,dest_state_abr,dest_state_nm,year,quarter,month
0,0,303913,0,2547,5X,United Parcel Service,5X,United Parcel Service,3,13891,...,CA,California,14307,PVD,"Providence, RI",RI,Rhode Island,2021,4,11
1,0,47738,0,2090,5X,United Parcel Service,5X,United Parcel Service,3,13891,...,CA,California,14122,PIT,"Pittsburgh, PA",PA,Pennsylvania,2021,4,11
2,0,99406,0,1876,5X,United Parcel Service,5X,United Parcel Service,3,13891,...,CA,California,12884,LAN,"Lansing, MI",MI,Michigan,2021,4,11
3,0,1223074,0,1188,5X,United Parcel Service,5X,United Parcel Service,3,13891,...,CA,California,11298,DFW,"Dallas/Fort Worth, TX",TX,Texas,2021,4,11
4,0,475548,136603,2482,5X,United Parcel Service,5X,United Parcel Service,3,13891,...,CA,California,10529,BDL,"Hartford, CT",CT,Connecticut,2021,4,11


Group by carrier name:

In [8]:
# total passengers, freight and mail per carrier; the carrier name is kept
# as a regular column instead of the index
carriers_df = (
    domestic_carriers_df
    .groupby("unique_carrier_name")
    .agg({"passengers": "sum", "freight": "sum", "mail": "sum"})
    .reset_index()
)

Create dfs for passengers, freight, and mail and then plot them in tabs

In [3]:
import copy
from math import pi

from bokeh.models import Panel, Tabs
from bokeh.palettes import Viridis
from bokeh.plotting import figure, show
from bokeh.transform import cumsum

# color list: the 10-color Viridis palette plus gray ("#808080") for "Others"
colors = [*Viridis[10], "#808080"]

In [4]:
def create_dfs(df, categories, top_n=10, palette=None):
    """
    Create dict of dfs for each category

    For every category the carriers are ranked by that category's value.
    The ``top_n`` highest-ranked carriers are kept, and all remaining
    carriers are collapsed into a single "Others" row holding their sum.
    Each resulting frame also gets an ``angle`` column (share of the total
    expressed in radians) and a ``color`` column.

    Args:
        df: DataFrame with a "unique_carrier_name" column and one numeric
            column per entry in ``categories``. It is not modified.
        categories: iterable of column names to build a frame for.
        top_n: number of carriers listed individually before "Others".
        palette: sequence of colors. The first entries are assigned to the
            carriers in rank order; the last entry is used for "Others".
            Defaults to the notebook-level ``colors`` list.

    Returns:
        dict mapping each category to a DataFrame with the columns
        "unique_carrier_name", <category>, "angle" and "color".

    Raises:
        ValueError: if ``palette`` has too few colors for the carriers
            plus the "Others" row.
    """
    if palette is None:
        # fall back to the palette defined in an earlier notebook cell
        palette = colors
    palette = list(palette)

    dfs = {}
    for category in categories:
        # rank carriers by the current category; selecting the two relevant
        # columns yields a new frame, so the caller's df stays untouched and
        # extra columns in df cannot break the "Others" row below
        ranked = (
            df[["unique_carrier_name", category]]
            .sort_values(category, ascending=False)
            .reset_index(drop=True)
        )

        # split into the top carriers and everything else
        top_names = ranked["unique_carrier_name"].iloc[:top_n]
        in_top = ranked["unique_carrier_name"].isin(top_names)
        other_sum = ranked.loc[~in_top, category].sum()

        # top carriers followed by a single "Others" row; concatenating
        # avoids assigning into a filtered slice (SettingWithCopyWarning)
        others_row = pd.DataFrame(
            {"unique_carrier_name": ["Others"], category: [other_sum]}
        )
        category_df = pd.concat([ranked[in_top], others_row], ignore_index=True)

        # add column with annular wedge angles; an all-zero category gets
        # zero-width wedges instead of NaN from a division by zero
        total = category_df[category].sum()
        category_df["angle"] = (category_df[category] / total * 2 * pi) if total else 0.0

        # assign colors to carriers; the palette's last color marks "Others",
        # which also works when fewer than top_n carriers exist
        n_top = len(category_df) - 1
        if len(palette) < n_top + 1:
            raise ValueError(
                f"palette needs at least {n_top + 1} colors, got {len(palette)}"
            )
        category_df["color"] = palette[:n_top] + [palette[-1]]

        # add category dataframe to dict of dataframes
        dfs[category] = category_df

    return dfs

In [5]:
def create_annular_wedge(df_dict, category):
    """
    Build an annular wedge (donut) plot for one category

    Args:
        df_dict: dict mapping category names to data frames; the frame for
            ``category`` is used as the glyph source and must provide the
            "unique_carrier_name", "angle" and "color" columns as well as a
            column named after ``category``.
        category: key into ``df_dict``; its capitalized form labels the
            value shown in the hover tooltip.

    Returns:
        The configured Bokeh figure.
    """
    # hover tooltip: carrier name plus the category value in "(0,0)" format
    hover_fields = [
        ("Carrier", "@unique_carrier_name"),
        (category.capitalize(), f"@{category}{{(0,0)}}"),
    ]

    plot = figure(
        height=300,
        toolbar_location=None,
        outline_line_color=None,
        sizing_mode="scale_width",
        name="region",
        x_range=(-0.66, 1),
        tooltips=hover_fields,
    )

    # each wedge runs from the running total of 'angle' before its row
    # to the running total including its row
    plot.annular_wedge(
        x=0,
        y=0,
        inner_radius=0.2,
        outer_radius=0.4,
        start_angle=cumsum('angle', include_zero=True),
        end_angle=cumsum('angle'),
        line_color="white",
        fill_color='color',
        legend_field='unique_carrier_name',
        source=df_dict[category],
    )

    # hide axes and grid lines, tighten the legend spacing
    plot.axis.visible = False
    plot.grid.grid_line_color = None
    plot.legend.spacing = 1

    return plot

In [7]:
# categories to plot, one tab per category
categories = ["passengers", "freight", "mail"]
# one data frame per category, keyed by category name
dfs = create_dfs(carriers_df, categories)

# wrap the annular wedge plot of each category in its own titled panel
tabs = [
    Panel(child=create_annular_wedge(dfs, category), title=category.capitalize())
    for category in categories
]

# display all plots as tabs
show(Tabs(tabs=tabs))

## To-Dos:
* add a number label into the wedge itself
* break long lines in legend (Mail!)
* In the user guide, shouldn't tabs be in "Creating layouts" rather than in "Adding widgets"?