In [None]:
import os
import pandas as pd
import numpy as np

# Switch to the project root so that the relative data paths below resolve.
PROJECT_DIR = "/content/drive/MyDrive/Projects/Flight_Delay_Predict_Project"
os.chdir(PROJECT_DIR)

# Load the airline data set.
df = pd.read_csv('CodeFiles/Full_airline_data.csv')

# Codes of the 10 most frequent OP_CARRIER values
# (value_counts sorts by descending frequency, so the first 10 labels are the top 10).
carrier_counts = df['OP_CARRIER'].value_counts()
top10_carrier = carrier_counts.index[:10].tolist()

# Carrier key file, used later to attach the full airline names.
carrier_key = pd.read_csv('data/airline/airlines.csv')

In [None]:
#Visualize the POSITIVE_ARR_DELAYS for top 10 airlines

#Keep only the flights of the 10 biggest carriers and the columns needed below.
#.copy() gives an independent frame, so the column assignment further down is safe.
is_top10 = df['OP_CARRIER'].isin(top10_carrier)
df_sub = df.loc[is_top10, ['OP_CARRIER','DEP_HOUR','ARR_DELAY']].copy()

#Since we are considering only flight delays, we ignore early landings
#ARR_DELAY < 0 means the flight landed before its CRS_ARR_TIME
#clip() is the vectorized form of "0 if x<0 else x": negatives become 0, NaN stays NaN.
df_sub['ARR_DELAY'] = df_sub['ARR_DELAY'].clip(lower=0)

#Merge the airline names, then drop the two code columns that are no longer needed
df_sub = pd.merge(df_sub,carrier_key,how='left',left_on='OP_CARRIER', right_on='IATA_CODE')
df_sub = df_sub.drop(columns=['OP_CARRIER','IATA_CODE'])

In [None]:
#Build the matrix of averaged carrier delays:
#one row per airline, columns 0-23 = departure hour, column 24 = daily average.

#Mean delay (daily average). groupby sorts the airlines, which fixes the row order.
df_mean = df_sub.groupby('AIRLINE')['ARR_DELAY'].mean().reset_index()

#Delays grouped by departure hour (rows = airline, columns = hour).
#The column is selected before mean() so that only ARR_DELAY is averaged.
df_hourly = df_sub.groupby(by=['AIRLINE','DEP_HOUR'])['ARR_DELAY'].mean().unstack()

#Force the same airline order as df_mean and exactly the 24 hour columns 0..23:
#an hour with no flights would otherwise be missing and break the assignment below.
#Any DEP_HOUR outside 0..23 is dropped here. Missing airline/hour cells become 0.
df_hourly = df_hourly.reindex(index=df_mean['AIRLINE'], columns=list(range(24))).fillna(0)

#Size the array from the data instead of assuming exactly 10 airlines.
arr_delay = np.empty((len(df_mean), 25))
arr_delay[:,0:24] = np.array(df_hourly)
arr_delay[:,24] = np.array(df_mean['ARR_DELAY'])

#The chart cells draw each bar as a rect glyph positioned at y=0 on a y-range starting at 0;
#presumably the heights are doubled because a rect is centred on its y coordinate,
#so only the upper half of the glyph is visible.
arr_delay = arr_delay*2.0

In [None]:
###  Interaction in Bokeh charts using widgets

from bokeh.models import  Callback, ColumnDataSource, Rect,CustomJS, LabelSet, Column
from bokeh.plotting import figure, output_file, show, gridplot
from bokeh.layouts import row, column,widgetbox
from bokeh.models import CustomJS, Slider, Select
import numpy as np
from math import pi


#data
carrier = df_mean['AIRLINE'].tolist()
ages_gen = [str(hour) for hour in range(25)]  #'0'..'24', labels for the optional Select widget

#source for callback
#'height' is the column the glyph reads. 'height0'..'height23' hold the per-hour values
#and 'height24' the daily average, so the JS callback can swap one in by name.
bar_data = dict(x=carrier, y=np.zeros(len(carrier)), height=arr_delay[:,24])
bar_data.update({'height%d' % col: arr_delay[:,col] for col in range(25)})
source = ColumnDataSource(data=bar_data)

#Text shown by the label: 'title' is displayed, 'h1'/'h2' are the two alternatives
source2 = ColumnDataSource(data=dict(title=["Daily Average"],h1 = ["Hourly"],h2 = ["Daily Average"]))


#Javascript Callbacks for Departure Hour
#Uses the property-style BokehJS API (cb_obj.value, source.data, source.change.emit()).
#The older cb_obj.get(...) / source.trigger('change') calls no longer exist in BokehJS,
#so with them the slider silently did nothing.
Callback_Hour = CustomJS(args={'source1': source,'source2':source2}, code="""
        var f = cb_obj.value;
        var data1 = source1.data;
        var data2 = source2.data;

        data1['height'] = data1['height'+f.toString()];
        source1.change.emit();

        if (f==24){
            data2['title'] = data2['h2'];
            }
        else {
            data2['title'] = data2['h1'];
            }
        source2.change.emit();
    """)

#Figure for the bar chart
p1 = figure(title="Average flight delay",
            x_range=carrier, y_range=[0, 30],
            plot_width=600, plot_height = 350,
            outline_line_color= None,
            toolbar_location='above')
p1.background_fill_color = '#e3e0db'
p1.xaxis.major_label_orientation = pi/4
p1.xaxis.axis_label = 'Airline Carrier'
p1.yaxis.axis_label = 'Average Delay'


#Create Barchart using rect glyphs
p1.rect(x ='x', y ='y', width =.8, height = 'height', source = source, color="#720017", alpha=0.95, name = "Airline Delay")
#Create label
labels = LabelSet(x=0, y=0, text='title', level='glyph',x_offset=35, y_offset=170, source=source2, render_mode='canvas')
p1.add_layout(labels)

#Create the slider; position 24 selects the daily average
hour_slider = Slider(start=0, end=24, value=24, step=1,title="Departure Hour")
hour_slider.js_on_change('value', Callback_Hour)
#Alternately:
#hour_slider = Select(title="Departure Hour:", value=ages_gen[24], options= ages_gen)

layout = column(p1,Column(hour_slider))
output_file("slider.html", title="slider.py example")

show(layout)

In [None]:
###  Interaction in Bokeh charts using widgets

from bokeh.models import  Callback, ColumnDataSource, Rect,CustomJS, LabelSet
from bokeh.plotting import figure, output_file, show, gridplot
from bokeh.layouts import row, column,widgetbox
from bokeh.models import CustomJS, Slider, Select
import numpy as np
from math import pi


#data
carrier = df_mean['AIRLINE'].tolist()
ages_gen = [str(hour) for hour in range(25)]  #'0'..'24', labels for the optional Select widget

#source for callback
#'height' is the column the glyph reads. 'height0'..'height23' hold the per-hour values
#and 'height24' the daily average, so the JS callback can swap one in by name.
bar_data = dict(x=carrier, y=np.zeros(len(carrier)), height=arr_delay[:,24])
bar_data.update({'height%d' % col: arr_delay[:,col] for col in range(25)})
source = ColumnDataSource(data=bar_data)

#Text shown by the label: 'title' is displayed, 'h1'/'h2' are the two alternatives
source2 = ColumnDataSource(data=dict(title=["Daily Average"],h1 = ["Hourly"],h2 = ["Daily Average"]))


#Javascript Callbacks for Departure Hour
#Uses the property-style BokehJS API (cb_obj.value, source.data, source.change.emit()).
#The older cb_obj.get(...) / source.trigger('change') calls no longer exist in BokehJS,
#so with them the slider silently did nothing.
Callback_Hour = CustomJS(args={'source1': source,'source2':source2}, code="""
        var f = cb_obj.value;
        var data1 = source1.data;
        var data2 = source2.data;

        data1['height'] = data1['height'+f.toString()];
        source1.change.emit();

        if (f==24){
            data2['title'] = data2['h2'];
            }
        else {
            data2['title'] = data2['h1'];
            }
        source2.change.emit();
    """)

#Figure for the bar chart
p1 = figure(title="Average flight delay",
            x_range=carrier, y_range=[0, 30],
            plot_width=600, plot_height = 350,
            outline_line_color= None,
            toolbar_location='above')
p1.background_fill_color = '#e3e0db'
p1.xaxis.major_label_orientation = pi/4
p1.xaxis.axis_label = 'Airline Carrier'
p1.yaxis.axis_label = 'Average Delay'


#Create Barchart using rect glyphs
p1.rect(x ='x', y ='y', width =.8, height = 'height', source = source, color="#720017", alpha=0.95, name = "Airline Delay")
#Create label
labels = LabelSet(x=0, y=0, text='title', level='glyph',x_offset=35, y_offset=170, source=source2, render_mode='canvas')
p1.add_layout(labels)

#Create the slider; position 24 selects the daily average
hour_slider = Slider(start=0, end=24, value=24, step=1,title="Departure Hour")
hour_slider.js_on_change("value",Callback_Hour)
#Alternately:
#hour_slider = Select(title="Departure Hour:", value=ages_gen[24], options= ages_gen,  callback = Callback_Hour)

layout = column(p1,column(hour_slider))
output_file("slider2.html", title="slider.py example")
print("Done")

show(layout)

Done


In [None]:
#Create the javascript code and script tag to embed in the blog
from bokeh.resources import CDN
from bokeh.embed import autoload_static

#autoload_static returns the JavaScript for the layout and the <script> tag that loads it
#from the given path.
js, tag = autoload_static(layout, CDN, "bokeh_js/interact_barchart_select.js")

#Write the script through a context manager so the file is closed even if write() fails.
#NOTE(review): the file is written to the working directory while the tag points at
#bokeh_js/ - presumably it is copied there when publishing; confirm.
with open( 'interact_barchart_select.js', 'w' ) as f:
    f.write(js)
print(tag)


<script src="bokeh_js/interact_barchart_select.js" id="dce27546-4df5-4d55-9c68-307297d48636"></script>


In [None]:
#Columns needed for the day-of-week / departure-hour heatmaps
heatmap_cols = ['OP_CARRIER','DAY_OF_WEEK','DEP_HOUR','ARR_DELAY']

#Remove datapoints which have erroneous DAY_OF_WEEK (value 9).
#A boolean mask selects rows by position, so it stays correct even if index labels repeat;
#.copy() gives an independent frame for the assignment below.
df_sub = df.loc[df['DAY_OF_WEEK'] != 9, heatmap_cols].copy()

#Since we are considering only flight delays, we ignore early landings
#ARR_DELAY < 0 means the flight landed before its CRS_ARR_TIME
#clip() is the vectorized form of "0 if x<0 else x": negatives become 0, NaN stays NaN.
df_sub['ARR_DELAY'] = df_sub['ARR_DELAY'].clip(lower=0)

In [None]:
#Get the 10 biggest Airline Carriers as a DataFrame:
#column 'IATA' holds the carrier code, column 'OP_CARRIER' holds the number of flights.
#The index is named explicitly instead of relying on reset_index() producing a column
#called "index": newer pandas names it after the counted column, so the old rename
#was a no-op and no 'IATA' column was created.
top10_counts = df_sub['OP_CARRIER'].value_counts().head(10)
top10_carrier = top10_counts.rename_axis('IATA').reset_index(name='OP_CARRIER')

In [None]:
#Load airlines.csv for the full names of the IATA codes and rename its code column
#to "IATA" so it can be joined on. index=str also turns the row labels into strings.
#NOTE(review): the first cell reads 'data/airline/airlines.csv' - confirm this relative path.
carrier_key = pd.read_csv('airlines.csv').rename(index=str, columns={"IATA_CODE": "IATA"})

#Left-join the carrier key onto the dataframe containing the 10 biggest Airline Carriers
top10_carrier = top10_carrier.merge(carrier_key, how='left', on='IATA')

In [None]:
import holoviews as hv
from holoviews import dim, opts
#from bokeh.charts import HeatMap
from bokeh.io import output_file, show
from bokeh.palettes import OrRd9
from bokeh.models import Range1d, LinearColorMapper, ColorBar
from bokeh.models.widgets import Panel, Tabs

In [None]:
def create_heatmap(df_sub, desc='Average Flight Delays'):
    """Build a day-of-week by departure-hour heatmap of the mean arrival delay.

    The plot is assembled from plain Bokeh primitives (figure + rect glyph +
    LinearColorMapper) because the result is used as a Bokeh plot: it gets a
    ColorBar via add_layout() and is wrapped in a Panel by the caller. The
    previous hv.HeatMap(...) call passed bokeh.charts-style keyword arguments,
    which HoloViews does not accept.

    Parameters
    ----------
    df_sub : pandas.DataFrame
        Needs the columns DAY_OF_WEEK (1..7, mapped to Monday..Sunday),
        DEP_HOUR and ARR_DELAY. Rows with any other DAY_OF_WEEK are ignored.
    desc : str
        Plot title.

    Returns
    -------
    The Bokeh figure holding the heatmap and its colour bar.
    """
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    day_names = dict(enumerate(day_order, start=1))
    max_delay = 30  #upper end of the colour scale; larger means are shown as max_delay

    #Mean delay per (day, hour). Selecting ARR_DELAY first keeps non-numeric
    #columns such as the carrier code out of mean().
    dayHour = df_sub.groupby(['DAY_OF_WEEK','DEP_HOUR'], as_index=False)['ARR_DELAY'].mean()

    #Translate the day numbers to names and cap the delays, without in-place edits
    known_day = dayHour['DAY_OF_WEEK'].isin(list(day_names))
    dayHour = dayHour[known_day].assign(
        DAY_OF_WEEK=lambda d: d['DAY_OF_WEEK'].map(day_names),
        ARR_DELAY=lambda d: d['ARR_DELAY'].clip(upper=max_delay))

    #Fixed 0..max_delay scale, so every heatmap shares the same colours and no
    #artificial calibration rows are needed. The palette is reversed as before.
    Invert_OrRd9 = OrRd9[::-1]
    color_mapper = LinearColorMapper(palette=Invert_OrRd9, low=0, high=max_delay)

    #Each cell is a 1x1 rect centred on its hour, hence the half-unit padding on x.
    #The y factors are reversed so that Monday is drawn at the top.
    hm = figure(title=desc, toolbar_location='above',
                plot_width=600, plot_height=330,
                x_range=Range1d(-0.5, 23.5), y_range=day_order[::-1])
    hm.xaxis.axis_label = 'Departure Hour'
    hm.yaxis.axis_label = 'Day of the Week'
    hm.rect(x='DEP_HOUR', y='DAY_OF_WEEK', width=1, height=1,
            source=ColumnDataSource(dayHour),
            fill_color={'field': 'ARR_DELAY', 'transform': color_mapper},
            line_color=None)

    #Add the Colorbar, driven by the same mapper as the cells
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, location=(0,0))
    hm.add_layout(color_bar, 'right')

    return hm

def create_tab(df_sub,k):
    """Return a Panel holding the delay heatmap of the k-th biggest carrier.

    Parameters
    ----------
    df_sub : pandas.DataFrame
        Flight data passed on to create_heatmap; must hold the carrier code
        in 'OP_CARRIER' (or 'UNIQUE_CARRIER' in the older data layout).
    k : int
        1-based rank of the carrier in top10_carrier.
    """
    #Positional lookup, so it does not depend on the index labels of top10_carrier
    carrier_row = top10_carrier.iloc[k-1]
    iata = carrier_row['IATA']

    #The flight subset built in this notebook names the code column OP_CARRIER;
    #UNIQUE_CARRIER is kept as a fallback for frames that still use the old name.
    carrier_col = 'OP_CARRIER' if 'OP_CARRIER' in df_sub.columns else 'UNIQUE_CARRIER'

    #Full airline name: 'AIRLINE' when present (the column this notebook reads from the
    #carrier key elsewhere), otherwise the older 'Description'.
    name_col = 'AIRLINE' if 'AIRLINE' in top10_carrier.columns else 'Description'
    full_name = carrier_row[name_col]
    #A carrier missing from the key file has no name after the left merge: use its code
    desc = full_name if isinstance(full_name, str) else str(iata)

    hm = create_heatmap(df_sub[df_sub[carrier_col]==iata],desc = desc)
    tab = Panel(child=hm, title=iata)
    return tab

#Create 11 heatmaps: one over all flights, then one per carrier for the
#10 biggest airlines (ranks 1..10, created in that order)
hm = create_heatmap(df_sub)
tab0 = Panel(child=hm, title="Overall")

(tab1, tab2, tab3, tab4, tab5,
 tab6, tab7, tab8, tab9, tab10) = [create_tab(df_sub, rank) for rank in range(1, 11)]