In [2]:
import pandas as pd
import numpy as np
from plotnine import *

In [3]:
## Functions that return a plot of the totals over a given interval.
## lower and upper determine an (inclusive) integer interval of years (counted from 2007).
def _plot_yearly_totals(df, lower, upper, column):
    """Build a point-and-line plot of ``column`` against ``time`` for a span of years.

    Shared implementation behind giveMnPrFt, givearr_delay and givearr_flights.

    Parameters
    ----------
    df : DataFrame of totals containing a 'time' column and ``column``.
        Assumes one row per month in chronological order, so that rows
        [12*k, 12*k + 12) belong to year k -- TODO confirm against the caller.
    lower, upper : int
        Inclusive interval of year offsets to plot.
    column : str
        Name of the column mapped to the y axis and to the line colour.

    Returns
    -------
    plotnine ggplot object.

    Raises
    ------
    ValueError
        If ``lower`` is greater than ``upper``.
    """
    if lower > upper:
        raise ValueError(
            "lower (%r) must not be greater than upper (%r)" % (lower, upper)
        )
    # A single year (lower == upper) and a multi-year span are the same slice:
    # twelve rows per year, starting at year `lower` and ending after year `upper`.
    window = df[lower * 12:(upper + 1) * 12]
    return (
        ggplot(window, aes(x='time', y=column))
        + geom_point(size=0.25)
        + geom_line(aes(color=column))
    )


def giveMnPrFt(df, lower, upper):
    """Plot 'MnPrFt' over the inclusive year interval [lower, upper].

    ``df`` is the totals frame to plot; it is now actually used (the previous
    version silently read a notebook-level global instead of this argument).
    Raises ValueError when lower > upper.
    """
    return _plot_yearly_totals(df, lower, upper, 'MnPrFt')


def givearr_delay(df, lower, upper):
    """Plot 'arr_delay' over the inclusive year interval [lower, upper].

    ``df`` is the totals frame to plot; it is now actually used (the previous
    version silently read a notebook-level global instead of this argument).
    Raises ValueError when lower > upper.
    """
    return _plot_yearly_totals(df, lower, upper, 'arr_delay')


def givearr_flights(df, lower, upper):
    """Plot 'arr_flights' over the inclusive year interval [lower, upper].

    ``df`` is the totals frame to plot; it is now actually used (the previous
    version silently read a notebook-level global instead of this argument).
    Raises ValueError when lower > upper.
    """
    return _plot_yearly_totals(df, lower, upper, 'arr_flights')


In [None]:
## Data filtering 
#####################

############ airports
## returns airport specific dataframes over given time interval [9,188]
def airports(lowT, hiT, df):
    """Total arrivals and arrival delay per airport over a time window.

    Parameters
    ----------
    lowT, hiT : inclusive bounds compared against the 'time' column.
    df : DataFrame with 'airport', 'time', 'arr_flights' and 'arr_delay' columns.
        The input frame is not modified.

    Returns
    -------
    DataFrame with one row per airport and the columns 'airport',
    'arr_flights' (sum), 'arr_delay' (sum) and 'MnPrFt'
    (arr_delay / arr_flights for that airport).
    """
    in_window = df.loc[(df['time'] <= hiT) & (df['time'] >= lowT)]
    # Use the string name 'sum': passing the builtin `sum` to agg is deprecated
    # in recent pandas releases and emits a FutureWarning.
    totals = (
        in_window
        .groupby(['airport'])
        .agg({'arr_flights': 'sum', 'arr_delay': 'sum'})
        .reset_index()
    )
    # Delay per arriving flight; a zero flight count is not special-cased here.
    totals['MnPrFt'] = totals['arr_delay'] / totals['arr_flights']
    return totals

## Use to define Top N (by number of flight arrivals)
## We use this as people are most likely to fly to these airports
def airTopN(lowT, HiT, df, N):
    """Return the first N rows of the per-airport totals, busiest airport first.

    The totals come from airports(lowT, HiT, df) and are ordered by
    'arr_flights' in descending order before the first N rows are taken.
    """
    per_airport = airports(lowT, HiT, df)
    busiest_first = per_airport.sort_values(by='arr_flights', ascending=False)
    return busiest_first.head(N)

## Watch evolution of top values corresponding to airports over time
def airovertime(lowT, hiT, df, N):
    """Stack the per-time-step top-N airport tables for every step in [lowT, hiT].

    For each integer t in the inclusive range, the airports are ranked by
    arrivals within that single step (airTopN(t, t, ...)), the first N rows
    are kept and tagged with a 'time' column equal to t.

    Parameters
    ----------
    lowT, hiT : int, inclusive bounds of the time steps to iterate over.
    df : DataFrame accepted by airTopN / airports.
    N : int, number of rows kept per time step.

    Returns
    -------
    DataFrame of the stacked per-step tables (original row labels are kept,
    so the index is not unique across steps). An empty DataFrame is returned
    when the range contains no steps.
    """
    per_step = []
    for t in range(lowT, hiT + 1):
        # Rank at least 50 airports for the step; the head(N) that follows
        # then trims the table to the N rows actually reported.
        ranked = airTopN(t, t, df, max(N, 50))
        # assign() returns a new frame, so no column is written onto a slice.
        per_step.append(ranked.head(N).assign(time=t))
    if not per_step:
        # pd.concat raises on an empty list; keep the empty-frame result.
        return pd.DataFrame()
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(per_step, axis=0)

############# carriers
## These give insight into which large carriers are frequently late
## Function below gives carrier specific dataframes over time interval [9,188]
def carriers(lowT, hiT, df):
    """Total arrivals and arrival delay per carrier over a time window.

    Parameters
    ----------
    lowT, hiT : inclusive bounds compared against the 'time' column.
    df : DataFrame with 'carrier', 'time', 'arr_flights' and 'arr_delay' columns.
        The input frame is not modified.

    Returns
    -------
    DataFrame with one row per carrier and the columns 'carrier',
    'arr_flights' (sum), 'arr_delay' (sum) and 'MnPrFt'
    (arr_delay / arr_flights for that carrier).
    """
    in_window = df.loc[(df['time'] <= hiT) & (df['time'] >= lowT)]
    # Use the string name 'sum': passing the builtin `sum` to agg is deprecated
    # in recent pandas releases and emits a FutureWarning.
    totals = (
        in_window
        .groupby(['carrier'])
        .agg({'arr_flights': 'sum', 'arr_delay': 'sum'})
        .reset_index()
    )
    # Delay per arriving flight; a zero flight count is not special-cased here.
    totals['MnPrFt'] = totals['arr_delay'] / totals['arr_flights']
    return totals

## Top N carriers (by number of flight arrivals) over the given time interval
def carTopN(lowT, HiT, df, N):
    """Return the first N rows of the per-carrier totals, busiest carrier first.

    The totals come from carriers(lowT, HiT, df) and are ordered by
    'arr_flights' in descending order before the first N rows are taken.
    """
    per_carrier = carriers(lowT, HiT, df)
    busiest_first = per_carrier.sort_values(by='arr_flights', ascending=False)
    return busiest_first.head(N)

## Watch evolution of top values over time corresponding to carriers
def carovertime(lowT, hiT, df, N):
    """Stack the per-time-step top-N carrier tables for every step in [lowT, hiT].

    For each integer t in the inclusive range, the carriers are ranked by
    arrivals within that single step (carTopN(t, t, ...)), the first N rows
    are kept and tagged with a 'time' column equal to t.

    Parameters
    ----------
    lowT, hiT : int, inclusive bounds of the time steps to iterate over.
    df : DataFrame accepted by carTopN / carriers.
    N : int, number of rows kept per time step.

    Returns
    -------
    DataFrame of the stacked per-step tables with exact duplicate rows
    removed (original row labels are kept, so the index is not unique across
    steps). An empty DataFrame is returned when the range contains no steps.
    """
    per_step = []
    for t in range(lowT, hiT + 1):
        ranked = carTopN(t, t, df, max(50, N))
        # Keep N rows per step. The earlier bare head() always kept the
        # default 5 rows and silently ignored N.
        # assign() returns a new frame, so no column is written onto a slice.
        per_step.append(ranked.head(N).assign(time=t))
    if not per_step:
        # pd.concat raises on an empty list; keep the empty-frame result.
        return pd.DataFrame()
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(per_step, axis=0).drop_duplicates()

In [5]:
## Plot top N via barplot
## Use max(50,N) to always include top 50 airports 

## Below, N is always the number of rows

############# Airports
def plotairTopNaf(lowT, hiT, df, N):
    """Bar chart of 'arr_flights' per airport for the first N rows of airTopN over [lowT, hiT]."""
    # Rank at least 50 airports, then keep the N rows that are drawn.
    top_airports = airTopN(lowT, hiT, df, max(50, N)).head(N)
    mapping = aes(x='airport', y='arr_flights', fill='airport')
    chart = ggplot(top_airports, mapping)
    return chart + geom_bar(stat='identity')
    
def plotairTopNad(lowT, hiT, df, N):
    """Bar chart of 'arr_delay' per airport for the first N rows of airTopN over [lowT, hiT]."""
    # Rank at least 50 airports, then keep the N rows that are drawn.
    ranked = airTopN(lowT, hiT, df, max(50, N))
    top_airports = ranked.head(N)
    mapping = aes(x='airport', y='arr_delay', fill='airport')
    chart = ggplot(top_airports, mapping)
    return chart + geom_bar(stat='identity')
 
def plotairTopNmpf(lowT, hiT, df, N):
    """Bar chart of 'MnPrFt' per airport for the first N rows of airTopN over [lowT, hiT]."""
    # Rank at least 50 airports, then keep the N rows that are drawn.
    ranked = airTopN(lowT, hiT, df, max(50, N))
    top_airports = ranked.head(N)
    mapping = aes(x='airport', y='MnPrFt', fill='airport')
    chart = ggplot(top_airports, mapping)
    return chart + geom_bar(stat='identity')

## Line plots with given time interval
def plotairOTaf(lowT, hiT, df, N):
    """Point-and-line plot of 'arr_flights' against 'time', one line per airport.

    The plotted rows are whatever airovertime(lowT, hiT, df, N) returns.
    """
    history = airovertime(lowT, hiT, df, N)
    chart = ggplot(history, aes(x='time', y='arr_flights', fill='airport'))
    chart = chart + geom_point(size=2)
    # Lines are grouped and coloured by airport.
    per_airport_lines = geom_line(aes(group='airport', color='airport'), size=0.1)
    return chart + per_airport_lines
def plotairOTad(lowT, hiT, df, N):
    """Point-and-line plot of 'arr_delay' against 'time', one line per airport.

    The plotted rows are whatever airovertime(lowT, hiT, df, N) returns.
    """
    history = airovertime(lowT, hiT, df, N)
    chart = ggplot(history, aes(x='time', y='arr_delay', fill='airport'))
    chart = chart + geom_point(size=2)
    # Lines are grouped and coloured by airport.
    per_airport_lines = geom_line(aes(group='airport', color='airport'), size=0.1)
    return chart + per_airport_lines
def plotairOTmpf(lowT, hiT, df, N):
    """Point-and-line plot of 'MnPrFt' against 'time', one line per airport.

    The plotted rows are whatever airovertime(lowT, hiT, df, N) returns.
    """
    history = airovertime(lowT, hiT, df, N)
    chart = ggplot(history, aes(x='time', y='MnPrFt', fill='airport'))
    chart = chart + geom_point(size=2)
    # Lines are grouped and coloured by airport.
    per_airport_lines = geom_line(aes(group='airport', color='airport'), size=0.1)
    return chart + per_airport_lines

############### Carriers

## Bar plots with given time interval
def plotcarTopNaf(lowT, hiT, df, N):
    """Bar chart of 'arr_flights' per carrier for the first N rows of carTopN over [lowT, hiT]."""
    # Rank at least 50 carriers, then keep the N rows that are drawn.
    ranked = carTopN(lowT, hiT, df, max(50, N))
    top_carriers = ranked.head(N)
    mapping = aes(x='carrier', y='arr_flights', fill='carrier')
    chart = ggplot(top_carriers, mapping)
    return chart + geom_bar(stat='identity')
    
def plotcarTopNad(lowT, hiT, df, N):
    """Bar chart of 'arr_delay' per carrier for the first N rows of carTopN over [lowT, hiT]."""
    # Rank at least 50 carriers, then keep the N rows that are drawn.
    ranked = carTopN(lowT, hiT, df, max(50, N))
    top_carriers = ranked.head(N)
    mapping = aes(x='carrier', y='arr_delay', fill='carrier')
    chart = ggplot(top_carriers, mapping)
    return chart + geom_bar(stat='identity')
 
def plotcarTopNmpf(lowT, hiT, df, N):
    """Bar chart of 'MnPrFt' per carrier for the first N rows of carTopN over [lowT, hiT]."""
    # Rank at least 50 carriers, then keep the N rows that are drawn.
    ranked = carTopN(lowT, hiT, df, max(50, N))
    top_carriers = ranked.head(N)
    mapping = aes(x='carrier', y='MnPrFt', fill='carrier')
    chart = ggplot(top_carriers, mapping)
    return chart + geom_bar(stat='identity')

## Line plots with given time interval

def plotcarOTaf(lowT, hiT, df, N):
    """Point-and-line plot of 'arr_flights' against 'time', one line per carrier.

    The plotted rows are whatever carovertime(lowT, hiT, df, N) returns.
    """
    history = carovertime(lowT, hiT, df, N)
    chart = ggplot(history, aes(x='time', y='arr_flights', fill='carrier'))
    chart = chart + geom_point(size=2)
    # Lines are grouped and coloured by carrier.
    per_carrier_lines = geom_line(aes(group='carrier', color='carrier'), size=0.1)
    return chart + per_carrier_lines

    
def plotcarOTad(lowT, hiT, df, N):
    """Point-and-line plot of 'arr_delay' against 'time', one line per carrier.

    The plotted rows are whatever carovertime(lowT, hiT, df, N) returns.
    """
    history = carovertime(lowT, hiT, df, N)
    chart = ggplot(history, aes(x='time', y='arr_delay', fill='carrier'))
    chart = chart + geom_point(size=2)
    # Lines are grouped and coloured by carrier.
    per_carrier_lines = geom_line(aes(group='carrier', color='carrier'), size=0.1)
    return chart + per_carrier_lines

 
def plotcarOTmpf(lowT, hiT, df, N):
    """Point-and-line plot of 'MnPrFt' against 'time', one line per carrier.

    The plotted rows are whatever carovertime(lowT, hiT, df, N) returns.
    """
    history = carovertime(lowT, hiT, df, N)
    chart = ggplot(history, aes(x='time', y='MnPrFt', fill='carrier'))
    chart = chart + geom_point(size=2)
    # Lines are grouped and coloured by carrier.
    per_carrier_lines = geom_line(aes(group='carrier', color='carrier'), size=0.1)
    return chart + per_carrier_lines

