In [1]:
import pandas as pd
import numpy as np

In [2]:
flight_delay_by_cause_file = './assets/!Flight Delays by Cause National June 2004 to November 2014.csv'


### Delay cause definitions:

* Air Carrier: The cause of the cancellation or delay was due to circumstances within the airline's control (e.g. maintenance or crew problems, aircraft cleaning, baggage loading, fueling, etc.).

* Extreme Weather: Significant meteorological conditions (actual or forecasted) that, in the judgment of the carrier, delays or prevents the operation of a flight such as tornado, blizzard or hurricane.

* National Aviation System (NAS): Delays and cancellations attributable to the national aviation system that refer to a broad set of conditions, such as non-extreme weather conditions, airport operations, heavy traffic volume, and air traffic control.

* Late-arriving aircraft: A previous flight with same aircraft arrived late, causing the present flight to depart late.

* Security: Delays or cancellations caused by evacuation of a terminal or concourse, re-boarding of aircraft because of security breach, inoperative screening equipment and/or long lines in excess of 29 minutes at screening areas.

A flight is considered delayed when it arrived 15 or more minutes than the schedule (see definitions in Frequently Asked Questions). Delayed minutes are calculated for delayed flights only. Data presented summarizes arriving flights only. 
When multiple causes are assigned to one delayed flight, each cause is prorated based on delayed minutes it is responsible for. The displayed numbers are rounded and may not add up to the total.

https://www.rita.dot.gov/bts/help/aviation/html/understanding.html
https://www.rita.dot.gov/bts/help_with_data/aviation/index.html#q7
https://www.transtats.bts.gov/OT_Delay/OT_DelayCause1.asp?pn=1

### Terminology:

arr_flights: count of operations (flights) - **count**

arr_del15: aggergate **count** of delayed flights from sum of:
* carrier_ct
* weather_ct
* nas_ct
* security_ct
* late_aircraft_ct

arr_delay: total **minutes** of delayed flights from sum of:
* arr_delay
* carrier_delay
* weather_delay
* nas_delay
* security_delay
* late_aircraft_delay

In [3]:
Flight_delay_by_cause_df = pd.read_csv(flight_delay_by_cause_file)
Flight_delay_by_cause_df.columns

Index([u'year', u' month', u'carrier', u'carrier_name', u'airport',
       u'airport_name', u'arr_flights', u'arr_del15', u'carrier_ct',
       u' weather_ct', u'nas_ct', u'security_ct', u'late_aircraft_ct',
       u'arr_cancelled', u'arr_diverted', u' arr_delay', u' carrier_delay',
       u'weather_delay', u'nas_delay', u'security_delay',
       u'late_aircraft_delay'],
      dtype='object')

In [4]:
Flight_delay_by_cause_df.columns = [u'year', u' month', u'carrier', u'carrier_name', u'airport',
       u'airport_name', u'arr_flights_ct', u'arr_del15_ct', u'carrier_ct',
       u' weather_ct', u'nas_ct', u'security_ct', u'late_aircraft_ct',
       u'arr_cancelled', u'arr_diverted', u' arr_delay_mins', u' carrier_delay_mins',
       u'weather_delay_mins', u'nas_delay_mins', u'security_delay_mins',
       u'late_aircraft_delay_mins']

In [5]:
Flight_delay_by_cause_df.columns

Index([u'year', u' month', u'carrier', u'carrier_name', u'airport',
       u'airport_name', u'arr_flights_ct', u'arr_del15_ct', u'carrier_ct',
       u' weather_ct', u'nas_ct', u'security_ct', u'late_aircraft_ct',
       u'arr_cancelled', u'arr_diverted', u' arr_delay_mins',
       u' carrier_delay_mins', u'weather_delay_mins', u'nas_delay_mins',
       u'security_delay_mins', u'late_aircraft_delay_mins'],
      dtype='object')

In [6]:
flight_delay_by_cause_df = Flight_delay_by_cause_df.groupby(['airport','year'])[u'arr_flights_ct', u'arr_del15_ct',u'carrier_ct',
       u' weather_ct', u'nas_ct', u'security_ct', u'late_aircraft_ct', u' arr_delay_mins', u' carrier_delay_mins', 
       u'weather_delay_mins', u'nas_delay_mins', u'security_delay_mins', u'late_aircraft_delay_mins'].sum()

In [14]:
flight_delay_by_cause_df.ix['ABE'].ix[2004]

arr_flights_ct               6886.00
arr_del15_ct                 1335.00
carrier_ct                    574.94
 weather_ct                   230.98
nas_ct                        302.19
security_ct                     2.66
late_aircraft_ct              224.32
 arr_delay_mins             74635.00
 carrier_delay_mins         28876.00
weather_delay_mins          16136.00
nas_delay_mins              12986.00
security_delay_mins            99.00
late_aircraft_delay_mins    16538.00
Name: 2004, dtype: float64

In [15]:
flight_delay_by_cause_df

Unnamed: 0_level_0,Unnamed: 1_level_0,arr_flights_ct,arr_del15_ct,carrier_ct,weather_ct,nas_ct,security_ct,late_aircraft_ct,arr_delay_mins,carrier_delay_mins,weather_delay_mins,nas_delay_mins,security_delay_mins,late_aircraft_delay_mins
airport,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
ABE,2004,6886.0,1335.0,574.94,230.98,302.19,2.66,224.32,74635.0,28876.0,16136.0,12986.0,99.0,16538.0
ABE,2005,4640.0,878.0,396.52,139.38,185.56,2.25,154.21,45166.0,21452.0,8781.0,6671.0,89.0,8173.0
ABE,2006,5129.0,1284.0,455.19,129.60,270.15,1.85,427.16,77196.0,24045.0,8644.0,11743.0,242.0,32522.0
ABE,2007,5700.0,1450.0,604.49,84.13,330.67,1.46,429.21,81446.0,32627.0,5731.0,13222.0,71.0,29795.0
ABE,2008,4795.0,1078.0,419.59,54.47,280.46,2.43,321.02,66580.0,25340.0,4619.0,12814.0,179.0,23628.0
ABE,2009,4080.0,745.0,303.66,21.90,210.09,1.62,207.80,41287.0,17717.0,1886.0,9464.0,51.0,12169.0
ABE,2010,4104.0,852.0,320.74,32.76,215.56,1.25,281.63,52207.0,20176.0,2610.0,8666.0,33.0,20722.0
ABE,2011,3853.0,825.0,285.85,22.77,184.26,1.08,331.06,52439.0,18431.0,2115.0,9005.0,31.0,22857.0
ABE,2012,2842.0,534.0,230.55,6.61,103.32,0.19,193.32,32869.0,15080.0,509.0,4147.0,8.0,13125.0
ABE,2013,3098.0,692.0,267.53,11.36,160.35,0.73,252.03,42352.0,16947.0,606.0,7380.0,51.0,17368.0


In [7]:
flight_delay_by_cause_df[flight_delay_by_cause_df['airport'] == 'ATL']

KeyError: 'airport'