In [1]:
import os, sys
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline

Investigate the performance of flights over time or simply look at data for a given year and create a graphic that showcases your finding(s).

In [2]:
def addDate(df):
    """ Function to add a column with date from year, month and day columns """
    import datetime 
    dt = df[['Year', 'Month', 'DayofMonth']]
    dt.columns = ["year", "month", "day"]
    dt.head(2)

    #pd.to_datetime(dt["year", "month", "day"])
    df["Date"] = pd.to_datetime(dt.year*10000 + dt.month*100 + dt.day, format='%Y%m%d')

    return df

# Reading data

General delay causes:
  - From http://www.transtats.bts.gov/ot_delay/OT_DelayCause1.asp?pn=1
  - Download file: http://www.transtats.bts.gov/ot_delay/ot_delaycause1.asp?display=download&pn=0&month=6&year=2016

In [3]:
df_delays = pd.read_csv("./rawData/airline_delay_causes.csv")
print(" Dataframe with {0} registers and the folloging data columns: \n {1}".format(
        len(df_delays), ", ".join(df_delays.columns)))

 Dataframe with 55342 registers and the folloging data columns: 
 year,  month, carrier, carrier_name, airport, airport_name, arr_flights, arr_del15, carrier_ct,  weather_ct, nas_ct, security_ct, late_aircraft_ct, arr_cancelled, arr_diverted,  arr_delay,  carrier_delay, weather_delay, nas_delay, security_delay, late_aircraft_delay, Unnamed: 21


Detailed causes by year:
  - Download yearly files from http://stat-computing.org/dataexpo/2009/the-data.html
  - Example file: http://stat-computing.org/dataexpo/2009/1992.csv.bz2

In [4]:
df_origs = {}

In [5]:
## Read the files year by year to a dictionary
years = range(1987,2009)
print (" Reading ", years[0])

for kyear in years:
    print (" Searching ", kyear)
    if not kyear in df_origs.keys():
        print (" Not found. Reading file for ", kyear)
        try:
            df_origs[kyear] = pd.read_csv("./rawData/{0}.csv.bz2".format(kyear), engine="c")
        except:
            print("Error reading", kyear)
            pass

 Reading  1987
 Searching  1987
 Not found. Reading file for  1987
 Searching  1988
 Not found. Reading file for  1988
 Searching  1989
 Not found. Reading file for  1989
 Searching  1990
 Not found. Reading file for  1990
 Searching  1991
 Not found. Reading file for  1991
 Searching  1992
 Not found. Reading file for  1992
 Searching  1993
 Not found. Reading file for  1993
 Searching  1994
 Not found. Reading file for  1994
 Searching  1995
 Not found. Reading file for  1995
 Searching  1996
 Not found. Reading file for  1996
 Searching  1997
 Not found. Reading file for  1997
 Searching  1998
 Not found. Reading file for  1998
 Searching  1999
 Not found. Reading file for  1999
 Searching  2000
 Not found. Reading file for  2000
 Searching  2001
 Not found. Reading file for  2001
 Searching  2002
 Not found. Reading file for  2002
Error reading 2002
 Searching  2003
 Not found. Reading file for  2003


  interactivity=interactivity, compiler=compiler, result=result)


 Searching  2004
 Not found. Reading file for  2004
 Searching  2005
 Not found. Reading file for  2005
 Searching  2006
 Not found. Reading file for  2006
 Searching  2007
 Not found. Reading file for  2007
 Searching  2008
 Not found. Reading file for  2008


In [6]:
# Read years printed
print(" Read the years : ", sorted(df_origs.keys()))

 Read the years :  [1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2007, 2008]


In [7]:
# Add data to dataframes and print stats
yk = list(df_origs.keys())
print (yk)
df_orig = df_origs[yk[0]]
df_orig = addDate(df_orig)
print(" Dataframe with {0} registers and the folloging data columns: \n {1}".format(
        len(df_orig), ", ".join(df_orig.columns)))

[1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2007, 2008]
 Dataframe with 1311826 registers and the folloging data columns: 
 Year, Month, DayofMonth, DayOfWeek, DepTime, CRSDepTime, ArrTime, CRSArrTime, UniqueCarrier, FlightNum, TailNum, ActualElapsedTime, CRSElapsedTime, AirTime, ArrDelay, DepDelay, Origin, Dest, Distance, TaxiIn, TaxiOut, Cancelled, CancellationCode, Diverted, CarrierDelay, WeatherDelay, NASDelay, SecurityDelay, LateAircraftDelay, Date


# Carriers names

Generate a dictionary to transform short carriers to extended carriers name

In [35]:
## Get carriers CODE
Carriers = set(df_orig["UniqueCarrier"].values.tolist())
print(" Number of carriers {0} (Code):\n\t{1}".format(len(Carriers), ", ".join(Carriers)))

## Asociate carrier CODE to Carrier Name
def genNamesDict(df_delays):
    cD = df_delays[["carrier","carrier_name"]].drop_duplicates()
    return cD.set_index("carrier").to_dict()["carrier_name"]
cd = genNamesDict(df_delays)

## Print carriers names
cdl = [cd[i] for i in df_delays["carrier"].drop_duplicates()]
print(" Number of carriers {0} (Names):\n\t{1}".format(len(Carriers), ",\n\t".join(cdl)))

 Number of carriers 14 (Code):
	NW, TW, PI, CO, EA, PA (1), AA, PS, WN, AS, UA, HP, US, DL
 Number of carriers 14 (Names):
	American Airlines Inc.,
	Alaska Airlines Inc.,
	JetBlue Airways,
	Continental Air Lines Inc.,
	Independence Air,
	Delta Air Lines Inc.,
	ExpressJet Airlines Inc.,
	AirTran Airways Corporation,
	America West Airlines Inc.,
	Envoy Air,
	Northwest Airlines Inc.,
	SkyWest Airlines Inc.,
	ExpressJet Airlines Inc.,
	ATA Airlines d/b/a ATA,
	United Air Lines Inc.,
	US Airways Inc.,
	Southwest Airlines Co.,
	Hawaiian Airlines Inc.,
	Comair Inc.,
	Frontier Airlines Inc.,
	Mesa Airlines Inc.,
	Aloha Airlines Inc.,
	ExpressJet Airlines Inc.,
	Endeavor Air Inc.,
	Virgin America,
	Spirit Air Lines


## 01.-  Flights per year

### Number of flights per year

In [10]:
## Calculated as the number of registers
dd = {}
for kyear in df_origs.keys():
    value = len(df_origs[kyear])
    #print(kyear, value)
    dd[kyear] = value

# To dataframe and file
df = pd.DataFrame(dd, index=[0]).T.reset_index() #, orient='index')
df.columns = ["Year", "Value"]
df.set_index("Year", inplace=True)
df.to_csv("data/01-Year_NFlights.csv")
#df.to_csv
print( df )

        Value
Year         
1987  1311826
1988  5202096
1989  5041200
1990  5270893
1991  5076925
1992  5092157
1993  5070501
1994  5180048
1995  5327435
1996  5351983
1997  5411843
1998  5384721
1999  5527884
2000  5683047
2001  5967780
2003  6488540
2004  7129270
2005  7140596
2006  7141922
2007  7453215
2008  7009728


## 02.-  Flights ON TIME (before scheduled time) per year


In [11]:
## Calculated as the ratio of fights arrived on time
dd = {}
for kyear in df_origs.keys():
    df = df_origs[kyear]
    value = 1-len(df[df.ArrDelay > 0 ])/(float(len(df)))
    print(kyear, value)
    dd[kyear] = value

# To dataframe and file
df = pd.DataFrame(dd, index=[0]).T.reset_index() 
df.columns = ["Year", "Value"]
df.set_index("Year", inplace=True)
df.to_csv("data/02-Year_FlightsOnTime.csv")
print( df )

1987 0.3839175317458261
1988 0.4638334240659918
1989 0.43808339284297393
1990 0.4799609857380903
1991 0.5172469555882744
1992 0.5069661049335281
1993 0.5128487303325648
1994 0.5115977689782025
1995 0.5007222049635518
1996 0.4720549373942331
1997 0.49861664501353786
1998 0.5334787076247776
1999 0.5314400591618782
2000 0.5111662810460655
2001 0.5911942129233987
2003 0.6237457116701137
2004 0.5766127527783349
2005 0.5710131199132398
2006 0.5523101484446344
2007 0.5366489494801907
2008 0.5749472732750829
         Value
Year          
1987  0.383918
1988  0.463833
1989  0.438083
1990  0.479961
1991  0.517247
1992  0.506966
1993  0.512849
1994  0.511598
1995  0.500722
1996  0.472055
1997  0.498617
1998  0.533479
1999  0.531440
2000  0.511166
2001  0.591194
2003  0.623746
2004  0.576613
2005  0.571013
2006  0.552310
2007  0.536649
2008  0.574947


## 03.-  Delay causes

In [12]:
df = df_origs[2004].head(20)
print (  ", ".join(df.columns) ) 

Year, Month, DayofMonth, DayOfWeek, DepTime, CRSDepTime, ArrTime, CRSArrTime, UniqueCarrier, FlightNum, TailNum, ActualElapsedTime, CRSElapsedTime, AirTime, ArrDelay, DepDelay, Origin, Dest, Distance, TaxiIn, TaxiOut, Cancelled, CancellationCode, Diverted, CarrierDelay, WeatherDelay, NASDelay, SecurityDelay, LateAircraftDelay


In [13]:
# Delay flights (Arrival Delay > 0)
df = df[df.ArrDelay > 0].copy()
df[["ArrTime","CRSArrTime","Distance","Cancelled"]+[i for i in df.columns if "Delay" in i]]

Unnamed: 0,ArrTime,CRSArrTime,Distance,Cancelled,ArrDelay,DepDelay,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay
2,920.0,915,599,0,5.0,3.0,0,0,0,0,0
4,918.0,915,599,0,3.0,5.0,0,0,0,0,0
6,944.0,915,599,0,29.0,20.0,20,0,9,0,0
10,923.0,915,599,0,8.0,1.0,0,0,0,0,0
13,939.0,915,599,0,24.0,-5.0,0,0,24,0,0
18,1753.0,1725,1739,0,28.0,5.0,0,0,28,0,0
19,1832.0,1725,1739,0,67.0,65.0,9,0,2,0,56


In [14]:
delaycols = ["CarrierDelay", "WeatherDelay", "NASDelay", "SecurityDelay", "LateAircraftDelay"]
df[delaycols+["ArrDelay","Cancelled","Diverted"]].sum(axis=0)

CarrierDelay          29.0
WeatherDelay           0.0
NASDelay              63.0
SecurityDelay          0.0
LateAircraftDelay     56.0
ArrDelay             164.0
Cancelled              0.0
Diverted               0.0
dtype: float64

In [15]:
# Sum delay time by year
dd = {}
df3 = pd.DataFrame()
for kyear in range(2003,2009): #df_origs.keys():
    df = df_origs[kyear]
    ds = df[delaycols+["ArrDelay", "DepDelay"]].sum(axis=0)
    ds["Year"] = kyear
    df3 = pd.concat([df3, ds], axis=1)

# To dataframe
df = df3.T
df["Year"] = df["Year"].astype(int)
df.set_index("Year",inplace=True)

print( df ) 

        ArrDelay  CarrierDelay    DepDelay  LateAircraftDelay    NASDelay  \
Year                                                                        
2003  22931403.0     8568280.0  33520821.0         10042720.0  11862557.0   
2004  45476603.0    18833073.0  55258141.0         24566219.0  24472354.0   
2005  50217971.0    21441880.0  60779748.0         26157607.0  24046197.0   
2006  60812894.0    24255294.0  70857247.0         32245816.0  25610715.0   
2007  74151049.0    28808434.0  83127865.0         38004942.0  28200746.0   
2008  55994978.0    24048217.0  68546281.0         31670242.0  26171501.0   

      SecurityDelay  WeatherDelay  
Year                               
2003        80663.0     1987155.0  
2004       179219.0     5037460.0  
2005       141045.0     4713790.0  
2006       221371.0     4853843.0  
2007       176906.0     5739649.0  
2008       114316.0     4633717.0  


In [17]:
# Calculate each delay cause share
sums = df[delaycols].sum(axis=1)
sums.index

for kcol in sums.index: 
    #print(kcol, sums[kcol], df.loc[kcol])
    df.loc[kcol] = df.loc[kcol].div(sums[kcol])

# To dataframe and file
df1 = df[delaycols].copy()
df1.columns = [i.replace("Delay","") for i in df1.columns]
df1.to_csv("data/03-Year_FlightsDelayCauses.csv")
df1.head(2)

Unnamed: 0_level_0,Carrier,Weather,NAS,Security,LateAircraft
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2003,0.263304,0.061065,0.364538,0.002479,0.308614
2004,0.257676,0.068923,0.334833,0.002452,0.336117


## 04.-  Greater cause by day

delaycols = [i for i in df.columns if "Delay" in i]

for i in delaycols:
    df[i] = df[i]/df["ArrDelay"]
print(df["ArrDelay"].count())
df["OtherDelay"] = 1 - df[delaycols].sum(axis=1)
df[delaycols+["OtherDelay"]].sum()

In [36]:
# Get maximum cause by day
dfDateFinal = pd.DataFrame()
for kyear in range(2003,2009):
    print(kyear)
    df = df_origs[kyear]
    df = addDate(df)

    def top(x):
        x.set_index('Date', inplace=True)
        df = pd.DataFrame({'MayorCause':[],'MaxValue':[]})
        df.index.name='Date'
        df.loc[x.index.values[0],['MayorCause']] = x.sum().nlargest(1).index.tolist()[0]
        df.loc[x.index.values[0],['MaxValue']] = x.sum().nlargest(1).values
        return df

    dfDate = df[["Date","LateAircraftDelay", "CarrierDelay", "NASDelay", "WeatherDelay", "SecurityDelay"]]
    dfDate.columns = [ i.replace("Delay","") for i in dfDate.columns] 
    dfDate = dfDate.groupby('Date').apply(top).reset_index(level=1, drop=True).reset_index()
    dfDate.set_index("Date", inplace=True)
    #dfDate.to_csv("data/04-Date_FlightsDelayCauses.csv")
    #dfDate.head()
    dfDateFinal = pd.concat([dfDateFinal,dfDate], axis=0)
    print (len(dfDateFinal))
    
dfDateFinal.tail(2)

2003
365
2004
731
2005
1096
2006
1461
2007
1826
2008
2192


Unnamed: 0_level_0,MaxValue,MayorCause
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2008-12-30,80162.0,LateAircraft
2008-12-31,83589.0,LateAircraft


In [26]:
# maximum cause by day as a value for plotting
dd = {"Carrier": 1, "NAS":2, "Weather":3, "LateAircraft": 4, "Security": 5}
def ddd(i):
    return dd[i]
dfDateFinal.head()

# To file
dfDateFinal["value"] = dfDateFinal["MayorCause"].apply(ddd)
dfDateFinal.to_csv("data/04-Date_FlightsDelayCauses.csv")
dfDateFinal.tail()

Unnamed: 0_level_0,MaxValue,MayorCause,value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2008-12-27,262837.0,LateAircraft,4
2008-12-28,106602.0,LateAircraft,4
2008-12-29,59673.0,Carrier,1
2008-12-30,80162.0,LateAircraft,4
2008-12-31,83589.0,LateAircraft,4


## 05.- Airlines "Late Aircraft" 

In [37]:
# Dataframe structure
df_delays.head(3)

Unnamed: 0,year,month,carrier,carrier_name,airport,airport_name,arr_flights,arr_del15,carrier_ct,weather_ct,...,late_aircraft_ct,arr_cancelled,arr_diverted,arr_delay,carrier_delay,weather_delay,nas_delay,security_delay,late_aircraft_delay,Unnamed:21
0,2003,6,AA,American Airlines Inc.,ATL,"Atlanta, GA: Hartsfield-Jackson Atlanta Intern...",752.0,186.0,33.99,27.82,...,17.53,5.0,0.0,8314.0,1367.0,1722.0,3817.0,139.0,1269.0,
1,2003,6,AA,American Airlines Inc.,BOS,"Boston, MA: Logan International",1266.0,225.0,69.43,23.66,...,45.73,7.0,0.0,12139.0,4201.0,1783.0,3067.0,45.0,3043.0,
2,2003,6,AA,American Airlines Inc.,BWI,"Baltimore, MD: Baltimore/Washington Internatio...",593.0,101.0,17.56,20.49,...,24.69,1.0,1.0,5698.0,1058.0,1332.0,1708.0,0.0,1600.0,


In [39]:
# Dataframe interesting columns averaged per carrier and year
print(df_delays.columns)
def idfun(i):
    return i
df_delays.columns = [ i.replace(" ", "") for i in df_delays.columns]
df = df_delays.groupby(["carrier", "year"]).mean().reset_index() #level=1, drop=True).reset_index()
print(df.reset_index().head(4))
df = df[["carrier","year"] + [ i for i in df.columns if "_delay" in i ]]
df.columns =  [ i.replace("carrier","name") for i in df.columns ]
df.columns =  [ i.replace("name_delay", "carrier").replace("_delay","") for i in df.columns ]
df.head()

Index(['year', 'month', 'carrier', 'carrier_name', 'airport', 'airport_name',
       'arr_flights', 'arr_del15', 'carrier_ct', 'weather_ct', 'nas_ct',
       'security_ct', 'late_aircraft_ct', 'arr_cancelled', 'arr_diverted',
       'arr_delay', 'carrier_delay', 'weather_delay', 'nas_delay',
       'security_delay', 'late_aircraft_delay', 'Unnamed:21'],
      dtype='object')
   index carrier  year     month  arr_flights   arr_del15  carrier_ct  \
0      0      9E  2007  6.463087   637.000000  132.859060   29.974832   
1      1      9E  2008  6.761364   537.312500   92.034091   19.628239   
2      2      9E  2009  6.160714   582.976190   95.339286   18.588750   
3      3      9E  2010  6.755319   540.702128  103.994681   20.430957   

   weather_ct     nas_ct  security_ct  late_aircraft_ct  arr_cancelled  \
0    5.633221  50.800805     0.197987         46.252148      22.973154   
1    3.117386  34.643750     0.086875         34.558239      16.721591   
2    3.269226  43.564464     0.102

Unnamed: 0,name,year,arr,carrier,weather,nas,security,late_aircraft
0,9E,2007,8671.38255,2677.469799,732.214765,2283.892617,6.516779,2971.288591
1,9E,2008,5612.801136,1576.835227,307.477273,1522.295455,3.289773,2202.903409
2,9E,2009,5245.529762,1330.559524,285.946429,1832.803571,2.625,1793.595238
3,9E,2010,5872.12766,1488.797872,265.468085,1662.505319,6.93617,2448.420213
4,9E,2013,7041.669456,1938.004184,323.401674,2225.702929,7.07113,2547.48954


In [29]:
df.sort_values(["year"]).tail(3)

Unnamed: 0,name,year,arr,carrier,weather,nas,security,late_aircraft
35,AS,2016,2102.073333,580.033333,58.353333,793.433333,11.086667,659.166667
75,DL,2016,16288.804598,6342.37931,933.931034,4043.655172,9.678161,4959.16092
49,B6,2016,10610.611111,2976.972222,324.868056,2724.006944,30.381944,4554.381944


In [76]:
# Take years 
yearEnd = 2010
yearFir = 2003
print(len(df), len(df[(df.year<=yearEnd) &
                      (df.year>=yearFir)]))
dfU = df[(df.year<=yearEnd) & (df.year>=yearFir)]
dfU.head(20) 

242 155


Unnamed: 0,name,year,arr,carrier,weather,nas,security,late_aircraft
0,9E,2007,8671.38255,2677.469799,732.214765,2283.892617,6.516779,2971.288591
1,9E,2008,5612.801136,1576.835227,307.477273,1522.295455,3.289773,2202.903409
2,9E,2009,5245.529762,1330.559524,285.946429,1832.803571,2.625,1793.595238
3,9E,2010,5872.12766,1488.797872,265.468085,1662.505319,6.93617,2448.420213
5,AA,2003,16631.384236,4223.187192,1165.320197,7112.950739,31.014778,4098.91133
6,AA,2004,19866.91092,4542.178161,1537.557471,7832.543103,44.862069,5909.770115
7,AA,2005,18535.836207,4472.741379,1215.916667,6913.83908,21.272989,5912.066092
8,AA,2006,19883.797101,4704.527536,1024.037681,7238.118841,46.530435,6870.582609
9,AA,2007,25509.270833,5991.660714,1674.669643,8550.964286,19.550595,9272.425595
10,AA,2008,22654.321429,6113.741071,890.732143,7797.169643,16.651786,7836.026786


In [77]:
names = set(dfU["name"])
namfli = []
for kname in names: 
    print(len(dfU[ dfU.name == kname] ), (yearEnd - yearFir + 1))
    if len(dfU[ dfU.name == kname] ) == (yearEnd - yearFir + 1):
        namfli.append(kname)
        print(dfU[ dfU.name == kname])

", ".join(namfli)

7 8
3 8
6 8
4 8
8 8
   name  year          arr      carrier     weather          nas   security  \
22   AS  2003  4317.933333  1459.352381   51.619048   898.542857  30.295238   
23   AS  2004  5518.100000  1945.827778   72.450000   989.150000  39.472222   
24   AS  2005  7437.038462  2938.313187   81.939560  1187.692308  25.153846   
25   AS  2006  6405.586022  2025.978495   80.252688  1517.736559  29.908602   
26   AS  2007  7140.144444  2350.366667   71.733333  1659.433333  34.327778   
27   AS  2008  5101.448087  1554.961749  117.617486  1565.289617  15.196721   
28   AS  2009  2914.608040   877.934673   76.170854  1036.341709   7.261307   
29   AS  2010  1976.212963   623.787037   45.291667   689.004630   4.384259   

    late_aircraft  
22    1878.123810  
23    2471.200000  
24    3203.939560  
25    2751.709677  
26    3024.283333  
27    1848.382514  
28     916.899497  
29     613.745370  
4 8
5 8
5 8
4 8
7 8
8 8
   name  year           arr      carrier      weather           

'AS, EV, MQ, WN, B6, HA, OO, AA, CO, FL, UA, US, DL'

### Late Aircraft 

In [78]:
## Late Aircraft cause

dff = pd.DataFrame()
for kname in namfli:
    dftmp = dfU[dfU.name == kname][["year", "late_aircraft"]].set_index("year")
    dftmp.columns = [kname]
    dff = pd.concat([dff, dftmp], axis=1)
    
## Late Aircraft cause relative to 2003
dff = dff/dff.loc[2003]

## Carrier name extended and to file
cd = genNamesDict(df_delays)
dff.columns = [str(cd[i]) for i in dff.columns]
dff.to_csv("data/05-DelayAircraftCarrier.csv")
dff

Unnamed: 0_level_0,Alaska Airlines Inc.,ExpressJet Airlines Inc.,Envoy Air,Southwest Airlines Co.,JetBlue Airways,Hawaiian Airlines Inc.,SkyWest Airlines Inc.,American Airlines Inc.,Continental Air Lines Inc.,AirTran Airways Corporation,United Air Lines Inc.,US Airways Inc.,Delta Air Lines Inc.
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2004,1.315781,0.372753,1.144472,1.496053,1.58261,0.814863,1.722558,1.44179,1.107386,1.027819,1.181757,1.101932,1.537024
2005,1.705926,0.402873,1.192199,1.606966,3.544991,1.297914,2.105736,1.44235,1.311687,1.69286,1.361266,1.347652,1.774582
2006,1.465138,0.684616,1.659607,1.758336,4.496346,2.962625,4.173463,1.676197,2.047832,1.649684,1.911773,1.307103,1.537323
2007,1.610268,1.238614,2.17514,1.768046,5.599559,3.44937,4.292704,2.262168,2.149492,1.408343,2.311085,1.864045,1.524659
2008,0.984164,1.335203,1.730456,1.914583,4.675826,2.550847,3.879439,1.911734,1.946013,1.458097,2.041526,1.02668,1.497967
2009,0.4882,6.641402,1.310385,1.305156,2.8361,1.248153,4.449387,1.263515,1.163095,1.380941,1.160617,0.800722,1.196698
2010,0.326786,3.03835,1.166317,1.426533,3.146604,1.058887,6.679391,0.99233,0.825936,0.903529,0.757662,0.687148,2.367448


### Total

In [79]:
# Total time delayed

dff = pd.DataFrame()
for kname in namfli:
    dftmp = dfU[dfU.name == kname][["year", "arr"]].set_index("year")
    dftmp.columns = [kname]
    #print(dftmp)
    #dff = pd.concat([dff, dfU[dfU.name == kname][["name", "year","late_aircraft"]]], axis=0)
    dff = pd.concat([dff, dftmp], axis=1)

## Late Aircraft cause relative to 2003
dff = dff/dff.loc[2003]

## Carrier name extended and to file
cd = genNamesDict(df_delays)
dff.columns = [str(cd[i]) for i in dff.columns]
dff.to_csv("data/06-DelayTotalCarrier.csv")
dff.head()

Unnamed: 0_level_0,Alaska Airlines Inc.,ExpressJet Airlines Inc.,Envoy Air,Southwest Airlines Co.,JetBlue Airways,Hawaiian Airlines Inc.,SkyWest Airlines Inc.,American Airlines Inc.,Continental Air Lines Inc.,AirTran Airways Corporation,United Air Lines Inc.,US Airways Inc.,Delta Air Lines Inc.
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2004,1.277949,1.106513,1.094236,1.455292,1.340056,0.747727,1.405029,1.194543,1.091024,1.046357,1.134945,1.115708,1.37924
2005,1.722361,1.064566,1.17558,1.600971,2.791234,0.75512,1.602374,1.11451,1.227482,1.560324,1.089476,1.350506,1.423765
2006,1.483484,1.297763,1.52662,1.767338,3.79631,1.138705,2.239641,1.195559,1.581846,1.389585,1.354697,1.453231,1.148358
2007,1.653602,1.560399,1.967393,1.754456,4.881482,1.39676,2.029741,1.533803,1.6181,1.190326,1.540971,1.938567,1.124227
