# Airlines Exploratory Data Analysis

In [1]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


In [2]:
# ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the airline statistics CSV (path is relative to the notebook's working
# directory) into `df`, the frame the following cells operate on.
df = pd.read_csv(filepath_or_buffer='airlines.csv')
# Preview the first five rows as a quick sanity check of the parse.
df.head(n=5)

Unnamed: 0,Airport.Code,Airport.Name,Time.Label,Time.Month,Time.Month Name,Time.Year,Statistics.# of Delays.Carrier,Statistics.# of Delays.Late Aircraft,Statistics.# of Delays.National Aviation System,Statistics.# of Delays.Security,...,Statistics.Flights.Delayed,Statistics.Flights.Diverted,Statistics.Flights.On Time,Statistics.Flights.Total,Statistics.Minutes Delayed.Carrier,Statistics.Minutes Delayed.Late Aircraft,Statistics.Minutes Delayed.National Aviation System,Statistics.Minutes Delayed.Security,Statistics.Minutes Delayed.Total,Statistics.Minutes Delayed.Weather
0,ATL,"Atlanta, GA: Hartsfield-Jackson Atlanta Intern...",2003/06,6,June,2003,1009,1275,3217,17,...,5843,27,23974,30060,61606,68335,118831,518,268764,19474
1,BOS,"Boston, MA: Logan International",2003/06,6,June,2003,374,495,685,3,...,1623,3,7875,9639,20319,28189,24400,99,77167,4160
2,BWI,"Baltimore, MD: Baltimore/Washington Internatio...",2003/06,6,June,2003,296,477,389,8,...,1245,15,6998,8287,13635,26810,17556,278,64480,6201
3,CLT,"Charlotte, NC: Charlotte Douglas International",2003/06,6,June,2003,300,472,735,2,...,1562,14,7021,8670,14763,23379,23804,127,65865,3792
4,DCA,"Washington, DC: Ronald Reagan Washington National",2003/06,6,June,2003,283,268,487,4,...,1100,18,5321,6513,13775,13712,20999,120,52747,4141


In [4]:
# List the dtype pandas inferred for each column. Leaving the expression as the
# cell's last line lets the notebook display it directly; no print() is needed.
df.dtypes

Airport.Code                                           object
Airport.Name                                           object
Time.Label                                             object
Time.Month                                              int64
Time.Month Name                                        object
Time.Year                                               int64
Statistics.# of Delays.Carrier                          int64
Statistics.# of Delays.Late Aircraft                    int64
Statistics.# of Delays.National Aviation System         int64
Statistics.# of Delays.Security                         int64
Statistics.# of Delays.Weather                          int64
Statistics.Carriers.Names                              object
Statistics.Carriers.Total                               int64
Statistics.Flights.Cancelled                            int64
Statistics.Flights.Delayed                              int64
Statistics.Flights.Diverted                             int64
Statisti

In [6]:
# Separate numerical and categorical variables.
#
# The split is driven by dtype instead of two hand-maintained lists of column
# names, so it stays in sync with the CSV if columns are added or renamed.
# For the columns visible in this notebook's outputs it produces the same two
# frames as the explicit lists did, with columns kept in their original order.

# Non-numeric columns: airport code/name, time label, month name, carrier names.
df_cate = df.select_dtypes(exclude='number')
# Numeric columns: month, year and the delay / flight / minutes-delayed statistics.
df_num = df.select_dtypes(include='number')

In [7]:
# Pairwise Pearson correlation matrix of the numeric columns held in df_num
# (the non-numeric columns live in df_cate and are not part of this table).
df_num.corr(method='pearson')

Unnamed: 0,Time.Month,Time.Year,Statistics.# of Delays.Carrier,Statistics.# of Delays.Late Aircraft,Statistics.# of Delays.National Aviation System,Statistics.# of Delays.Security,Statistics.# of Delays.Weather,Statistics.Carriers.Total,Statistics.Flights.Cancelled,Statistics.Flights.Delayed,Statistics.Flights.Diverted,Statistics.Flights.On Time,Statistics.Flights.Total,Statistics.Minutes Delayed.Carrier,Statistics.Minutes Delayed.Late Aircraft,Statistics.Minutes Delayed.National Aviation System,Statistics.Minutes Delayed.Security,Statistics.Minutes Delayed.Total,Statistics.Minutes Delayed.Weather
Time.Month,1.0,-0.075977,-0.008749,-0.021178,-0.010177,0.044398,-0.063141,0.009466,-0.132985,-0.01675,-0.009689,0.024357,0.009119,-0.01942,-0.027362,-0.008659,0.043901,-0.021451,-0.065504
Time.Year,-0.075977,1.0,-0.067204,0.000373,-0.195279,-0.315334,-0.191346,-0.483083,-0.046951,-0.127542,-0.005181,-0.052625,-0.07474,0.028561,0.026476,-0.150661,-0.249772,-0.069273,-0.134033
Statistics.# of Delays.Carrier,-0.008749,-0.067204,1.0,0.872725,0.648211,0.407397,0.729619,0.333489,0.518305,0.861985,0.440189,0.758744,0.829259,0.952155,0.836543,0.532933,0.380787,0.7983,0.708586
Statistics.# of Delays.Late Aircraft,-0.021178,0.000373,0.872725,1.0,0.70333,0.312898,0.684675,0.268856,0.608284,0.906646,0.541561,0.741917,0.831632,0.894121,0.979729,0.626905,0.297629,0.881326,0.697966
Statistics.# of Delays.National Aviation System,-0.010177,-0.195279,0.648211,0.70333,1.0,0.216732,0.731453,0.263649,0.655539,0.927546,0.485182,0.590919,0.721083,0.691461,0.730317,0.956954,0.243707,0.920091,0.698585
Statistics.# of Delays.Security,0.044398,-0.315334,0.407397,0.312898,0.216732,1.0,0.278034,0.235272,0.11753,0.313698,0.132249,0.258856,0.285214,0.297827,0.263751,0.148077,0.868281,0.244291,0.249887
Statistics.# of Delays.Weather,-0.063141,-0.191346,0.729619,0.684675,0.731453,0.278034,1.0,0.234537,0.586373,0.80441,0.56334,0.556228,0.660898,0.755613,0.697096,0.659647,0.28051,0.787211,0.962323
Statistics.Carriers.Total,0.009466,-0.483083,0.333489,0.268856,0.263649,0.235272,0.234537,1.0,0.16097,0.305701,0.074464,0.280551,0.301617,0.250009,0.223946,0.198423,0.203467,0.241707,0.20055
Statistics.Flights.Cancelled,-0.132985,-0.046951,0.518305,0.608284,0.655539,0.11753,0.586373,0.16097,1.0,0.678981,0.428933,0.413247,0.53511,0.591412,0.656723,0.675313,0.138741,0.726973,0.626871
Statistics.Flights.Delayed,-0.01675,-0.127542,0.861985,0.906646,0.927546,0.313698,0.80441,0.305701,0.678981,1.0,0.549239,0.733519,0.851394,0.883851,0.907936,0.85366,0.317777,0.974404,0.785264
