# LAX DATASET

In [1]:
#Dependencies

#import libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import calendar

import matplotlib.style as style
style.available
style.use('fivethirtyeight')

#ignore warnings
import warnings
warnings.filterwarnings("ignore")



In [2]:
# Load the LAX records, discard every row that has a missing value, then
# remove the stray leading space from four of the column names.
data_lax = (
    pd.read_csv('airline_details_LAX.csv')
    .dropna(how='any')
    .rename(columns={
        ' month': 'month',
        ' weather_ct': 'weather_ct',
        ' arr_delay': 'arr_delay',
        ' carrier_delay': 'carrier_delay',
    })
)
data_lax.columns

FileNotFoundError: File b'airline_details_LAX.csv' does not exist

In [None]:
# Keep only the columns used below: year/month, carrier and airport identifiers,
# the arrival counters and the per-cause delay counters.
data_lax = data_lax[['year','month', 'carrier', 'carrier_name', 'airport', 'airport_name', 'arr_flights', 'arr_del15', 'carrier_ct', 'weather_ct', 'nas_ct', 'security_ct', 'late_aircraft_ct', 'arr_cancelled', 'arr_diverted']]



In [None]:
# Exclude the 2019 rows; the analysis is meant to cover 2010 to 2018.
data_lax = data_lax.loc[data_lax['year'].ne(2019)]
data_lax.head()

In [None]:
data_lax.describe()

In [None]:
data_lax.info()

### Flight Delays

In [None]:
# Flight delays: sum 'arr_del15' and each per-cause '*_ct' column over all rows.
(
    total_delays,
    total_carrier_delays,
    total_weather_delays,
    total_nas_delays,
    total_security_delays,
    total_late_aircraft_delays,
) = (
    data_lax[column].sum()
    for column in (
        'arr_del15',
        'carrier_ct',
        'weather_ct',
        'nas_ct',
        'security_ct',
        'late_aircraft_ct',
    )
)


In [None]:
# One-row summary table: the overall delay total next to the total for each cause.
delay_df = pd.DataFrame([{'Total Delays':total_delays, "Faulty Carrier":total_carrier_delays, "Poor Weather":total_weather_delays,
                         "NAS":total_nas_delays, "Security Issues":total_security_delays,
                         "Late Arrivals":total_late_aircraft_delays}])

In [None]:
delay_df

In [None]:
# Pair each delay cause with its total and wedge colour, then split the rows
# into the parallel lists that plt.pie expects.
labels, sizes, colors = map(list, zip(
    ("Faulty Carrier", total_carrier_delays, 'gold'),
    ("Poor Weather", total_weather_delays, 'yellowgreen'),
    ("NAS", total_nas_delays, 'lightcoral'),
    ("Security Issues", total_security_delays, 'blue'),
    ("Late Arrivals", total_late_aircraft_delays, 'lightskyblue'),
))
explode = (0.1, 0, 0, 0, 0)  # offset only the first wedge

# Draw the pie chart with percentage annotations on a circular (equal-aspect) axis.
plt.pie(
    sizes,
    explode=explode,
    labels=labels,
    colors=colors,
    autopct="%1.1f%%",
    shadow=True,
    startangle=140,
)
plt.axis('equal')

### Flights over the years:

In [None]:
# Function to group by, select required columns and compute total number of flights

def aggr_group_df(df, group, aggr_field, fields):
    """Group ``df``, sum the selected columns and append their row-wise total.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    group : str or list of str
        Column name(s) to group by; they become the index of the result.
    aggr_field : str
        Name of the extra column that receives the sum of ``fields`` per row.
    fields : list of str
        Columns to sum within each group.

    Returns
    -------
    pandas.DataFrame
        One row per group with the summed ``fields`` plus ``aggr_field``.
    """
    new_df = df.groupby(by=group)[fields].sum()
    # Vectorised row total: avoids a Python-level call per row and also works
    # when the grouped frame is empty (the row-wise apply did not).
    new_df[aggr_field] = new_df[fields].sum(axis=1)
    return new_df


In [None]:
# Columns summed per group by aggr_group_df; their row-wise sum becomes the total.
# NOTE(review): if 'arr_flights' already counts delayed/cancelled/diverted arrivals,
# adding the other three columns double-counts them -- confirm against the dataset schema.
FLIGHTS_AGGR_FIELDS = ['arr_flights', 'arr_del15', 'arr_cancelled', 'arr_diverted']
# Name of the derived column that holds that row-wise sum.
TOTAL_FLIGHTS_FIELD = 'total_flights'

In [None]:
# Sum the flight counters per year for LAX and add the 'total_flights' column.

data_grouped_yr = aggr_group_df(data_lax, ['year'], TOTAL_FLIGHTS_FIELD, FLIGHTS_AGGR_FIELDS)
data_grouped_yr

In [None]:
# Bar chart of the yearly flight totals.

# The frame is indexed by year, so the index is used as the x axis. `x` must be
# a column label; passing the array of index values as `x` fails on current pandas.
data_grouped_yr.plot.bar(y='total_flights')

plt.title('Total Number Of Flights (2010-2018)')
plt.xlabel('Year')
plt.ylabel('total number')

plt.savefig("Images/Total Number Of Flights(2010-2018).png")
plt.show()

### Total flights over months

In [None]:
# function to convert 'month' to month-names

def to_month_name(month_idx):
    """Return the abbreviated month name for a month number.

    Parameters
    ----------
    month_idx : int or float
        Month number used to index ``calendar.month_abbr`` (1 gives the first
        month; 0 gives the empty string). Integral floats such as ``3.0`` are
        accepted, because a month column stored as floats would otherwise fail.

    Returns
    -------
    str
        The entry of ``calendar.month_abbr`` at that position.

    Raises
    ------
    TypeError
        If ``month_idx`` is a non-integral float (including NaN).
    IndexError
        If ``month_idx`` is outside the range ``calendar.month_abbr`` accepts.
    """
    if isinstance(month_idx, float):
        # list-style indexing rejects floats, so convert whole numbers explicitly
        if not month_idx.is_integer():
            raise TypeError("month index must be a whole number, got %r" % (month_idx,))
        month_idx = int(month_idx)
    return calendar.month_abbr[month_idx]


In [None]:
# Derive a 'month_name' column by translating each month number to its abbreviation.
data_lax['month_name'] = data_lax['month'].map(to_month_name)
data_lax

In [None]:
# Group by the numeric month so the rows come out in calendar order, then
# relabel the index with month abbreviations.
# (Grouping by 'month_name' sorts the groups alphabetically -- Apr, Aug, Dec, ... --
# so overwriting that index with Jan..Dec attached the wrong label to each row.)
data_grouped_mnth = aggr_group_df(data_lax, ['month'], TOTAL_FLIGHTS_FIELD, FLIGHTS_AGGR_FIELDS)

data_grouped_mnth.index = [calendar.month_abbr[int(month)] for month in data_grouped_mnth.index]
data_grouped_mnth

In [None]:
# Bar chart of the monthly flight totals.

# The frame is indexed by month label, so the index is used as the x axis. `x`
# must be a column label; passing the array of index values as `x` fails on
# current pandas.
data_grouped_mnth.plot.bar(y='total_flights')

plt.title('Total Number Of Flights Over Months')
plt.xlabel('Month')
plt.ylabel('total number')

plt.savefig("Images/Total Number Of Flights Over Months.png")
plt.show()

### Total number of flights per carrier over the years

In [None]:
# Sum the flight counters per (year, carrier_name) pair and add 'total_flights';
# the result is indexed by those two keys.

carrier_over_years = aggr_group_df(data_lax, [ 'year', 'carrier_name'], TOTAL_FLIGHTS_FIELD, FLIGHTS_AGGR_FIELDS)
carrier_over_years.head(20)

In [None]:
# Reshape to one row per carrier and one column per year, keeping only 'total_flights'.
# pivot_table applies its default aggregation to each (carrier, year) cell.

airlines_df = carrier_over_years.pivot_table(index='carrier_name', columns='year')['total_flights']
# Drop carriers that lack a value for at least one year.
airlines_df = airlines_df.dropna(how='any')
airlines_df

In [None]:
# Line chart: one line per carrier, total flights per year.

fig, ax = plt.subplots(figsize=(20, 10))

num_of_plots = len(airlines_df.index)
colormap = plt.cm.tab10
# Axes.set_color_cycle is no longer available in matplotlib;
# set_prop_cycle(color=...) is its replacement.
ax.set_prop_cycle(color=[colormap(i) for i in np.linspace(0, 1, num_of_plots)])

for carrier in airlines_df.index.values:
    # Draw on the axes configured above rather than on whichever axes is current.
    airlines_df.loc[carrier, :].plot(kind='line', marker="8", linestyle='-', ax=ax)

plt.title('Total Flights Per Carrier (2010-2018)')
plt.xlabel('Year')
plt.ylabel('total number')

plt.legend(loc='best')
plt.savefig("Images/Total Number Of Flights Per Carrier.png")
plt.show()

## Flights on time

In [None]:
# Per carrier and year: 'arr_flights' as a percentage of 'total_flights'.
# NOTE(review): this treats 'arr_flights' as the on-time count -- confirm that is
# what the column means in the source dataset.

ontime_arr_pct = pd.DataFrame((carrier_over_years['arr_flights'] / carrier_over_years['total_flights']) * 100)

# Reshape to one row per carrier and one column per year.
ontime_arr_pct = ontime_arr_pct.pivot_table(index='carrier_name', columns='year')

# Drop carriers with a missing year, then select the single unnamed value column (0).
ontime_arr_pct = ontime_arr_pct.dropna(how='any')[0]
ontime_arr_pct

In [None]:
# Line chart: one line per carrier, on-time arrival percentage per year.

fig, ax = plt.subplots(figsize=(20, 10))

# Size the colour cycle from the frame that is actually plotted here.
num_of_plots = len(ontime_arr_pct.index)
colormap = plt.cm.tab10
# Axes.set_color_cycle is no longer available in matplotlib;
# set_prop_cycle(color=...) is its replacement.
ax.set_prop_cycle(color=[colormap(i) for i in np.linspace(0, 1, num_of_plots)])

for airline in ontime_arr_pct.index.values:
    # Draw on the axes configured above rather than on whichever axes is current.
    ontime_arr_pct.loc[airline, :].plot(kind='line', marker="8", linestyle='-', ax=ax)

plt.title('On-Time Arrival Percentage (2010-2018)')
plt.xlabel('Year')
plt.ylabel('Percentage(%)')

plt.legend(loc='best')
# Save next to the other figures; the previous path lacked the "Images/" folder
# and the closing parenthesis in the file name.
plt.savefig("Images/On-time Arrival Percentage (2010-2018).png")
plt.show()


## Delay Analysis

In [None]:
# Take the analysis columns from data_lax for the delay breakdown
# (any other column present on data_lax is left out).
delay_data = data_lax[['year','month','carrier','carrier_name','airport','airport_name','arr_flights','arr_del15','carrier_ct','weather_ct','nas_ct','security_ct','late_aircraft_ct','arr_cancelled','arr_diverted']]

delay_data.head()

In [None]:
# Group the data by year and sum 'arr_del15' together with each per-cause delay counter.
delay_data = delay_data.groupby('year')[['arr_del15', 'carrier_ct', 'weather_ct', 'nas_ct', 'security_ct', 'late_aircraft_ct']].sum()
delay_data

In [None]:
# function to find delay-percentage

def find_percentage(flight_delays, total_flight_delays):
    """Return ``flight_delays`` expressed as a percentage of ``total_flight_delays``."""
    share = flight_delays / total_flight_delays
    return share * 100
   
        

In [None]:
# Add a '<cause>%' column for every delay cause: that cause's count as a
# percentage of the row's 'arr_del15' total.
for cause in ('carrier_ct', 'weather_ct', 'nas_ct', 'security_ct', 'late_aircraft_ct'):
    delay_data[cause + '%'] = delay_data.apply(
        lambda row, cause=cause: find_percentage(row[cause], row['arr_del15']),
        axis=1,
    )

delay_data


In [None]:
# Line chart: share of each delay cause per year.

fig, ax = plt.subplots(figsize=(20, 10))

# NOTE(review): the colour cycle is sized by the number of rows although five
# lines are drawn; kept as-is so the line colours stay unchanged.
num_of_plots = len(delay_data.index)
colormap = plt.cm.Set1
# Axes.set_color_cycle is no longer available in matplotlib;
# set_prop_cycle(color=...) is its replacement.
ax.set_prop_cycle(color=[colormap(i) for i in np.linspace(0, 1, num_of_plots)])

# Draw every series on the axes configured above.
delay_data['carrier_ct%'].plot(ax=ax, marker='o', linestyle='-', label='carrier_delay')
delay_data['weather_ct%'].plot(ax=ax, marker='o', linestyle='-', label='weather_delay')
delay_data['nas_ct%'].plot(ax=ax, marker='o', linestyle='-', label='nas_delay')
delay_data['security_ct%'].plot(ax=ax, marker='*', linestyle='-', label='security_delay')
delay_data['late_aircraft_ct%'].plot(ax=ax, marker='x', linestyle='-', label='late_aircraft_delay', alpha=0.5)

plt.title('Causes Of Delays Over The Years')
plt.xlabel('Year')
plt.ylabel('Percentage(%)')
plt.legend()

plt.savefig("Images/Causes Of Delays Over The Years.png")
plt.show()

In [None]:
# ontime arrival vs delays

data_grouped_yr


In [None]:
# Plot the yearly 'arr_flights' and 'arr_del15' totals as two lines on one chart.

for column in ('arr_flights', 'arr_del15'):
    data_grouped_yr[column].plot()

plt.title('Ontime Arrival vs Delays')
plt.xlabel('Year')
plt.ylabel('Count')
plt.legend()

plt.savefig("Images/Ontime Arrival vs Delays.png")
plt.show()

In [None]:
p1 = data_grouped_yr['arr_del15']

In [None]:
# Load the ATL records and remove the stray leading space from four column names.
data_atl = pd.read_csv('airline_details_ATL.csv').rename(
    columns={
        ' month': 'month',
        ' weather_ct': 'weather_ct',
        ' arr_delay': 'arr_delay',
        ' carrier_delay': 'carrier_delay',
    }
)
data_atl.head()

In [None]:
# Keep only the columns used below: year/month, carrier and airport identifiers,
# the arrival counters and the per-cause delay counters.
data_atl = data_atl[['year','month', 'carrier', 'carrier_name', 'airport', 'airport_name', 'arr_flights', 'arr_del15', 'carrier_ct', 'weather_ct', 'nas_ct', 'security_ct', 'late_aircraft_ct', 'arr_cancelled', 'arr_diverted']]
data_atl.head()


In [None]:
# Exclude the 2019 rows; the analysis is meant to cover 2010 to 2018.
data_atl = data_atl.loc[data_atl['year'].ne(2019)]
data_atl.head()

In [None]:
# Yearly ATL totals; p2 is the 'arr_del15' series relabelled as 'arr_del_atl'.
data_grouped_yr_atl = aggr_group_df(
    data_atl,
    ['year'],
    TOTAL_FLIGHTS_FIELD,
    FLIGHTS_AGGR_FIELDS,
)
p2 = data_grouped_yr_atl['arr_del15'].rename('arr_del_atl')
p2

In [None]:
p1

In [None]:
p2

In [None]:
# Convert both series to plain lists and collect them (p1 first, then p2).
p1, p2 = list(p1), list(p2)
p = [p1, p2]

# Years used as the x axis of the stack plot.
x = list(range(2010, 2019))

In [None]:
# Two-row frame (row 0 = p1, row 1 = p2); the cells further down inspect it.
df = pd.DataFrame([p1, p2])

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Stacked area chart of the two yearly delay series.
pal = sns.color_palette("Set1")

# p holds the LAX series (p1) first and the ATL series (p2) second, so label
# the layers by airport instead of the placeholder 'A'/'B'.
plt.stackplot(x, p, labels=['LAX', 'ATL'], colors=pal)
plt.legend(loc='upper left')
plt.show()


In [None]:
# Print every column label of df. The loop name is deliberately not `x`: `x`
# holds the list of years used by the stack plot and must not be overwritten.
for column in df.columns:
    print(column)
    

In [None]:
df

In [None]:
# Print every column label of df, then show its index values. The loop name is
# deliberately not `x`: `x` holds the list of years used by the stack plot.
for column in df.columns:
    print(column)

df.index.values

In [None]:
# Create a FacetGrid over df and map a shaded KDE plot of the 'year' column onto it.
# NOTE(review): df is built with pd.DataFrame([p1, p2]) from two plain lists, so its
# columns are presumably the default integer labels and there is no 'year' column --
# confirm what this cell is meant to plot.
I1 = sns.FacetGrid(df, aspect=4)
I1.map(sns.kdeplot, 'year', shade=True)

# Collect the column labels of df (the list is not used afterwards in this cell).
oldest = [year for year in df.columns]
#I1.set(xlim=(0,4))

# Add a legend to the grid.
I1.add_legend()

In [None]:
# Repetitive code: the same group / sum / add-total steps written out three times for ATL.
# Note that this rebinds data_grouped_yr and carrier_over_years to ATL results.

# 1. group by year
data_grouped_yr = data_atl.groupby(by='year')
data_grouped_yr = pd.DataFrame(data_grouped_yr[['arr_flights', 'arr_del15', 'arr_cancelled', 'arr_diverted']].sum())
data_grouped_yr['total_flights'] = data_grouped_yr['arr_flights'] + data_grouped_yr['arr_del15'] + data_grouped_yr['arr_cancelled']+ data_grouped_yr['arr_diverted']

# 2. group by month
data_grouped_month = data_atl.groupby(by='month')
data_grouped_month = pd.DataFrame(data_grouped_month[['arr_flights', 'arr_del15', 'arr_cancelled', 'arr_diverted']].sum())
data_grouped_month['total_flights'] = data_grouped_month['arr_flights'] + data_grouped_month['arr_del15'] + data_grouped_month['arr_cancelled'] + data_grouped_month['arr_diverted']

# 3. group by year and carrier name
carrier_over_years = data_atl.groupby(by=['year', 'carrier_name'])
carrier_over_years = pd.DataFrame(carrier_over_years[['arr_flights', 'arr_del15', 'arr_cancelled', 'arr_diverted']].sum())
carrier_over_years['total_flights'] = carrier_over_years['arr_flights'] + carrier_over_years['arr_del15'] + carrier_over_years['arr_cancelled'] + carrier_over_years['arr_diverted']



In [None]:
# function:

def aggr_group_df(df, group, aggr_field, fields):
    """Group ``df``, sum the selected columns and append their row-wise total.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    group : str or list of str
        Column name(s) to group by; they become the index of the result.
    aggr_field : str
        Name of the extra column that receives the sum of ``fields`` per row.
    fields : list of str
        Columns to sum within each group.

    Returns
    -------
    pandas.DataFrame
        One row per group with the summed ``fields`` plus ``aggr_field``.
    """
    new_df = df.groupby(by=group)[fields].sum()
    # Vectorised row total: avoids a Python-level call per row and also works
    # when the grouped frame is empty (the row-wise apply did not).
    new_df[aggr_field] = new_df[fields].sum(axis=1)
    return new_df
# Columns summed per group, and the name of the derived row-total column.
FLIGHTS_AGGR_FIELDS = ['arr_flights', 'arr_del15', 'arr_cancelled', 'arr_diverted']
TOTAL_FLIGHTS_FIELD = 'total_flights'

# function calls:

data_grouped_yr = aggr_group_df(data_atl, ['year'], TOTAL_FLIGHTS_FIELD, FLIGHTS_AGGR_FIELDS)

# Group by 'month', matching step 2 of the repetitive version; data_atl has no
# 'month_name' column (that column is only ever added to data_lax).
data_grouped_mnth = aggr_group_df(data_atl, ['month'], TOTAL_FLIGHTS_FIELD, FLIGHTS_AGGR_FIELDS)

carrier_over_years = aggr_group_df(data_atl, [ 'year', 'carrier_name'], TOTAL_FLIGHTS_FIELD, FLIGHTS_AGGR_FIELDS)

# Yearly 'arr_del15' totals per airport.
lax_delays = aggr_group_df(data_lax, ['year'], TOTAL_FLIGHTS_FIELD, FLIGHTS_AGGR_FIELDS)['arr_del15']

# NOTE(review): data_jfk and data_chicago are not defined in the visible part of
# this notebook -- confirm they are loaded before this cell runs.
jfk_delays = aggr_group_df(data_jfk, ['year'], TOTAL_FLIGHTS_FIELD, FLIGHTS_AGGR_FIELDS)['arr_del15']

chicago_delays = aggr_group_df(data_chicago, ['year'], TOTAL_FLIGHTS_FIELD, FLIGHTS_AGGR_FIELDS)['arr_del15']
