# In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels
import matplotlib.pyplot as plt
import math
import codecademylib3
import matplotlib.ticker as mtick

## Read in Data
# Load the flight records; every task below reads from this DataFrame.
flight = pd.read_csv("flight.csv")
#print(flight.head())
print(flight.columns)

## Task 1
# Box plot of the coach ticket price over all flights.
#print(flight.coach_price.describe())
fclass_tix = flight.firstclass_price
coach_tix = flight.coach_price

f, ax = plt.subplots()
# Pass the prices as ``x=`` explicitly: newer seaborn releases treat the
# first positional argument as ``data``, which would no longer guarantee the
# horizontal layout that the dollar-formatted x axis below relies on.
sns.boxplot(x=coach_tix, ax=ax)
fmt = '${x:,.0f}'
tick = mtick.StrMethodFormatter(fmt)
ax.xaxis.set_major_formatter(tick)
plt.xlabel('Price for Ticket')
plt.title('All Coach Flights')
# No plt.legend() here: the plot has no labelled artists, so the call would
# only emit a "no handles with labels" warning.
plt.show()
plt.clf()

#print(np.min(coach_tix))
#print(np.median(coach_tix))
#print(np.average(coach_tix))
#print(np.max(coach_tix))

## Task 2
# Box plot of the coach ticket price restricted to flights with hours == 8.
f, ax = plt.subplots()
coach_8 = flight.coach_price[flight.hours == 8]
# Pass the prices as ``x=`` explicitly: newer seaborn releases treat the
# first positional argument as ``data``, which would no longer guarantee the
# horizontal layout that the dollar-formatted x axis below relies on.
sns.boxplot(x=coach_8, ax=ax)
fmt = '${x:,.0f}'
tick = mtick.StrMethodFormatter(fmt)
ax.xaxis.set_major_formatter(tick)
plt.xlabel('Price for Ticket')
plt.title('8-Hour Long Coach Flights')
# No plt.legend() here: the plot has no labelled artists, so the call would
# only emit a "no handles with labels" warning.
plt.show()
plt.clf()

#print(np.min(coach_8))
#print(np.median(coach_8))
#print(np.average(coach_8))
#print(np.max(coach_8))

## Task 3
# Histograms of the delay column, split strictly below and strictly above
# 60, each with its average and median marked by a vertical line.
delays = flight.delay
#print(delays.describe())
delay_60 = delays[delays < 60]
delay_60plus = delays[delays > 60]

delay_60_avg, delay_60_med = np.average(delay_60), np.median(delay_60)
plus_60_avg, plus_60_med = np.average(delay_60plus), np.median(delay_60plus)

delay_views = (
    (delay_60, delay_60_avg, delay_60_med, 'Flight Delays Under 1 Hour'),
    (delay_60plus, plus_60_avg, plus_60_med, 'Flight Delays Over 1 Hour'),
)
for subset, subset_avg, subset_med, subset_title in delay_views:
    plt.hist(subset, edgecolor='black')
    # Solid red line marks the average, dotted yellow line marks the median.
    plt.axvline(subset_avg, color='r', linestyle='solid', linewidth=2, label="average")
    plt.axvline(subset_med, color='y', linestyle='dotted', linewidth=3, label="median")
    plt.xlabel('Time in Minutes')
    plt.title(subset_title)
    plt.legend()
    plt.show()
    plt.clf()

## Task 4
# Coach vs first-class price on a random 1% sample of the rows, with a
# black LOWESS trend line. ``flight_sub`` is reused by later tasks.
perc = 0.01
flight_sub = flight.sample(n=int(len(flight) * perc))

sns.lmplot(
    data=flight_sub,
    x='coach_price',
    y='firstclass_price',
    lowess=True,
    line_kws={'color': 'black'},
)
# lmplot opens its own figure; label the axes it leaves current.
plt.gca().set(xlabel='Coach', ylabel='First Class', title='Price Comparison')
plt.tight_layout()
plt.show()
plt.clf()

# Side-by-side bars comparing the coach and first-class price of a small
# random sample of flights.
fclass_tix = flight.firstclass_price
coach_tix = flight.coach_price
perc = 0.0001
# Sample whole rows once so that each coach bar sits next to the first-class
# price of the SAME flight; sampling the two columns independently would pair
# prices from unrelated flights.
price_sub = flight[['coach_price', 'firstclass_price']].sample(n=int(flight.shape[0]*perc))
coach_sub = price_sub.coach_price
fclass_sub = price_sub.firstclass_price

t = 2  # Number of datasets
d = len(price_sub)  # Number of sets of bars, taken from the sample so the lengths always match
w = .8  # Width of each bar
n = 1  # This is our first dataset (out of 2)
value1 = [t*element + w*n for element in range(d)]
n = 2  # This is our second dataset (out of 2)
value2 = [t*element + w*n for element in range(d)]

# Midpoint of each coach/first-class pair, used as the tick position.
middle_x = [(a + b) / 2.0 for a, b in zip(value1, value2)]

f, ax = plt.subplots()
plt.bar(value1, coach_sub, width=w, label='Coach')
plt.bar(value2, fclass_sub, width=w, label='First Class')
ax.set_xticks(middle_x)
# Keep the tick marks but hide their labels.
ax.set_xticklabels([])
fmt = '${x:,.0f}'
tick = mtick.StrMethodFormatter(fmt)
ax.yaxis.set_major_formatter(tick)
plt.yticks(fontsize=10)
plt.title('Price Comparison')
plt.legend()
plt.show()
plt.clf()

## Task 5
# One coach-price histogram per in-flight amenity column, coloured by that
# column's values.
# NOTE(review): the legend labels are hard-coded and their order differs
# between the meal plot and the other two -- confirm each against the data.
amenity_plots = (
    ('inflight_meal', 'Coach Inflight Meal', 'Inflight Meal', ['Yes', 'No']),
    ('inflight_entertainment', 'Coach Inflight Entertainment', 'Inflight Entertainment', ['No', 'Yes']),
    ('inflight_wifi', 'Coach Inflight WiFi', 'Inflight Wifi', ['No', 'Yes']),
)
for amenity_col, plot_title, legend_title, legend_labels in amenity_plots:
    f, ax = plt.subplots()
    sns.set_palette('Set1')
    sns.histplot(flight, x='coach_price', hue=flight[amenity_col], legend=False)
    # Show the price axis as whole dollars.
    ax.xaxis.set_major_formatter(mtick.StrMethodFormatter('${x:,.0f}'))
    plt.xlabel('Ticket Price')
    plt.ylabel('')
    plt.title(plot_title)
    plt.legend(loc=6, title=legend_title, labels=legend_labels)
    plt.show()
    plt.clf()

## Task 6
# Scatter of hours against passengers on the sampled rows; no regression
# line, with horizontal jitter and small translucent markers.
sns.lmplot(
    data=flight_sub,
    x='hours',
    y='passengers',
    fit_reg=False,
    x_jitter=0.25,
    scatter_kws={'alpha': 0.2, 's': 5},
)
# lmplot opens its own figure; label the axes it leaves current.
plt.gca().set(
    xlabel='# of Hours',
    ylabel='# of Passengers',
    title='Comparing Flight Hours to Total Passengers',
)
plt.tight_layout()
plt.show()
plt.clf()

## Task 7
# Coach vs first-class price on the sampled rows, coloured by the
# ``weekend`` column, without regression lines.
# NOTE(review): the legend labels below are hard-coded -- confirm their order
# matches the order in which the ``weekend`` values are drawn.
sns.lmplot(
    data=flight_sub,
    x='coach_price',
    y='firstclass_price',
    hue='weekend',
    legend=False,
    fit_reg=False,
)
plt.gca().set(xlabel='Coach', ylabel='First Class', title='Weekend vs Weekday Price')
plt.legend(labels=['Weekend', 'Weekday'], loc=4)
plt.tight_layout()
plt.show()
plt.clf()

## Task 8
# Coach price by day of the week, with one box per ``redeye`` value.
f, ax = plt.subplots()
# ``x`` and ``y`` must be keywords: passing them positionally is deprecated
# in seaborn 0.11 and fails in later releases, where the first positional
# argument is ``data``.
sns.boxplot(x='day_of_week', y='coach_price', hue='redeye', data=flight, ax=ax)
fmt = '${x:,.0f}'
tick = mtick.StrMethodFormatter(fmt)
ax.yaxis.set_major_formatter(tick)
# Tilt the day names so they do not overlap.
plt.xticks(rotation=30)
plt.xlabel('Day of the Week')
plt.ylabel('Coach Price')
plt.title('Cost of Redeye Flights By Day')
plt.tight_layout()
plt.show()
plt.clf()