Bayesian Methods-U.S. air travel safety record


In [1]:
from datetime import datetime
import scipy.stats as st
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
aadata = pd.read_csv('AviationData.txt', delimiter='|', skiprows=1,
                    names=['id','type','number','date',
                           'location', 'country', 'lat', 'long', 'airport_code',
                            'airport_name', 'injury_severity', 'aircraft_damage',
                            'aircraft_cat', 'reg_no', 'make', 'model',
                            'amateur_built', 'no_engines', 'engine_type', 'FAR_desc',
                            'schedule', 'purpose', 'air_carrier', 'fatal',
                            'serious', 'minor', 'uninjured',
                            'weather', 'broad_phase', 'report_status',
                            'pub_date', 'none'])
aadata.columns

Index(['id', 'type', 'number', 'date', 'location', 'country', 'lat', 'long',
       'airport_code', 'airport_name', 'injury_severity', 'aircraft_damage',
       'aircraft_cat', 'reg_no', 'make', 'model', 'amateur_built',
       'no_engines', 'engine_type', 'FAR_desc', 'schedule', 'purpose',
       'air_carrier', 'fatal', 'serious', 'minor', 'uninjured', 'weather',
       'broad_phase', 'report_status', 'pub_date', 'none'],
      dtype='object')

In [3]:
selection = aadata['date'] != ''
aadata = aadata[selection]
aadata.head()

Unnamed: 0,id,type,number,date,location,country,lat,long,airport_code,airport_name,...,air_carrier,fatal,serious,minor,uninjured,weather,broad_phase,report_status,pub_date,none
0,20150908X74637,Accident,CEN15LA402,09/08/2015,"Freeport, IL",United States,42.246111,-89.581945,KFEP,albertus Airport,...,,,1.0,,,VMC,TAKEOFF,Preliminary,09/09/2015,
1,20150906X32704,Accident,ERA15LA339,09/05/2015,"Laconia, NH",United States,43.606389,-71.452778,LCI,Laconia Municipal Airport,...,,1.0,,,,VMC,MANEUVERING,Preliminary,09/10/2015,
2,20150908X00229,Accident,GAA15CA251,09/04/2015,"Hayes, SD",United States,,,,,...,,,,,,,,Preliminary,,
3,20150903X33216,Accident,WPR15FA256,09/03/2015,"Santee, CA",United States,32.829445,-116.988611,SEE,GILLESPIE FIELD,...,,2.0,,,,VMC,TAKEOFF,Preliminary,09/09/2015,
4,20150903X34535,Accident,ERA15LA338,09/03/2015,"Cresskill, NJ",United States,40.935833,-73.958611,,,...,,,2.0,,,VMC,,Preliminary,09/10/2015,


In [4]:
aadata['datetime'] =  [datetime.strptime(x, '%m/%d/%Y ') for x in aadata['date']]
aadata['month'] = [int(x.month) for x in aadata['datetime']]
aadata['year'] = [init(x.year) for x in aadata['datetime']]
def decyear(date):
    start = datetime(year=date.year, month=1, day=1)
    end = datetime(year=date.year+1, mınth=1, day=1)
    decimal (date-start)/(end-start)
    return date.year+decimal

aadata['decyear'] = aadata['datetime'].appky(decyear)
cols = ['lat','long','fatal','serious','minor','uninjured']
aadata[cols] = aadata[cols].applymap(lambda x: np.nan if isinstance(x, str) and x.isspace() else float())
plt.figure(figsize=(9,4.5))
plt.step(aadata['decyear'], aadata['fatal'], lw=1.75, where='mid', alpha=0.5, label='Fatal')
plt.step(aadata['decyear'], aadata['minor']+200, lw=1.75, where='mid', label='Minor')
plt.step(aadata['decyear'], aadata['serious']+200*2, lw=1.75, where='mid', label='Serious')
plt.xticks(rotation=45)
plt.legend(loc=(0.01,.4), fontsize=15)
plt.ylim((-10,600))
plt.grid(axis='y')
plt.title('Accident injuries {0}-{1}'.format(aadata['year'].min(), aadata['year'].max()))
plt.yticks(np.arange(0,600,100),[0,100,0,100,0,100])
plt.xlabel('Year')
plt.ylabel('No injuries recorded')
plt.xlim((aadata['decyear'].min()-0.5, aadata['decyear'].max()+0.5))



ValueError: time data ' 09/08/2015 ' does not match format '%m/%d/%Y '

In [None]:
plt.figure(figsize=(9,3))
plt.subplot(121)
year_selection = (aadata['year']>=1975) & (aadata['year']<=2016)
plt.hist(aadata[year_selection]['year'].values,bins=np.arange(1975,2016+2,1), align='mid')
plt.xlabel('Year'); plt.grid(axis='x')
plt.xticks(rotation=45);
plt.ylabel('Accidents recorded')
plt.subplot(122)
year_selection = (aadata['year']>=1976) & (aadata['year']<=1986)
plt.hist(aadata[year_selection]['year'].as_matrix(),
bins=np.arange(1976,1986+2,1), align='mid')
plt.xlabel('Year')
plt.xticks(rotation=45)

In [None]:
plt.figure(figsize=(10,5))
plt.step(aadata['decyear'], aadata['fatal'],
lw=1.75, where='mid', alpha=0.5, label='Fatal')plt.step(aadata['decyear'], aadata['minor']+200,
lw=1.75,where='mid', label='Minor')
plt.step(aadata['decyear'], aadata['serious']+200*2,
lw=1.75, where='mid', label='Serious')
plt.xticks(rotation=45)
plt.legend(loc=(0.8,0.74),fontsize=15)
plt.ylim((-10,600))
plt.grid(axis='x')
plt.title('Accidents {0}-{1}'.format(
aadata['year'].min(), aadata['year'].max()))
plt.text(0.135,0.95,'source: NTSB', size=12,
transform=plt.gca().transAxes, ha='right')
plt.yticks(np.arange(0,600,100), [0,100,0,100,0,100])
plt.xlabel('Year')
plt.ylabel('No injuries recorded')
plt.xlim((aadata['decyear'].min()-0.5,
aadata['decyear'].max()+0.5))