# The Chinese Zodiac and Airplane Crashes

![alt text](https://upload.wikimedia.org/wikipedia/commons/e/e3/20100720_Fukuoka_Kushida_3614_M.jpg =250x "Chinese Zodiac")

### Do the Zodiacs Influence Aircraft Accidents?

The Chinese Zodiac is a 12 year cycle, with each year being represented by a different animal from the Chinese lunar calendar. According to ancient superstition, you will have bad luck during the Zodiac year in which you were born. 

But does this bad luck influence aircraft fatalities? Maybe if you're superstitious, but over the years they have fallen into a fairly tight distribution. 

- Deadliest Zodiac since 1908: **Ox** - 10,134 Deaths
- Safest Zodiac since 1908: **Rabbit** - 6,956 Deaths


In [1]:
import numpy as np
import pandas as pd
import datetime
from bokeh.charts import Scatter, Bar, show, output_notebook
# from bokeh.palettes import brewer
# # colors = brewer['Spectral'][11]
output_notebook()

data = pd.read_csv('crashes.csv')


In [2]:
data.sample()

Unnamed: 0,Date,Time,Location,Operator,Flight #,Route,Type,Registration,cn/In,Aboard,Fatalities,Ground,Summary
4067,12/27/1991,08:51,"Stockholm, Sweden",Scandinavian Airlines (SAS),751,Stockholm-Copenhagen-Warsaw,MDonnell Douglas MD-81,OY-KHO,53003,129.0,0.0,0.0,"The aircraft reached an altitude of 3,000 feet..."


In [3]:
# Return a bunch of tuples with the Zodiac and its Start/End Dates
def chinese_zodaics():
    start_date = pd.to_datetime("2/2/1908")
    end_date = pd.to_datetime("7/1/2009")
    animals = ['Monkey', 'Rooster', 'Dog', 'Pig', 'Rat', 'Ox', 'Tiger', 'Rabbit', 'Dragon', 'Snake', 'Horse', 'Goat']
    zodiacs = []
    while start_date < end_date:
        for a in animals:    
            year_start = start_date
            year_end = year_start + pd.DateOffset(days=365)
            z = (a, start_date, year_end)
            zodiacs.append(z)
            start_date = year_end
    return zodiacs 

zodiacs = chinese_zodaics()

# Apply the zodiacs to the accident dates
def match_zodiac(date):
    for z in zodiacs: 
        animal, start, end, = z[0], z[1], z[2]
        if start <= date <= end:
            return animal
        
data.Date = pd.to_datetime(data.Date)
data['Zodiac'] = data.Date.apply(match_zodiac)
data['Year'] = pd.DatetimeIndex(data['Date']).year
data = data[['Zodiac', 'Year', 'Fatalities', 'Aboard']].dropna()
data = data[data.Fatalities > 1]
data.sample(5)


Unnamed: 0,Zodiac,Year,Fatalities,Aboard
4501,Ox,1997,4.0,6.0
2536,Rat,1972,6.0,6.0
2409,Dog,1970,30.0,30.0
3396,Dog,1983,10.0,10.0
2792,Tiger,1975,9.0,9.0


In [4]:
data.describe().astype(int)

Unnamed: 0,Year,Fatalities,Aboard
count,4784,4784,4784
mean,1971,21,27
std,21,34,41
min,1912,2,2
25%,1955,4,5
50%,1973,11,14
75%,1990,24,30
max,2009,583,644


In [9]:
p = Scatter(data, x='Fatalities', y='Zodiac', marker='Zodiac', color='Zodiac',
            title="Fatalities by Zodiac", legend="top_left",
            xlabel="Fatalities", ylabel="Zodiac")


show(p)

In [6]:
# Put key stats into a DataFrame
def zodiac_data(data):
    idx=['Total_Accidents', 'Total_Deaths', 'Mean_Deaths', 'Death_Rate', 'Survival_Rate', 'Deadliest_Accident']
    df = pd.DataFrame()
    for z in data.Zodiac.unique(): 
        zodiac = data[data.Zodiac == z]
        f = zodiac.Fatalities.dropna()
        a = zodiac.Aboard
        total_accidents = f.count()
        total_deaths = f.sum()
        mean_deaths = f.mean()
        death_rate = total_deaths / a.sum()
        survival_rate = 1 - death_rate
        deadliest = f.max()
        df[z] = [total_accidents, total_deaths, mean_deaths, death_rate, survival_rate, deadliest]
    df.index = idx
    df = df.round(2).T
    return df

zodiac_comparison = zodiac_data(data)
zodiac_comparison

Unnamed: 0,Total_Accidents,Total_Deaths,Mean_Deaths,Death_Rate,Survival_Rate,Deadliest_Accident
Rat,448.0,9981.0,22.28,0.81,0.19,349.0
Ox,372.0,10134.0,27.24,0.83,0.17,520.0
Rabbit,359.0,6956.0,19.38,0.79,0.21,217.0
Dragon,390.0,8476.0,21.73,0.8,0.2,290.0
Snake,407.0,9231.0,22.68,0.79,0.21,583.0
Horse,372.0,7205.0,19.37,0.73,0.27,225.0
Monkey,405.0,8446.0,20.85,0.78,0.22,301.0
Rooster,394.0,8680.0,22.03,0.78,0.22,180.0
Dog,437.0,9446.0,21.62,0.75,0.25,264.0
Pig,396.0,8625.0,21.78,0.81,0.19,269.0


In [7]:
zodiac_comparison.describe().round(2)

Unnamed: 0,Total_Accidents,Total_Deaths,Mean_Deaths,Death_Rate,Survival_Rate,Deadliest_Accident
count,12.0,12.0,12.0,12.0,12.0,12.0
mean,398.67,8746.17,21.94,0.79,0.21,318.25
std,25.98,972.55,2.13,0.03,0.03,120.19
min,359.0,6956.0,19.37,0.73,0.17,180.0
25%,385.5,8440.75,20.72,0.78,0.19,254.25
50%,395.0,8652.5,21.76,0.8,0.2,282.5
75%,408.75,9373.25,22.38,0.81,0.22,346.75
max,448.0,10134.0,27.24,0.83,0.27,583.0


In [8]:
p = Bar(data, label='Zodiac', values='Fatalities', agg='mean', stack='Zodiac',
        title="Average Annual Deaths by Zodiac", legend='top_right')



show(p)