# Exploratory Data Analysis of Atlantic Hurricanes 1851-2017

### Background
After cleaning and parsing the HURDAT dataset on Atlantic Ocean hurricanes, we will do some initial exploratory data analysis to get a better feel for the data. The aim is to uncover interesting trends and features of the dataset through visualizations and analysis.

### Contents
1. Load Data
2. Initial Exploration

In [1]:
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

## 1. Load Data

In [2]:
# Load the storm records from the cleaned pickle file into `hurricanes`.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
with open('../data/hurricanes_cleaned.pkl', 'rb') as handle:
    hurricanes = pickle.load(handle)

## 2. Initial Exploration

In [3]:
# Number of top-level entries in `hurricanes`. Despite the variable name, the
# message below reports this as the count of all storms, not only hurricanes.
num_hurricanes = len(hurricanes)
print(f'There are data for {num_hurricanes} storms in this dataset.')

There are data for 1848 storms in this dataset.


### A. Number of storms and hurricanes in each year

In [4]:
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.tools import FigureFactory as FF
from plotly.tools import get_embed
from plotly import offline

In [5]:
# Years covered by the analysis: 1851 through 2017 inclusive.
# `a` is also used by later cells to seed their own per-year dictionaries.
a = range(1851, 2018)

# Seed both tallies with every year so that a year without any matching
# record still appears with a count of 0.
storm_count_by_yr = dict.fromkeys(a, 0)
hurricane_count_by_yr = dict.fromkeys(a, 0)

# Single pass over the records: every record counts as a storm, and records
# flagged `is_hurricane` additionally count as a hurricane.
# A record whose year lies outside `a` raises KeyError.
for storm_data in hurricanes.values():
    year = storm_data['year']
    storm_count_by_yr[year] += 1
    # NOTE(review): `== True` is kept deliberately because the dtype of the
    # flag is not visible here; if it is a plain bool, `if flag:` suffices.
    if storm_data['is_hurricane'] == True:
        hurricane_count_by_yr[year] += 1

In [6]:
# Years on the x axis. Both tallies were seeded from the same year range, so
# their values line up with these keys.
x = [*storm_count_by_yr]
y1 = [*storm_count_by_yr.values()]
y2 = [*hurricane_count_by_yr.values()]

# One line-and-marker series per tally.
trace1 = go.Scatter(x=x, y=y1, mode='lines+markers', name='Number of Tropical Storms')
trace2 = go.Scatter(x=x, y=y2, mode='lines+markers', name='Number of Hurricanes')
data = [trace1, trace2]

# Both axis titles use the same typography.
axis_title_font = dict(family='Courier New, monospace', size=18, color='#7f7f7f')

chart_title = go.layout.Title(
    text='Number of Tropical Storms and Hurricanes by Year',
    xref='paper',
    x=0,
)
year_axis = go.layout.XAxis(
    title=go.layout.xaxis.Title(text='Year', font=axis_title_font)
)
count_axis = go.layout.YAxis(
    title=go.layout.yaxis.Title(text='Number of Storms', font=axis_title_font)
)
layout = go.Layout(title=chart_title, xaxis=year_axis, yaxis=count_axis)

fig1 = go.Figure(data=data, layout=layout)
# Kept as the final expression so the notebook renders the chart inline.
py.iplot(fig1, filename='storms-by-year')

In [7]:
# Write fig1 to an HTML file under ../plots and again under the plots folder
# of the sibling hm9464.github.io directory.
# The value of the last call is what the notebook echoes as the cell output.
offline.plot(fig1, filename='../plots/ts_and_hurricanes_by_year.html')
offline.plot(fig1, filename='../../hm9464.github.io/plots/ts_and_hurricanes_by_year.html')

'../../hm9464.github.io/plots/ts_and_hurricanes_by_year.html'

There seems to be a definite trend towards an increasing number of storms since 1851. Let's go deeper and look at the number of hurricanes in each year. A hurricane is a storm with maximum sustained wind speeds of at least 74 mph.

What's interesting to note is that years with 10 or more hurricanes have become more frequent. In the 140 years from 1851 to 1990, there were 9 years in which the number of hurricanes was 10 or greater. In the fewer than 30 years since 1990, there have already been 6 such years.

### B. Storm Duration over time

In [8]:
# Group storm durations by year: year -> list of the durations of that
# year's storms. Every year in `a` starts with its own empty list.
storm_duration_by_yr = {season: [] for season in a}
for record in hurricanes.values():
    # A record whose year lies outside `a` raises KeyError.
    storm_duration_by_yr[record['year']].append(record['duration'])

In [9]:
# Mean storm duration per year, expressed in fractional days.
SECONDS_PER_DAY = 86400

avg_storm_duration_by_yr = {}
for year, durations in storm_duration_by_yr.items():
    if len(durations) == 0:
        # np.mean([]) is a float NaN without timedelta attributes, so a year
        # with no storms is recorded as NaN instead of raising.
        avg_storm_duration_by_yr[year] = np.nan
        continue
    # Compute the mean once. This assumes the durations are timedelta-like
    # objects (the mean was previously read through .days / .seconds).
    mean_duration = np.mean(durations)
    # total_seconds() folds days, seconds and microseconds into one number,
    # so no sub-second part of the mean is dropped.
    avg_storm_duration_by_yr[year] = mean_duration.total_seconds() / SECONDS_PER_DAY

In [10]:
# Years on the x axis, average duration (in days) on the y axis.
x = [*avg_storm_duration_by_yr]
y1 = [*avg_storm_duration_by_yr.values()]

trace1 = go.Scatter(x=x, y=y1, mode='lines+markers', name='Average Duration (Days)')
data = [trace1]

# Both axis titles use the same typography.
axis_title_font = dict(family='Courier New, monospace', size=18, color='#7f7f7f')

chart_title = go.layout.Title(
    text='Average Duration of Tropical Storms by Year',
    xref='paper',
    x=0,
)
year_axis = go.layout.XAxis(
    title=go.layout.xaxis.Title(text='Year', font=axis_title_font)
)
duration_axis = go.layout.YAxis(
    title=go.layout.yaxis.Title(text='Average Duration (Days)', font=axis_title_font)
)
layout = go.Layout(title=chart_title, xaxis=year_axis, yaxis=duration_axis)

fig2 = go.Figure(data=data, layout=layout)
# Kept as the final expression so the notebook renders the chart inline.
py.iplot(fig2, filename='avg-duration-by-year')

In [11]:
# Write fig2 to an HTML file under ../plots and again under the plots folder
# of the sibling hm9464.github.io directory.
# The value of the last call is what the notebook echoes as the cell output.
offline.plot(fig2, filename='../plots/ts_avg_duration_by_year.html')
offline.plot(fig2, filename='../../hm9464.github.io/plots/ts_avg_duration_by_year.html')

'../../hm9464.github.io/plots/ts_avg_duration_by_year.html'

### C. Number of Landfalls over time

In [12]:
# Per year, count the storms whose record is flagged as having made landfall.
# Each storm adds at most 1 to its year, so this counts storms, not
# individual landfall events.
landfall_count_by_yr = {season: 0 for season in a}
for record in hurricanes.values():
    # NOTE(review): `== True` is kept deliberately because the dtype of the
    # flag is not visible here; if it is a plain bool, `if flag:` suffices.
    if record['landfall'] == True:
        landfall_count_by_yr[record['year']] += 1

In [13]:
# Years on the x axis, number of landfalling storms on the y axis.
x = list(landfall_count_by_yr.keys())
y1 = list(landfall_count_by_yr.values())

trace1 = go.Scatter(
    x = x,
    y = y1,
    mode = 'lines+markers',
    name='Number of Landfalls'
)

data = [trace1]

layout = go.Layout(
    title=go.layout.Title(
        text='Number of Tropical Storms and Hurricanes Making Landfall by Year',
        xref='paper',
        x=0
    ),
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(
            text='Year',
            font=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f'
            )
        )
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(
            text='Number of Landfalls',
            font=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f'
            )
        )
    )
)

fig3 = go.Figure(data=data, layout=layout)
# Use a filename of its own: this cell previously reused
# 'avg-duration-by-year', the name under which the average-duration chart is
# published, so the landfall chart replaced it.
py.iplot(fig3, filename='landfalls-by-year')

In [14]:
# Write fig3 to an HTML file under ../plots and again under the plots folder
# of the sibling hm9464.github.io directory.
# The value of the last call is what the notebook echoes as the cell output.
offline.plot(fig3, filename='../plots/ts_landfalls_by_year.html')
offline.plot(fig3, filename='../../hm9464.github.io/plots/ts_landfalls_by_year.html')

'../../hm9464.github.io/plots/ts_landfalls_by_year.html'