In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import folium
from folium import plugins
%matplotlib inline
pd.set_option('display.max_columns', 500)

In [2]:
fname='Dataset_SHIPS_RII_ATL.csv'
ships = pd.read_csv(fname)
ships.head()

Unnamed: 0,NAME,DATE,HOUR,VMX0,LAT,LON,MSLP,ID,DELV12,DELV24,DELV36,DELV48,PER,SHRD,D200,RHLO,PX30,SDBT,POT,OHC,TPW,PC2,U200,TPWC,AVBT,RSST
0,ALEX,980727,12,25,11.3,-25.4,1009,AL011998,0,5,10,10,9999,6.3,103,68,72,13.8,-101,12,0,-58,-7.9,55.7,-473,27.4
1,ALEX,980727,18,25,11.7,-27.2,1009,AL011998,0,5,10,10,9999,11.2,118,69,55,12.6,-102,17,0,-10,-6.4,55.7,-360,27.4
2,ALEX,980728,0,25,12.2,-29.2,1009,AL011998,5,10,10,10,0,8.6,116,71,70,12.8,-105,21,0,-3,-8.8,56.9,-381,27.4
3,ALEX,980728,6,25,12.6,-31.3,1008,AL011998,5,10,10,15,0,12.2,91,71,57,12.2,-100,29,0,-44,-6.0,52.7,-481,27.2
4,ALEX,980728,12,30,12.9,-33.3,1007,AL011998,5,5,5,10,5,10.5,88,71,83,10.1,-89,15,190,-46,-6.5,55.3,-516,27.1


In [3]:
# Set all 9999s as NaNs
ships = ships.replace(9999,np.NaN)
# drop NaNs
ships=ships.dropna()
ships.head()

Unnamed: 0,NAME,DATE,HOUR,VMX0,LAT,LON,MSLP,ID,DELV12,DELV24,DELV36,DELV48,PER,SHRD,D200,RHLO,PX30,SDBT,POT,OHC,TPW,PC2,U200,TPWC,AVBT,RSST
2,ALEX,980728,0,25,12.2,-29.2,1009,AL011998,5.0,10.0,10.0,10.0,0.0,8.6,116,71,70.0,12.8,-105,21.0,0,-3.0,-8.8,56.9,-381.0,27.4
3,ALEX,980728,6,25,12.6,-31.3,1008,AL011998,5.0,10.0,10.0,15.0,0.0,12.2,91,71,57.0,12.2,-100,29.0,0,-44.0,-6.0,52.7,-481.0,27.2
4,ALEX,980728,12,30,12.9,-33.3,1007,AL011998,5.0,5.0,5.0,10.0,5.0,10.5,88,71,83.0,10.1,-89,15.0,190,-46.0,-6.5,55.3,-516.0,27.1
5,ALEX,980728,18,30,13.1,-35.1,1006,AL011998,5.0,5.0,10.0,15.0,5.0,9.7,44,72,35.0,15.9,-86,22.0,15,36.0,-7.2,56.6,-270.0,27.1
6,ALEX,980729,0,35,13.3,-36.8,1005,AL011998,0.0,0.0,5.0,10.0,5.0,9.9,37,74,56.0,14.5,-80,24.0,0,-6.0,-10.3,57.7,-443.0,27.1


In [4]:
# Pad the date columns with 00 for the year 2000
ships['DATE'] = ships['DATE'].apply(lambda x: str(x).zfill(6))

# Extract month from date
ships['MONTH'] = ships['DATE'].apply(lambda x: str(x)[2:4])

# Extract year from date
ships['YEAR'] = ships['DATE'].apply(lambda x: ('19' + str(x)[0:2]) if (str(x)[0:1]!= '0' and str(x)[0:1]!= '1') else ('20' + str(x)[0:2]))
ships.head()

Unnamed: 0,NAME,DATE,HOUR,VMX0,LAT,LON,MSLP,ID,DELV12,DELV24,DELV36,DELV48,PER,SHRD,D200,RHLO,PX30,SDBT,POT,OHC,TPW,PC2,U200,TPWC,AVBT,RSST,MONTH,YEAR
2,ALEX,980728,0,25,12.2,-29.2,1009,AL011998,5.0,10.0,10.0,10.0,0.0,8.6,116,71,70.0,12.8,-105,21.0,0,-3.0,-8.8,56.9,-381.0,27.4,7,1998
3,ALEX,980728,6,25,12.6,-31.3,1008,AL011998,5.0,10.0,10.0,15.0,0.0,12.2,91,71,57.0,12.2,-100,29.0,0,-44.0,-6.0,52.7,-481.0,27.2,7,1998
4,ALEX,980728,12,30,12.9,-33.3,1007,AL011998,5.0,5.0,5.0,10.0,5.0,10.5,88,71,83.0,10.1,-89,15.0,190,-46.0,-6.5,55.3,-516.0,27.1,7,1998
5,ALEX,980728,18,30,13.1,-35.1,1006,AL011998,5.0,5.0,10.0,15.0,5.0,9.7,44,72,35.0,15.9,-86,22.0,15,36.0,-7.2,56.6,-270.0,27.1,7,1998
6,ALEX,980729,0,35,13.3,-36.8,1005,AL011998,0.0,0.0,5.0,10.0,5.0,9.9,37,74,56.0,14.5,-80,24.0,0,-6.0,-10.3,57.7,-443.0,27.1,7,1998


In [5]:
##Where do hurricanes occur (Latitude, Longitude)?
ships.groupby(["LAT", "LON"]).size()

LAT   LON  
8.9   -38.2    1
      -36.5    1
9.0   -39.9    1
9.1   -35.0    1
9.3   -41.4    1
              ..
39.2  -53.4    1
39.3  -48.7    1
39.4  -58.7    1
39.5  -58.0    1
40.1  -51.7    1
Length: 3678, dtype: int64

In [6]:
maps = folium.Map(location=[12.2, -29.2], zoom_start = 3)
for index,row in ships.iterrows():
    lat = row["LAT"]
    lon = row["LON"]
    name = row["NAME"]
    folium.Marker([lat,lon],popup=name).add_to(maps)

In [7]:
maps
##shows general location of Hurricanes alongside east coast of USA, Central America, and Atlantic Ocean

In [8]:
m = folium.Map([12.2, -29.2], zoom_start=5)
m

In [9]:
for index, row in ships.iterrows():
    folium.CircleMarker([row['LAT'], row['LON']],
                        radius=15,
                        popup=row['NAME'],
                        fill_color="#3db7e4", # divvy color
                       ).add_to(m)

In [10]:
stationArr = ships[['LAT', 'LON']]

# plot heatmap
m.add_children(plugins.HeatMap(stationArr, radius=15))
m

  after removing the cwd from sys.path.


In [11]:
hurricane = ships['NAME'].value_counts() 
print(hurricane)
##overall there are 130 hurricanes recorded in this data set 

MARI    87
JOSE    87
KYLE    85
DANI    82
LISA    80
        ..
TD12     2
TONY     2
COLI     2
DON      2
GAMM     1
Name: NAME, Length: 130, dtype: int64


In [12]:
##What months do hurricanes occur?
count = ships['MONTH'].value_counts() 
print(count)

##Hurricanes occur usually throughout year as suggested by the frequencies however they are most frequent in 
##September, August, October, July, and November with some occurring during the months of June, December, January
##May and April. 

09    1794
08     870
10     530
07     230
11     176
06      61
12      43
01      17
05      16
04       6
Name: MONTH, dtype: int64


In [13]:
##How many hurricanes per year (count)?
year= ships['YEAR'].value_counts()
print(year)

2005    349
2004    281
2012    262
1999    254
1998    246
2016    239
2017    237
2003    236
2010    228
2008    219
2011    193
2001    176
2002    167
2006    160
2015    119
2000    119
2014     80
2007     72
2013     59
2009     47
Name: YEAR, dtype: int64
