In [194]:
import pandas as pd
import numpy as np
import requests
import json
import config
import urllib.request
from datetime import datetime
import matplotlib.pyplot as plt
import pickle
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
df = pd.read_excel('hurricanes_damage.xls', 'ATD of ICAT')

In [3]:
df.head()

Unnamed: 0,ATCF_ID,name,basedamage,ATD,ND,lf_ISO_TIME,lf_wind,lf_pressure,lf_state,WPC,population,lf_lat,lf_lon
0,AL011900,Galveston,30000000,2826.090656,171510000000,1900-09-09 02:30:00,120,936.0,TX,886.652943,119724,29.1711,-95.2018
1,AL041901,Storm 4 in 1901,1000000,28.612073,830000000,1901-08-14 20:30:00,75,973.0,LA,941.570046,371192,29.266667,-89.633333
2,AL031903,Storm 3 in 1903,670000,1394.193738,7410000000,1903-09-11 23:00:00,75,976.0,FL,1054.905202,4556,26.1,-80.1
3,AL031903,Storm 3 in 1903,0,0.0,0,1903-09-13 21:00:00,80,988.0,FL,1054.905202,25018,29.7989,-85.4572
4,AL021904,Storm 2 in 1904,1000000,62.698162,640000000,1904-09-14 13:30:00,70,,SC,1025.246032,155567,33.2666,-79.275275


In [4]:
df.shape

(247, 13)

In [5]:
def elevation(lat, lng):
    api = config.API_KEY
    url = "https://maps.googleapis.com/maps/api/elevation/json"
    request = urllib.request.urlopen(url+"?locations="+str(lat)+","+str(lng)+"&key="+api)
    try:
        results = json.load(request).get('results')
        if 0 < len(results):
            elevation = results[0].get('elevation')
            return elevation
        else:
            print('HTTP GET Request failed.')
    except ValueError:
        print('JSON decode failed: '+str(request))

In [6]:
elevation(29.171100, -95.201800)

1.306691884994507

In [7]:
ele_list = []
for index, row in df.iterrows():
    ele_list.append(elevation(row['lf_lat'], row['lf_lon']))

In [8]:
df['elevation'] = ele_list

In [9]:
df.head()

Unnamed: 0,ATCF_ID,name,basedamage,ATD,ND,lf_ISO_TIME,lf_wind,lf_pressure,lf_state,WPC,population,lf_lat,lf_lon,elevation
0,AL011900,Galveston,30000000,2826.090656,171510000000,1900-09-09 02:30:00,120,936.0,TX,886.652943,119724,29.1711,-95.2018,1.306692
1,AL041901,Storm 4 in 1901,1000000,28.612073,830000000,1901-08-14 20:30:00,75,973.0,LA,941.570046,371192,29.266667,-89.633333,0.084125
2,AL031903,Storm 3 in 1903,670000,1394.193738,7410000000,1903-09-11 23:00:00,75,976.0,FL,1054.905202,4556,26.1,-80.1,-4.914573
3,AL031903,Storm 3 in 1903,0,0.0,0,1903-09-13 21:00:00,80,988.0,FL,1054.905202,25018,29.7989,-85.4572,-10.469308
4,AL021904,Storm 2 in 1904,1000000,62.698162,640000000,1904-09-14 13:30:00,70,,SC,1025.246032,155567,33.2666,-79.275275,12.549053


In [10]:
rain = pd.read_csv('rainfall.csv')

In [11]:
rain.head()

Unnamed: 0,Station,Total,Lat,Lon,Storm,Year
0,ALEXANDRIA,0.27,31.316667,-92.466667,Abby 1964,1964
1,ALEXANDRIA #2,0.17,31.316667,-92.45,Abby 1964,1964
2,ALEXANDRIA ESLER RGNL AP,0.42,31.4,-92.3,Abby 1964,1964
3,AMITE,1.35,30.716667,-90.533333,Abby 1964,1964
4,ANDREW,1.14,30.083333,-92.25,Abby 1964,1964


In [12]:
rain.shape

(778466, 6)

In [13]:
rain['name'] = rain['Storm'].str.split(' ').str[0]

In [14]:
rain.head()

Unnamed: 0,Station,Total,Lat,Lon,Storm,Year,name
0,ALEXANDRIA,0.27,31.316667,-92.466667,Abby 1964,1964,Abby
1,ALEXANDRIA #2,0.17,31.316667,-92.45,Abby 1964,1964,Abby
2,ALEXANDRIA ESLER RGNL AP,0.42,31.4,-92.3,Abby 1964,1964,Abby
3,AMITE,1.35,30.716667,-90.533333,Abby 1964,1964,Abby
4,ANDREW,1.14,30.083333,-92.25,Abby 1964,1964,Abby


In [15]:
rain_df = pd.DataFrame(rain.groupby(['name']).Total.max())

In [16]:
rain_df.head()

Unnamed: 0_level_0,Total
name,Unnamed: 1_level_1
AL011900,10.25
AL011901,12.51
AL011902,7.97
AL011906,8.33
AL011907,6.13


In [17]:
rain_df.reset_index(level=0, inplace = True)

In [18]:
rain_df.shape

(421, 2)

In [19]:
temp = pd.merge(df, rain_df, on = 'name')

In [20]:
temp.head()

Unnamed: 0,ATCF_ID,name,basedamage,ATD,ND,lf_ISO_TIME,lf_wind,lf_pressure,lf_state,WPC,population,lf_lat,lf_lon,elevation,Total
0,AL051950,Easy,3300000,10.219997,2380000000,1950-09-04 12:00:00,85,973.0,FL,6456.844492,500084,27.7,-83.5,-36.703323,45.2
1,AL111950,King,28000000,73.529671,7220000000,1950-10-18 05:00:00,115,955.0,FL,6456.844492,589760,25.7,-80.2,-3.2,15.44
2,AL091951,How,2000000,44.107096,1850000000,1951-10-02 11:00:00,55,992.9375,FL,6985.034384,64916,26.75,-82.0,4.710114,15.72
3,AL021952,Able,2750000,14.010927,210000000,1952-08-31 03:00:00,85,983.0,SC,7260.088943,270348,32.3,-80.6,1.6766,6.89
4,AL031953,Barbara,1000000,11.234532,78000000,1953-08-14 05:00:00,80,980.583333,NC,7437.094601,119686,35.4,-76.1,0.405508,12.78


In [21]:
temp.shape

(168, 15)

In [22]:
temp2 = pd.merge(df.drop(columns = ['name']), rain_df, left_on = 'ATCF_ID', right_on = 'name', how = 'inner')

In [23]:
df = pd.concat([temp, temp2], ignore_index=True)

In [24]:
df.head()

Unnamed: 0,ATCF_ID,name,basedamage,ATD,ND,lf_ISO_TIME,lf_wind,lf_pressure,lf_state,WPC,population,lf_lat,lf_lon,elevation,Total
0,AL051950,Easy,3300000,10.219997,2380000000,1950-09-04 12:00:00,85,973.0,FL,6456.844492,500084,27.7,-83.5,-36.703323,45.2
1,AL111950,King,28000000,73.529671,7220000000,1950-10-18 05:00:00,115,955.0,FL,6456.844492,589760,25.7,-80.2,-3.2,15.44
2,AL091951,How,2000000,44.107096,1850000000,1951-10-02 11:00:00,55,992.9375,FL,6985.034384,64916,26.75,-82.0,4.710114,15.72
3,AL021952,Able,2750000,14.010927,210000000,1952-08-31 03:00:00,85,983.0,SC,7260.088943,270348,32.3,-80.6,1.6766,6.89
4,AL031953,Barbara,1000000,11.234532,78000000,1953-08-14 05:00:00,80,980.583333,NC,7437.094601,119686,35.4,-76.1,0.405508,12.78


In [25]:
df.shape

(228, 15)

## New Data to get Duration of hurricane

In [26]:
hurricane_list = []
with open('hurdat2.txt') as fp:
    line = fp.readline()
    while line:
        if 'AL' in line:
            storm_id = line.split(',')[0].strip()
            storm_name = line.split(',')[1].strip()
        else:
            text = line.split(',')
            oldformat = text[0] + text[1]
            datetimeobject = datetime.strptime(oldformat,'%Y%m%d %H%M%S')
            date = datetimeobject.strftime('%m-%d-%Y %H%M%S')
            hurricane_list.append(
                {
                    "storm_id": storm_id, 
                    "name": storm_name,
                    "date": date,
                }) 
        line = fp.readline() 


In [27]:
# Create Dataframe and reorder columns
hdf = pd.DataFrame(hurricane_list)
hdf = hdf.loc[:, ["storm_id", "name","date"]]

In [28]:
hdf.shape

(53501, 3)

In [29]:
hdf.head()

Unnamed: 0,storm_id,name,date
0,AL011851,UNNAMED,06-25-1851 000000
1,AL011851,UNNAMED,06-25-1851 060000
2,AL011851,UNNAMED,06-25-1851 120000
3,AL011851,UNNAMED,06-25-1851 180000
4,AL011851,UNNAMED,06-25-1851 210000


In [30]:
hdf['date'] = pd.to_datetime(hdf['date'])

### Getting min and max date

In [31]:
new_df = hdf.groupby('storm_id')
new_df = new_df['date']
start_date = new_df.min()
end_date = new_df.max()
duration = end_date - start_date

### Merge Start Date, End Date, and Duration to one dataframe. 

In [32]:
new_df = pd.DataFrame({"start_date": start_date
                             ,"end_date": end_date
                             ,"duration" : duration
                            }).reset_index()

In [33]:
new_df.head()

Unnamed: 0,storm_id,start_date,end_date,duration
0,AL011851,1851-06-25 00:00:00,1851-06-28 00:00:00,3 days 00:00:00
1,AL011852,1852-08-19 00:00:00,1852-08-30 00:00:00,11 days 00:00:00
2,AL011853,1853-08-05 12:00:00,1853-08-05 12:00:00,0 days 00:00:00
3,AL011854,1854-06-25 00:00:00,1854-06-27 12:00:00,2 days 12:00:00
4,AL011855,1855-08-06 12:00:00,1855-08-06 12:00:00,0 days 00:00:00


In [34]:
df = pd.merge(new_df[['storm_id', 'duration']], df, left_on = 'storm_id', right_on = 'ATCF_ID')

In [35]:
df.shape

(228, 17)

In [36]:
df.head()

Unnamed: 0,storm_id,duration,ATCF_ID,name,basedamage,ATD,ND,lf_ISO_TIME,lf_wind,lf_pressure,lf_state,WPC,population,lf_lat,lf_lon,elevation,Total
0,AL011900,19 days 18:00:00,AL011900,AL011900,30000000,2826.090656,171510000000,1900-09-09 02:30:00,120,936.0,TX,886.652943,119724,29.1711,-95.2018,1.306692,10.25
1,AL011918,6 days 18:00:00,AL011918,AL011918,5000000,326.956305,420000000,1918-08-06 18:00:00,105,955.0,LA,2379.407456,64270,29.8,-93.2,0.347064,4.91
2,AL011926,11 days 06:00:00,AL011926,AL011926,3051000,153.349839,7190000000,1926-07-28 06:30:00,90,967.0,FL,2616.761927,76032,28.564317,-80.53265,-6.941955,10.4
3,AL011929,3 days 06:00:00,AL011929,AL011929,675000,44.350049,110000000,1929-06-28 21:30:00,80,982.5,TX,2716.651077,56024,28.383333,-96.516667,0.584941,3.55
4,AL011934,17 days 06:00:00,AL011934,AL011934,2605000,80.255059,710000000,1934-06-16 19:00:00,85,966.0,LA,2151.24323,150885,29.7,-91.7,-2.3,10.0


### Dropping unwanted columns

In [37]:
df = df.drop(columns = ['ATCF_ID', 'basedamage', 'ATD', 'WPC', 'population'])

In [38]:
df.shape

(228, 12)

In [39]:
df.head()

Unnamed: 0,storm_id,duration,name,ND,lf_ISO_TIME,lf_wind,lf_pressure,lf_state,lf_lat,lf_lon,elevation,Total
0,AL011900,19 days 18:00:00,AL011900,171510000000,1900-09-09 02:30:00,120,936.0,TX,29.1711,-95.2018,1.306692,10.25
1,AL011918,6 days 18:00:00,AL011918,420000000,1918-08-06 18:00:00,105,955.0,LA,29.8,-93.2,0.347064,4.91
2,AL011926,11 days 06:00:00,AL011926,7190000000,1926-07-28 06:30:00,90,967.0,FL,28.564317,-80.53265,-6.941955,10.4
3,AL011929,3 days 06:00:00,AL011929,110000000,1929-06-28 21:30:00,80,982.5,TX,28.383333,-96.516667,0.584941,3.55
4,AL011934,17 days 06:00:00,AL011934,710000000,1934-06-16 19:00:00,85,966.0,LA,29.7,-91.7,-2.3,10.0


In [40]:
df.rename(columns={'ND': 'damage',
                   'lf_ISO_TIME': 'time',
                  'lf_wind': 'wind',
                  'lf_pressure': 'pressure',
                  'lf_state': 'state',
                  'lf_lat': 'lat',
                  'lf_lon': 'lon',
                  'Total': 'rainfall'},
          inplace=True, errors='raise')

In [41]:
df.head()

Unnamed: 0,storm_id,duration,name,damage,time,wind,pressure,state,lat,lon,elevation,rainfall
0,AL011900,19 days 18:00:00,AL011900,171510000000,1900-09-09 02:30:00,120,936.0,TX,29.1711,-95.2018,1.306692,10.25
1,AL011918,6 days 18:00:00,AL011918,420000000,1918-08-06 18:00:00,105,955.0,LA,29.8,-93.2,0.347064,4.91
2,AL011926,11 days 06:00:00,AL011926,7190000000,1926-07-28 06:30:00,90,967.0,FL,28.564317,-80.53265,-6.941955,10.4
3,AL011929,3 days 06:00:00,AL011929,110000000,1929-06-28 21:30:00,80,982.5,TX,28.383333,-96.516667,0.584941,3.55
4,AL011934,17 days 06:00:00,AL011934,710000000,1934-06-16 19:00:00,85,966.0,LA,29.7,-91.7,-2.3,10.0


In [42]:

category = []
for index,row in df.iterrows():
    if row['wind'] <= 73:
        category.append(0)
    elif row['wind'] >= 74 and row['wind'] <= 95:
        category.append(1)
    elif row['wind'] >= 96 and row['wind'] <= 110:
        category.append(2)
    elif row['wind'] >= 111 and row['wind'] <= 129:
        category.append(3)
    elif row['wind'] >= 130 and row['wind'] <= 156:
        category.append(4)
    elif row['wind'] >= 157:
        category.append(5)


In [43]:
df['category'] = category

In [44]:
df.head()

Unnamed: 0,storm_id,duration,name,damage,time,wind,pressure,state,lat,lon,elevation,rainfall,category
0,AL011900,19 days 18:00:00,AL011900,171510000000,1900-09-09 02:30:00,120,936.0,TX,29.1711,-95.2018,1.306692,10.25,3
1,AL011918,6 days 18:00:00,AL011918,420000000,1918-08-06 18:00:00,105,955.0,LA,29.8,-93.2,0.347064,4.91,2
2,AL011926,11 days 06:00:00,AL011926,7190000000,1926-07-28 06:30:00,90,967.0,FL,28.564317,-80.53265,-6.941955,10.4,1
3,AL011929,3 days 06:00:00,AL011929,110000000,1929-06-28 21:30:00,80,982.5,TX,28.383333,-96.516667,0.584941,3.55,1
4,AL011934,17 days 06:00:00,AL011934,710000000,1934-06-16 19:00:00,85,966.0,LA,29.7,-91.7,-2.3,10.0,1


In [45]:
df['duration'].loc[0].total_seconds()/3600

474.0

In [46]:
d = []
for index,row in df.iterrows():
    d.append(row['duration'].total_seconds()/3600)
df['duration'] = d

In [49]:
df.head()

Unnamed: 0,storm_id,duration,name,damage,time,wind,pressure,state,lat,lon,elevation,rainfall,category
0,AL011900,474.0,AL011900,171510000000,1900-09-09 02:30:00,120,936.0,TX,29.1711,-95.2018,1.306692,10.25,3
1,AL011918,162.0,AL011918,420000000,1918-08-06 18:00:00,105,955.0,LA,29.8,-93.2,0.347064,4.91,2
2,AL011926,270.0,AL011926,7190000000,1926-07-28 06:30:00,90,967.0,FL,28.564317,-80.53265,-6.941955,10.4,1
3,AL011929,78.0,AL011929,110000000,1929-06-28 21:30:00,80,982.5,TX,28.383333,-96.516667,0.584941,3.55,1
4,AL011934,414.0,AL011934,710000000,1934-06-16 19:00:00,85,966.0,LA,29.7,-91.7,-2.3,10.0,1


In [198]:
df.to_pickle('df.pkl')