In [110]:
import pandas as pd

#  Cyclonic Storm Data

Data is sourced from the Hurricane Research Division of the [Atlantic Oceanographic & Meteorological Laboratory](https://www.aoml.noaa.gov/hrd/hurdat/International_Hurricanes.html).


## Hurricanes 

### Non-US Landfalls

Hurricane data source for [Non-US landfalls](https://www.aoml.noaa.gov/hrd/hurdat/International_Hurricanes.html).

In [111]:
# Load the raw non-US landfall table. The file is tab-separated, and the
# 'Date' column is parsed into datetimes while reading.
hurricanes_non_us_landfalls_df = pd.read_csv(
    'International_Atlantic _Hurricane_Landfalls.csv',
    sep='\t',
    parse_dates=['Date'],
)
hurricanes_non_us_landfalls_df.head()

Unnamed: 0,Index,Date,Time,Latitude,Longitude,Max Winds(kt),SS HWS,Central Pressure(mb),Landfall Country,Storm Names
0,5,1990-08-07,1700Z,20.9N,97.0W,85.0,2.0,980.0,Mexico,Diana
1,1991,NaT,,,,,,,,
2,4,1992-08-23,2100Z,25.4N,76.6W,140.0,5.0,923.0,Bahamas,Andrew
3,4,1992-08-24,0100Z,25.4N,77.8W,130.0,4.0,931.0,Bahamas,Andrew
4,8,1993-09-20,2100Z,21.2N,97.5W,85.0,2.0,970.0,Mexico,Gert


In [112]:
# Clean the raw table in one pass: remove the 'Index' column, discard rows that
# contain any missing value, discard exact duplicate rows, and renumber the
# remaining rows from 0.
# NOTE(review): dropna() removes every row with at least one missing field, not
# only the year-separator rows (whose Date is NaT) - confirm that is intended.
hurricanes_non_us_landfalls_df = (
    hurricanes_non_us_landfalls_df
    .drop(columns=['Index'])
    .dropna()
    .drop_duplicates()
    .reset_index(drop=True)
)
hurricanes_non_us_landfalls_df.head()

Unnamed: 0,Date,Time,Latitude,Longitude,Max Winds(kt),SS HWS,Central Pressure(mb),Landfall Country,Storm Names
0,1990-08-07,1700Z,20.9N,97.0W,85.0,2.0,980.0,Mexico,Diana
1,1992-08-23,2100Z,25.4N,76.6W,140.0,5.0,923.0,Bahamas,Andrew
2,1992-08-24,0100Z,25.4N,77.8W,130.0,4.0,931.0,Bahamas,Andrew
3,1993-09-20,2100Z,21.2N,97.5W,85.0,2.0,970.0,Mexico,Gert
4,1995-09-05,1200Z,17.5N,61.7W,115.0,3.0,945.0,Barbuda,Luis


In [113]:
# Print a summary of the frame: per-column dtype and non-null count, plus total
# memory usage. memory_usage='deep' makes pandas inspect the contents of object
# columns instead of estimating their size.
hurricanes_non_us_landfalls_df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 139 entries, 0 to 138
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Date                  139 non-null    datetime64[ns]
 1   Time                  139 non-null    object        
 2   Latitude              139 non-null    object        
 3   Longitude             139 non-null    object        
 4   Max Winds(kt)         139 non-null    float64       
 5   SS HWS                139 non-null    float64       
 6   Central Pressure(mb)  139 non-null    float64       
 7   Landfall Country      139 non-null    object        
 8   Storm Names           139 non-null    object        
dtypes: datetime64[ns](1), float64(3), object(5)
memory usage: 47.0 KB


In [114]:
# Set explicit dtypes: text-like columns become pandas 'string', measurement
# columns are pinned to float. 'Date' is left as parsed by read_csv.
hurricanes_non_us_landfalls_df = hurricanes_non_us_landfalls_df.astype({
    **dict.fromkeys(
        ['Time', 'Latitude', 'Longitude', 'Landfall Country', 'Storm Names'],
        'string',
    ),
    **dict.fromkeys(
        ['Max Winds(kt)', 'SS HWS', 'Central Pressure(mb)'],
        'float',
    ),
})
hurricanes_non_us_landfalls_df.head()

Unnamed: 0,Date,Time,Latitude,Longitude,Max Winds(kt),SS HWS,Central Pressure(mb),Landfall Country,Storm Names
0,1990-08-07,1700Z,20.9N,97.0W,85.0,2.0,980.0,Mexico,Diana
1,1992-08-23,2100Z,25.4N,76.6W,140.0,5.0,923.0,Bahamas,Andrew
2,1992-08-24,0100Z,25.4N,77.8W,130.0,4.0,931.0,Bahamas,Andrew
3,1993-09-20,2100Z,21.2N,97.5W,85.0,2.0,970.0,Mexico,Gert
4,1995-09-05,1200Z,17.5N,61.7W,115.0,3.0,945.0,Barbuda,Luis


In [115]:
# Write the cleaned non-US landfalls frame to CSV; index=False leaves the
# row index out of the file.
hurricanes_non_us_landfalls_df.to_csv('International_Atlantic_Hurricane_Landfalls_cleaned.csv', index=False)

### USA Landfalls

Hurricane data source for [USA landfalls](https://www.aoml.noaa.gov/hrd/hurdat/All_U.S._Hurricanes.html).

In [116]:
# Load the raw USA landfall table from its tab-separated file.
hurricanes_usa_landfalls_df = pd.read_csv(
    'USA_Hurricane_Landfalls.csv',
    sep='\t',
)
hurricanes_usa_landfalls_df.head()

Unnamed: 0,Year,Month,States Affected and Category by States,Highest Saffir-Simpson U.S. Category,Central Pressure(mb),Max Wind(kt),Name
0,1991,Aug,"RI, 2; MA, 2; NY, 2; CT, 2",2,962,90,Bob
1,1992,Aug,"FL, SE5, SW4; LA, 3",5,922,145,Andrew
2,1993,Aug,"* NC, 3",3,961,100,Emily
3,1995,Aug,"FL, NW2, SE1",2,973,85,Erin
4,1995,Oct,"FL, NW3, I-AL 1",3,942,100,Opal


In [117]:
# Print a summary of the frame: per-column dtype and non-null count, plus total
# memory usage. memory_usage='deep' makes pandas inspect the contents of object
# columns instead of estimating their size.
hurricanes_usa_landfalls_df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 7 columns):
 #   Column                                  Non-Null Count  Dtype 
---  ------                                  --------------  ----- 
 0   Year                                    55 non-null     int64 
 1   Month                                   55 non-null     object
 2   States Affected and Category by States  55 non-null     object
 3   Highest Saffir-Simpson U.S. Category    55 non-null     int64 
 4   Central Pressure(mb)                    55 non-null     int64 
 5   Max Wind(kt)                            55 non-null     int64 
 6   Name                                    55 non-null     object
dtypes: int64(4), object(3)
memory usage: 12.1 KB


In [118]:
# Set explicit dtypes, grouped by target type: 'Year' stays an integer, the
# descriptive columns (including the Saffir-Simpson category) become pandas
# 'string', and pressure / wind speed become float.
hurricanes_usa_landfalls_df = hurricanes_usa_landfalls_df.astype({
    'Year': 'int',
    **dict.fromkeys(
        [
            'Month',
            'States Affected and Category by States',
            'Highest Saffir-Simpson U.S. Category',
            'Name',
        ],
        'string',
    ),
    **dict.fromkeys(['Central Pressure(mb)', 'Max Wind(kt)'], 'float'),
})
hurricanes_usa_landfalls_df.head()

Unnamed: 0,Year,Month,States Affected and Category by States,Highest Saffir-Simpson U.S. Category,Central Pressure(mb),Max Wind(kt),Name
0,1991,Aug,"RI, 2; MA, 2; NY, 2; CT, 2",2,962.0,90.0,Bob
1,1992,Aug,"FL, SE5, SW4; LA, 3",5,922.0,145.0,Andrew
2,1993,Aug,"* NC, 3",3,961.0,100.0,Emily
3,1995,Aug,"FL, NW2, SE1",2,973.0,85.0,Erin
4,1995,Oct,"FL, NW3, I-AL 1",3,942.0,100.0,Opal


In [119]:
# Rename 'Max Wind(kt)' to 'Max Winds(kt)', the same column name the non-US
# landfalls frame uses. rename() leaves the column where it is, so no
# hard-coded position is needed, and it ignores a missing key, so re-running
# this cell is a harmless no-op instead of a KeyError.
hurricanes_usa_landfalls_df = hurricanes_usa_landfalls_df.rename(
    columns={'Max Wind(kt)': 'Max Winds(kt)'}
)
hurricanes_usa_landfalls_df.head()

Unnamed: 0,Year,Month,States Affected and Category by States,Highest Saffir-Simpson U.S. Category,Central Pressure(mb),Max Winds(kt),Name
0,1991,Aug,"RI, 2; MA, 2; NY, 2; CT, 2",2,962.0,90.0,Bob
1,1992,Aug,"FL, SE5, SW4; LA, 3",5,922.0,145.0,Andrew
2,1993,Aug,"* NC, 3",3,961.0,100.0,Emily
3,1995,Aug,"FL, NW2, SE1",2,973.0,85.0,Erin
4,1995,Oct,"FL, NW3, I-AL 1",3,942.0,100.0,Opal


In [120]:
# Build a 'Date' column from 'Year' and the abbreviated 'Month' name. The table
# has no day column, so every date is pinned to the 1st of its month.
hurricanes_usa_landfalls_df['Date'] = pd.to_datetime(
    hurricanes_usa_landfalls_df['Year'].astype(str)
    + '-'
    + hurricanes_usa_landfalls_df['Month']
    + '-01',
    format='%Y-%b-%d',
)
hurricanes_usa_landfalls_df.head()

Unnamed: 0,Year,Month,States Affected and Category by States,Highest Saffir-Simpson U.S. Category,Central Pressure(mb),Max Winds(kt),Name,Date
0,1991,Aug,"RI, 2; MA, 2; NY, 2; CT, 2",2,962.0,90.0,Bob,1991-08-01
1,1992,Aug,"FL, SE5, SW4; LA, 3",5,922.0,145.0,Andrew,1992-08-01
2,1993,Aug,"* NC, 3",3,961.0,100.0,Emily,1993-08-01
3,1995,Aug,"FL, NW2, SE1",2,973.0,85.0,Erin,1995-08-01
4,1995,Oct,"FL, NW3, I-AL 1",3,942.0,100.0,Opal,1995-10-01


In [121]:
# Rename 'Name' to 'Storm Names' (the column name the non-US landfalls frame
# uses) and move it to the last position.
# rename() ignores a missing key, so re-running this cell does not raise.
hurricanes_usa_landfalls_df = hurricanes_usa_landfalls_df.rename(
    columns={'Name': 'Storm Names'}
)
# pop() removes the column and the assignment re-appends it at the end; no
# temporary variable is left behind in the notebook namespace.
hurricanes_usa_landfalls_df['Storm Names'] = hurricanes_usa_landfalls_df.pop('Storm Names')
hurricanes_usa_landfalls_df.head()

Unnamed: 0,Year,Month,States Affected and Category by States,Highest Saffir-Simpson U.S. Category,Central Pressure(mb),Max Winds(kt),Date,Storm Names
0,1991,Aug,"RI, 2; MA, 2; NY, 2; CT, 2",2,962.0,90.0,1991-08-01,Bob
1,1992,Aug,"FL, SE5, SW4; LA, 3",5,922.0,145.0,1992-08-01,Andrew
2,1993,Aug,"* NC, 3",3,961.0,100.0,1993-08-01,Emily
3,1995,Aug,"FL, NW2, SE1",2,973.0,85.0,1995-08-01,Erin
4,1995,Oct,"FL, NW3, I-AL 1",3,942.0,100.0,1995-10-01,Opal


In [122]:
# Write the cleaned USA landfalls frame to CSV; index=False leaves the
# row index out of the file.
hurricanes_usa_landfalls_df.to_csv('USA_Hurricane_Landfalls_cleaned.csv', index=False)