# Hurricane Season Analysis

In [1]:
import pandas as pd
import citipy as cipy
from datetime import datetime
import numpy as np
import csv
file = "hurricane_data.csv"

In [2]:
def left(s, amount):
    return s[:amount]

def right(s, amount):
    return s[-amount:]

def mid(s, offset, amount):
    return s[offset:offset+amount]

In [3]:
hurricane_data = []

storm_id = "0000000"
storm_name = "UNNAMED"
with open(file) as hurricane_file:
    hurricane_reader = csv.reader(hurricane_file)
    next(hurricane_reader, None) 
    for row in hurricane_reader:
                if 'AL' in row[0]: 
                    storm_id = row[0].strip()
                    storm_name = row[1].strip()
                    #print(f'{storm_id}: {storm_name}')
                else:
                    oldformat = row[0]
                    datetimeobject = datetime.strptime(oldformat,'%Y%m%d')
                    #date = datetimeobject.strftime('%m-%d-%Y')
                    year = left(row[0] ,4)
                    landfall = row[2].strip()
                    status = row[3].strip()
                    latitude = row[4].strip()
                    longitude = row[5].strip()
                    wind = row[6].strip()
                    pressure = row[7].strip()
                    #date = row[0].strip()
                hurricane_data.append(
                    {
                        "Storm_Id": storm_id, 
                        "Name": storm_name,
                        "Date": oldformat,
                        "Year": year,
                        "Landfall": landfall,
                        "Status" : status,
                        "Latitude" : latitude,
                        "Longitude" : longitude,
                        "Windspeed" : wind,
                        "Pressure" : pressure
                    }
                )

In [4]:
hurricane_pd = pd.DataFrame(hurricane_data)
hurricane_pd = hurricane_pd.loc[:, ["Storm_Id", "Name", "Date", "Year", "Status", "Latitude",
                                    "Longitude", "Windspeed", "Pressure"]]
hurricane_pd.head()

Unnamed: 0,Storm_Id,Name,Date,Year,Status,Latitude,Longitude,Windspeed,Pressure
0,0,UNNAMED,18510625,1851,HU,28.0N,94.8W,80,-999
1,0,UNNAMED,18510625,1851,HU,28.0N,95.4W,80,-999
2,0,UNNAMED,18510625,1851,HU,28.0N,96.0W,80,-999
3,0,UNNAMED,18510625,1851,HU,28.1N,96.5W,80,-999
4,0,UNNAMED,18510625,1851,HU,28.2N,96.8W,80,-999


In [5]:
hurricane_pd.count()

Storm_Id     52150
Name         52150
Date         52150
Year         52150
Status       52150
Latitude     52150
Longitude    52150
Windspeed    52150
Pressure     52150
dtype: int64

In [6]:
hurricane_pd.dtypes

Storm_Id     object
Name         object
Date         object
Year         object
Status       object
Latitude     object
Longitude    object
Windspeed    object
Pressure     object
dtype: object

In [7]:
hurricane_pd['Windspeed'] = pd.to_numeric(hurricane_pd['Windspeed'])
hurricane_pd['Pressure'] = pd.to_numeric(hurricane_pd['Pressure'])
hurricane_pd['Date'] = pd.to_numeric(hurricane_pd['Date'])
hurricane_pd.dtypes

Storm_Id     object
Name         object
Date          int64
Year         object
Status       object
Latitude     object
Longitude    object
Windspeed     int64
Pressure      int64
dtype: object

In [8]:
hurricane_df_clean = hurricane_pd.loc[hurricane_pd["Year"] > "2002"]
hurricane_df_clean = hurricane_df_clean.loc[hurricane_df_clean['Status'].isin(['TS','HU'])]
hurricane_df_clean = hurricane_df_clean.loc[hurricane_df_clean['Name'] != "UNNAMED"]
hurricane_df_clean

Unnamed: 0,Storm_Id,Name,Date,Year,Status,Latitude,Longitude,Windspeed,Pressure
44120,AL012003,ANA,20030421,2003,TS,31.4N,66.4W,50,995
44121,AL012003,ANA,20030421,2003,TS,30.6N,65.8W,50,995
44122,AL012003,ANA,20030421,2003,TS,30.0N,64.8W,50,995
44123,AL012003,ANA,20030421,2003,TS,29.4N,63.1W,50,994
44124,AL012003,ANA,20030422,2003,TS,29.4N,61.5W,50,994
44125,AL012003,ANA,20030422,2003,TS,29.5N,60.1W,50,994
44126,AL012003,ANA,20030422,2003,TS,29.6N,58.5W,45,995
44127,AL012003,ANA,20030422,2003,TS,29.7N,56.6W,45,996
44128,AL012003,ANA,20030423,2003,TS,30.0N,54.7W,40,998
44129,AL012003,ANA,20030423,2003,TS,30.6N,53.0W,40,1000


In [9]:
hurricane_df_clean["Start Date"] = hurricane_df_clean.groupby("Name")["Date"].min()
hurricane_df_clean["End Date"] = hurricane_df_clean.groupby("Name")["Date"].max()

hurricane_df_clean

Unnamed: 0,Storm_Id,Name,Date,Year,Status,Latitude,Longitude,Windspeed,Pressure,Start Date,End Date
44120,AL012003,ANA,20030421,2003,TS,31.4N,66.4W,50,995,,
44121,AL012003,ANA,20030421,2003,TS,30.6N,65.8W,50,995,,
44122,AL012003,ANA,20030421,2003,TS,30.0N,64.8W,50,995,,
44123,AL012003,ANA,20030421,2003,TS,29.4N,63.1W,50,994,,
44124,AL012003,ANA,20030422,2003,TS,29.4N,61.5W,50,994,,
44125,AL012003,ANA,20030422,2003,TS,29.5N,60.1W,50,994,,
44126,AL012003,ANA,20030422,2003,TS,29.6N,58.5W,45,995,,
44127,AL012003,ANA,20030422,2003,TS,29.7N,56.6W,45,996,,
44128,AL012003,ANA,20030423,2003,TS,30.0N,54.7W,40,998,,
44129,AL012003,ANA,20030423,2003,TS,30.6N,53.0W,40,1000,,


In [10]:
# hurricane_df_max = hurricane_df_clean.reset_index()
hurricane_df_max_grouped = hurricane_df_clean.iloc[hurricane_df_clean.reset_index().groupby(['Name'])["Windspeed"].idxmax()]
hurricane_df_max_grouped.head(5)

Unnamed: 0,Storm_Id,Name,Date,Year,Status,Latitude,Longitude,Windspeed,Pressure,Start Date,End Date
46325,AL012006,ALBERTO,20060612,2006,TS,26.8N,86.3W,60,1000,,
44739,AL012004,ALEX,20040805,2004,HU,38.5N,66.0W,105,957,,
46129,AL262005,ALPHA,20051023,2005,TS,17.3N,69.6W,45,1000,,
44120,AL012003,ANA,20030421,2003,TS,31.4N,66.4W,50,995,,
49903,AL012013,ANDREA,20130606,2013,TS,27.8N,84.9W,55,995,,


# Strength of Storms

In [None]:
# Line graph to show average strength of storms over the years

# Storm Characteristics

# Most Dangerous Cities in Relationship to Storms

# Hurricane Season

# Storm Duration