In [2]:
import pandas as pd
import citipy as cipy
from datetime import datetime
import numpy as np
import csv
file = "hurricane_data.csv"

In [3]:
def left(s, amount):
    return s[:amount]

def right(s, amount):
    return s[-amount:]

def mid(s, offset, amount):
    return s[offset:offset+amount]

In [4]:
hurricane_data = []

storm_id = "0000000"
storm_name = "UNNAMED"
with open(file) as hurricane_file:
    hurricane_reader = csv.reader(hurricane_file)
    next(hurricane_reader, None) 
    for row in hurricane_reader:
                if 'AL' in row[0]: 
                    storm_id = row[0].strip()
                    storm_name = row[1].strip()
#                     print(f'{storm_id}: {storm_name}')
                else:
                    oldformat = row[0]
                    datetimeobject = datetime.strptime(oldformat,'%Y%m%d')
                    date = datetimeobject.strftime('%m-%d-%Y')
                    year = left(row[0] ,4)
                    landfall = row[2].strip()
                    status = row[3].strip()
                    latitude = row[4].strip()
                    longitude = row[5].strip()
                    wind = row[6].strip()
                    pressure = row[7].strip()
                hurricane_data.append(
                    {
                        "Storm_Id": storm_id, 
                        "Name": storm_name,
                        "Date": date,
                        "Year": year,
                        "Landfall": landfall,
                        "Status" : status,
                        "Latitude" : latitude,
                        "Longitude" : longitude,
                        "Windspeed" : wind,
                        "Pressure" : pressure
                    }
                )    
                           
    

In [5]:
hurricane_pd = pd.DataFrame(hurricane_data)
hurricane_pd = hurricane_pd.loc[:, ["Storm_Id", "Name", "Date", "Year", "Status", "Latitude",
                                    "Longitude", "Windspeed", "Pressure"]]
hurricane_pd.head()

Unnamed: 0,Storm_Id,Name,Date,Year,Status,Latitude,Longitude,Windspeed,Pressure
0,0,UNNAMED,06-25-1851,1851,HU,28.0N,94.8W,80,-999
1,0,UNNAMED,06-25-1851,1851,HU,28.0N,95.4W,80,-999
2,0,UNNAMED,06-25-1851,1851,HU,28.0N,96.0W,80,-999
3,0,UNNAMED,06-25-1851,1851,HU,28.1N,96.5W,80,-999
4,0,UNNAMED,06-25-1851,1851,HU,28.2N,96.8W,80,-999


In [6]:
hurricane_pd.count()

Storm_Id     52150
Name         52150
Date         52150
Year         52150
Status       52150
Latitude     52150
Longitude    52150
Windspeed    52150
Pressure     52150
dtype: int64

In [7]:
hurricane_pd.dtypes

Storm_Id     object
Name         object
Date         object
Year         object
Status       object
Latitude     object
Longitude    object
Windspeed    object
Pressure     object
dtype: object

In [8]:
hurricane_pd['Windspeed'] = pd.to_numeric(hurricane_pd['Windspeed'])
hurricane_pd['Pressure'] = pd.to_numeric(hurricane_pd['Pressure'])
hurricane_pd['Date'] = pd.to_datetime(hurricane_pd['Date'])
hurricane_pd.dtypes

Storm_Id             object
Name                 object
Date         datetime64[ns]
Year                 object
Status               object
Latitude             object
Longitude            object
Windspeed             int64
Pressure              int64
dtype: object

In [30]:
hurricane_df_clean = hurricane_pd.loc[hurricane_pd["Year"] > "2002"]
hurricane_df_clean = hurricane_df_clean.loc[hurricane_df_clean['Status'].isin(['TS','HU'])]
hurricane_df_clean = hurricane_df_clean.loc[hurricane_df_clean['Name'] != "UNNAMED"]
hurricane_df_clean.dtypes

Storm_Id             object
Name                 object
Date         datetime64[ns]
Year                 object
Status               object
Latitude             object
Longitude            object
Windspeed             int64
Pressure              int64
dtype: object

In [34]:
hurricane_df_clean["Start Date"] = hurricane_df_clean.groupby("Name")["Date"].min()
hurricane_df_clean["End Date"] = hurricane_df_clean.groupby("Name")["Date"].max()

hurricane_df_clean.dtypes

Storm_Id              object
Name                  object
Date          datetime64[ns]
Year                  object
Status                object
Latitude              object
Longitude             object
Windspeed              int64
Pressure               int64
Start Date    datetime64[ns]
End Date      datetime64[ns]
dtype: object

In [36]:
# hurricane_df_max = hurricane_df_clean.reset_index()
hurricane_df_max_grouped = hurricane_df_clean.iloc[hurricane_df_clean.reset_index().groupby(['Name'])["Windspeed"].idxmax()]
hurricane_df_max_grouped.head(5)

Unnamed: 0,Storm_Id,Name,Date,Year,Status,Latitude,Longitude,Windspeed,Pressure,Start Date,End Date
46325,AL012006,ALBERTO,2006-06-12,2006,TS,26.8N,86.3W,60,1000,NaT,NaT
44739,AL012004,ALEX,2004-08-05,2004,HU,38.5N,66.0W,105,957,NaT,NaT
46129,AL262005,ALPHA,2005-10-23,2005,TS,17.3N,69.6W,45,1000,NaT,NaT
44120,AL012003,ANA,2003-04-21,2003,TS,31.4N,66.4W,50,995,NaT,NaT
49903,AL012013,ANDREA,2013-06-06,2013,TS,27.8N,84.9W,55,995,NaT,NaT
