# Hurricane Season Analysis

## Data Gathering

### Pseudocode
1.  import dependencies
2.  get csv file
3.  read csv file into pandas
4.  display
5.  cleanup/delete unnecessary columns

In [1]:
import pandas as pd
from datetime import datetime, timedelta
import csv
import matplotlib.pyplot as plt
import numpy as np
# Input CSV with the storm track data; the relative path is resolved against the
# notebook's working directory when the file is opened.
file = "hurricane_data.csv"

In [2]:
# Flatten the storm file into one record per observation.
# The file interleaves two kinds of rows: a storm header row (storm id and
# name) followed by that storm's observation rows. Every observation record is
# tagged with the id and name taken from the most recent header row.
hurricane_data = []

# Placeholders used when observation rows appear before any storm header row.
storm_id = "0000000"
storm_name = "UNNAMED"
with open(file, newline="") as hurricane_file:
    hurricane_reader = csv.reader(hurricane_file)
    next(hurricane_reader, None)  # skip the first line of the file
    for row in hurricane_reader:
        if not row:
            # Blank line: nothing to parse.
            continue
        if 'AL' in row[0]:
            # Storm header row: remember the id and name for the rows that follow.
            # A header carries no observation, so no record is appended for it.
            storm_id = row[0].strip()
            storm_name = row[1].strip()
            continue

        # Observation row: the first two fields hold the date and the time.
        observed_at = datetime.strptime(row[0] + row[1], '%Y%m%d %H%M%S')
        hurricane_data.append(
            {
                "Storm_Id": storm_id,
                "Name": storm_name,
                "Date": observed_at.strftime('%m-%d-%Y %H%M%S'),
                "Year": row[0][:4],
                "Landfall": row[2].strip(),
                "Status": row[3].strip(),
                "Latitude": row[4].strip(),
                "Longitude": row[5].strip(),
                "Windspeed": row[6].strip(),
                "Pressure": row[7].strip(),
            }
        )
                           
    

FileNotFoundError: [Errno 2] No such file or directory: 'hurricane_data.csv'

In [None]:
# Build the DataFrame from the parsed records and put the columns in display order
column_order = ["Storm_Id", "Name", "Date", "Year", "Status", "Latitude",
                "Longitude", "Windspeed", "Pressure", "Landfall"]
hurricane_pd = pd.DataFrame(hurricane_data)[column_order]
hurricane_pd.head()

# Data Cleanup

### Pseudocode
1. Check column counts for missing data
2. Check/Change column types for type conversion
3. Limit data to only the past 15 years, only Hurricanes and Tropical Storms, and only named storms
4. Get min date (when storm became Tropical Storm) and Max Date (When storm is no longer a tropical storm)
5. Calculate the duration of storm
6. Find nearest city for storms that reached landfall
7. Get max windspeed row and return all columns
8. Categorize storms according to Saffir-Simpson scale https://en.wikipedia.org/wiki/Maximum_sustained_wind 

In [None]:
# Count the non-null values in each column; a column whose count is lower than
# the others has missing data.
hurricane_pd.count()

In [None]:
# List each column's dtype to see which columns still need a type conversion.
hurricane_pd.dtypes

In [None]:
# Convert Windspeed and Pressure to numeric and Date to datetime.
hurricane_pd['Windspeed'] = pd.to_numeric(hurricane_pd['Windspeed'])
hurricane_pd['Pressure'] = pd.to_numeric(hurricane_pd['Pressure'])
# The Date strings were written as '%m-%d-%Y %H%M%S' when the file was parsed,
# so give pandas that exact format instead of letting it guess one.
hurricane_pd['Date'] = pd.to_datetime(hurricane_pd['Date'], format='%m-%d-%Y %H%M%S')

In [None]:
# Keep only the rows that satisfy all three conditions:
#   - Year is 1982 or later (Year is compared as a string against "1982")
#   - Status is Tropical Storm ("TS") or Hurricane ("HU")
#   - the storm has a name other than "UNNAMED"
# NOTE(review): the surrounding notes say "past 15 years" but the cutoff used
# here is 1982 -- confirm which one is intended.

is_recent = hurricane_pd["Year"] >= "1982"
is_ts_or_hu = hurricane_pd['Status'].isin(['TS','HU'])
is_named = hurricane_pd['Name'] != "UNNAMED"

hurricane_df_clean = hurricane_pd.loc[is_recent & is_ts_or_hu & is_named]
hurricane_df_clean.head()

In [None]:
# First and last Date of each storm in the filtered data, and the time between them.
# The filtered data only holds TS/HU rows, so these two dates bound the period in
# which the storm had one of those statuses.

storm_gb = hurricane_df_clean.groupby('Storm_Id')
storm_sgb = storm_gb['Date']
start_date, end_date = storm_sgb.min(), storm_sgb.max()
duration = end_date - start_date

In [None]:
# Attach each storm's Start Date, End Date and Duration to every one of its rows.
per_storm_dates = {
    "Start Date": start_date,
    "End Date": end_date,
    "Duration": duration,
}
# reset_index() turns the Storm_Id index into a regular column to join on.
start_end_df = pd.DataFrame(per_storm_dates).reset_index()

merge_df = hurricane_df_clean.merge(start_end_df, how="outer", on="Storm_Id")

merge_df.head(100)


In [None]:
# Keep only the rows flagged as landfall ("L").
# .copy() makes landfall_df an independent frame: later cells overwrite and add
# columns on it, which on a plain slice of merge_df triggers pandas'
# SettingWithCopyWarning.
landfall_df = merge_df.loc[merge_df['Landfall'] == "L"].copy()

In [None]:
# Find the nearest city to the landfall coordinates using citipy
from citipy import citipy

# Coordinates are strings made of a number followed by a hemisphere letter.
# Split on the letter and keep the numeric part (index 0).
lats = landfall_df["Latitude"].str.split("([A-Z]+)", expand=True)[0]
lons = landfall_df["Longitude"].str.split("([A-Z]+)", expand=True)[0]

# Hemisphere letter of each coordinate (NaN when the string carries none).
lat_hemisphere = landfall_df["Latitude"].str.extract("([A-Z]+)", expand=False)
lon_hemisphere = landfall_df["Longitude"].str.extract("([A-Z]+)", expand=False)

# Southern latitudes are negative. Longitudes are negative unless explicitly
# marked east, so every value not tagged "E" keeps the sign it always had here,
# while "E" longitudes and "S" latitudes no longer end up on the wrong side.
lat_sign = lat_hemisphere.map({"S": -1}).fillna(1)
lon_sign = lon_hemisphere.map({"E": 1}).fillna(-1)

# Replace the text columns with signed numeric degrees
landfall_df["Latitude"] = pd.to_numeric(lats) * lat_sign
landfall_df["Longitude"] = pd.to_numeric(lons) * lon_sign

# Use citipy to find the nearest city for every landfall row
latitude = landfall_df["Latitude"]
longitude = landfall_df["Longitude"]
cities = [citipy.nearest_city(lat, lon) for lat, lon in zip(latitude, longitude)]

In [None]:
# Add the nearest city's name to each landfall row
city_name = [city.city_name for city in cities]
landfall_df.loc[:, "Nearest City"] = city_name
landfall_df.head()

In [None]:
# Join the landfall rows back onto the full track data.
# Columns present in both frames get the suffix _x (from merge_df) or
# _y (from landfall_df). The mapping below lists, in output order, the columns
# to keep and the label each one gets.
# NOTE(review): both frames can hold several rows per Storm_Id, so this join
# pairs every track row with every landfall row of the same storm -- confirm
# that this is intended.
column_labels = {
    "Storm_Id": "Storm ID",
    "Name_x": "Name",
    "Date_x": "Date",
    "Year_x": "Year",
    "Status_x": "Status",
    "Latitude_x": "Max Latitude",
    "Longitude_x": "Max Longitude",
    "Windspeed_x": "Max Windspeed",
    "Pressure_x": "Max Pressure",
    "Start Date_x": "Start Date",
    "End Date_x": "End Date",
    "Duration_x": "Duration",
    "Landfall_y": "Landfall",
    "Latitude_y": "Lf Latitude",
    "Longitude_y": "Lf Longitude",
    "Windspeed_y": "Lf Windspeed",
    "Nearest City": "Nearest City",
}

storm_added_fields = pd.merge(merge_df, landfall_df, how="outer", on="Storm_Id")
storm_added_fields = storm_added_fields.loc[:, list(column_labels)]

storm_added_fields_df = storm_added_fields.rename(columns=column_labels)

storm_added_fields_df.head()

In [None]:
# For each storm keep the single row with the highest windspeed, with all its columns.
# reset_index() gives the rows positional labels, so the labels returned by
# idxmax() can be passed straight to .iloc.
max_wind_positions = (
    storm_added_fields_df.reset_index()
    .groupby(['Storm ID'])["Max Windspeed"]
    .idxmax()
)
# .copy() makes the result an independent frame, so columns can be added to it
# later without pandas raising SettingWithCopyWarning.
clean_storm_df = storm_added_fields_df.iloc[max_wind_positions].copy()
clean_storm_df.head()

In [None]:
# Bin edges for the storm categories, see https://en.wikipedia.org/wiki/Maximum_sustained_wind
min_wind = clean_storm_df["Max Windspeed"].min()
print(min_wind)
# With pd.cut's defaults each bin excludes its left edge and includes its right
# edge: (33, 63], (63, 82], ...
# The last edge is infinite so that no windspeed is too high for the top
# category; with a finite top edge anything above it would get NaN instead.
bins = [33, 63, 82, 95, 112, 136, np.inf]

# One name and one numeric value for each of the six bins
category_names = ['Tropical Storm', 'Category One', 'Category Two', 'Category Three', 'Category Four', 'Category Five']
category_values = [0, 1, 2, 3, 4, 5]

In [None]:
# Label every storm with its category name and with the matching numeric value
max_windspeed = clean_storm_df["Max Windspeed"]
storm_category = pd.cut(max_windspeed, bins=bins, labels=category_names)
category_value = pd.cut(max_windspeed, bins=bins, labels=category_values)

In [None]:
# Store the category name and its numeric value on the storm dataframe.
# The numeric value is converted with pd.to_numeric so it can be compared
# and averaged like any other number.
clean_storm_df["Storm Category"] = storm_category
clean_storm_df["Category Value"] = pd.to_numeric(category_value)
clean_storm_df.head()

# Quantity of Storms Over the Years

In [None]:
# Count the storms per year: all storms, and those of category 3 or stronger
grouped = clean_storm_df.groupby(['Year'])
over3 = clean_storm_df.loc[clean_storm_df['Category Value'] >= 3, :]
over3_grouped = over3.groupby(['Year'])

# count() counts the rows of each year that have a Duration value
quantity = grouped['Duration'].count()
quantity_over3 = over3_grouped['Duration'].count()

# NOTE(review): x_axis is not used by the plot below; it is kept only in case
# another cell reads it.
x_axis = np.arange(0, len(clean_storm_df['Year'].unique()) * 2, 2)

# Plot total quantity
plt.plot(quantity)
plt.xlabel('Year')
plt.ylabel('Number of Storms')
plt.title("Total Number of Storms per Year")
plt.ylim(0, 30)
# Year labels are strings, so the points sit at positions 0..n-1. Derive the
# limits from the number of years instead of hard-coding it, leaving one
# position of padding on each side.
plt.xlim(-1, len(quantity))
plt.xticks(rotation='vertical')
plt.grid()

plt.tight_layout()
plt.show()

In [None]:
# Number of storms of category 3 or stronger in each year
fig, ax = plt.subplots()
ax.plot(quantity_over3)
ax.set_xlabel('Year')
ax.set_ylabel('Number of Storms Category 3 or Stronger')
ax.set_title("Total Number of Storms Category 3 or Stronger per Year")
ax.set_ylim(0, 8)
plt.xticks(rotation='vertical')
ax.grid()

fig.tight_layout()
plt.show()

# Strength of Storms Over the Years

In [None]:
# Average max windspeed, max pressure and duration of the storms in each year.
# .copy() gives strength_df its own data, so assigning the Duration column below
# does not raise SettingWithCopyWarning.
strength_df = clean_storm_df[["Year","Max Windspeed","Max Pressure","Duration"]].copy()
# NOTE(review): Duration is a timedelta (End Date - Start Date); pd.to_numeric
# presumably turns it into integer nanoseconds, so duration_avg would be in
# that unit -- confirm.
strength_df['Duration'] = pd.to_numeric(strength_df['Duration'])
strength_year = strength_df.groupby(['Year'])

# Yearly averages
max_windspeed_avg = strength_year['Max Windspeed'].mean()
max_pressure_avg = strength_year['Max Pressure'].mean()
duration_avg = strength_year['Duration'].mean()

# Line graph of the average max windspeed per year
graph = plt.plot(max_windspeed_avg)
plt.xlabel('Year')
plt.ylabel('Average Max Windspeed')
plt.title("Average Max Windspeed Over the Last 15 Years")
plt.xticks(rotation='vertical')
plt.grid()

plt.tight_layout()
plt.show()

In [None]:
# Line graph of the average max pressure per year
fig, ax = plt.subplots()
graph = ax.plot(max_pressure_avg)
ax.set_xlabel('Year')
ax.set_ylabel('Average Max Pressure')
ax.set_title("Average Max Pressure Over the Last 15 Years")
plt.xticks(rotation='vertical')
ax.grid()

fig.tight_layout()
plt.show()

In [None]:
# Line graph of the average storm duration per year
fig, ax = plt.subplots()
graph = ax.plot(duration_avg)
ax.set_xlabel('Year')
ax.set_ylabel('Average Duration')
ax.set_title("Average Duration Over the Years")
plt.xticks(rotation='vertical')
ax.grid()

fig.tight_layout()
plt.show()

# Most Dangerous City in Relationship to Storms

In [None]:
# Bar graph of the 20 cities that appear most often as a storm's nearest city
city_count = clean_storm_df["Nearest City"].value_counts()
top_cities = city_count.head(20)
sorted_cities = top_cities.sort_values(ascending=False)

plt.bar(sorted_cities.index, sorted_cities)
# Label the figure like the other plots in this notebook so it can be read on its own
plt.xlabel('Nearest City')
plt.ylabel('Number of Storms')
plt.title("Number of Storms by Nearest City (Top 20)")
plt.xticks(rotation='vertical')

plt.tight_layout()
plt.show()

In [None]:
# Table of storm count and average max windspeed per nearest city

# Group by city to get the number of storms and the average max windspeed
by_city = clean_storm_df.groupby("Nearest City")
city_count = by_city['Storm ID'].count()
city_wind = by_city['Max Windspeed'].mean()

# Combine both measures, order the cities from most to fewest storms,
# and keep only the cities with 4 storms or more
cities_df = (
    pd.DataFrame({"Number of Storms": city_count, "Avg Max Windspeed": city_wind})
    .sort_values('Number of Storms', ascending=False)
    .loc[lambda frame: frame['Number of Storms'] >= 4, :]
)

cities_df

# Storm Season

In [None]:
# NOTE(review): everything below is a disabled draft kept inside a string
# literal, so none of it runs and `strength_list` is never defined by this
# cell. The draft is also incomplete: the statement
# `windspeed = city_df["Max Windspeed"].` ends after the dot.
'''windspeed = []
strength_list = []

for city in sorted_cities.index:
    city_df = clean_storm_df.loc[clean_storm_df["Nearest City"] == city,:]
    windspeed = city_df["Max Windspeed"].
    cities = city
    #print(windspeed)
    
    #Create a dictionaty of results
    filtered_dict = {
            #"City": city,
            "Max Windspeed": windspeed
            }
    strength_list.append(filtered_dict)
    #print(city)
    #print(city)
    #print(city_df["Max Windspeed"])'''

# Storm Duration

In [None]:
# Create DataFrame from Results List
# NOTE(review): `strength_list` is only mentioned inside a disabled
# (string-literal) draft in this notebook, so on a fresh kernel this line is
# expected to raise NameError -- confirm where the list should come from.
results_df = pd.DataFrame(strength_list)

results_df

# Hurricane Season

# Storm Duration

In [None]:
# TODO: Save the figures as image files
# TODO: Figure out how to get rid of the red messages (presumably the warnings printed under some cells)