In [1]:
import os
import re
import folium
import pandas as pd
import numpy as np
import datetime
from urllib.request import urlretrieve
from tqdm import tqdm_notebook as tqdm
from hurricane_functions import *

## Resources:
https://www.kaggle.com/poonaml/last-cab-to-new-york-animated-heatmap-trips-folium/

https://deparkes.co.uk/2016/06/03/plot-lines-in-folium/

## Load Data

In [2]:
def read_hurdat(url, local_fname, location):
    if not os.path.exists(local_fname):
        urlretrieve(url, local_fname)

    records = []
    with open(local_fname,'r') as f:
        for line in f:
            if line.startswith(location):
                record = line.strip()
                reports = []
                records.append((record, reports))
            else:
                reports.append(line.strip())
                
    return records

In [3]:
url = "https://www.nhc.noaa.gov/data/hurdat/hurdat2-1851-2017-050118.txt"
local_fname = "hurdat2.txt"

records = read_hurdat(url, local_fname, "AL") # AL for atlantic hurricanes

## Analyze hurricanes data

In [21]:
# Define all functions required for parsing the raw data and mapping hurricane paths

def get_all_hurricanes(records):
    """
    The raw records from hurdat2 is a list of lists. We want 
    to parse this into a neat dataframe for easy analysis.
    """
    hurr_list = []
    for hurricane in tqdm(records, total=len(records)):
        year = hurricane[0].split(',')[0][-4:]
        hurr_id = hurricane[0].split(',')[0]
        hurr_list.append(hurr_id)
            
    num_hurr = len(hurr_list)
    records_for_yr = records[-num_hurr:]
    
    return records_for_yr

def create_hurricane_df(records):
    """
    The raw data from NOAA is a text file. This function parses that
    text file into a nice, readable Pandas dataframe for easy manipulation.
    """
    records_df = pd.DataFrame()
    idx = 0

    for record in records:
        hurricane_id = record[0].split(',')[0]
        hurricane_name = record[0].split(',')[1].strip()

        for datapoint in record[1]:
            data_list = [x.strip() for x in datapoint.split(',')]
            record_date = data_list[0]
            time = data_list[1]
            storm_status = data_list[3]
            lat = data_list[4]
            lon = data_list[5]
            max_wind = data_list[6]
            min_pressure = data_list[7]

            # Add to df
            records_df.loc[idx, 'id'] = hurricane_id
            records_df.loc[idx, 'name'] = hurricane_name
            records_df.loc[idx, 'date'] = record_date
            records_df.loc[idx, 'time'] = time
            records_df.loc[idx, 'dt'] = datetime.datetime.strptime(record_date+time,'%Y%m%d%H%M')        
            records_df.loc[idx, 'storm_status'] = storm_status
            records_df.loc[idx, 'lat'] = convert_lat_lon(lat, 'lat')
            records_df.loc[idx, 'lon'] = convert_lat_lon(lon, 'lon')
            records_df.loc[idx, 'max_wind'] = float(max_wind)
            records_df.loc[idx, 'min_pressure'] = float(min_pressure)

            idx +=1
            
    return records_df


def convert_lat_lon(value, col):
    """
    Lat/lon is encoded with the numeric value plus E/W/N/S. We want to 
    convert this into an absolute decimal value for folium to interpret.
    """
    if col=='lon':
        amount = -float(re.sub('[EW]', '', value)) if 'W' in value else float(re.sub('[EW]', '', value))
    elif col=='lat':
        amount = -float(re.sub('[NS]', '', value)) if 'S' in value else float(re.sub('[NS]', '', value))
    
    return amount

def plot_path(df, folium_map):
    """
    Generates a tuple comprising of lat/lon for the 
    folium map to plot.
    """
    # Folium requires an array of tuples
    points = df[['lat', 'lon']]
    tuple_points = [tuple(x) for x in points.values]
    return tuple_points

def hurr_category(max_wind):
    """
    Defining storm category at a point in time.
    """
    if max_wind <= 73:
        cat = 'TS'
    if (74 <= max_wind <= 95):
        cat = 1
    if (96 <= max_wind <= 110):
        cat = 2
    if (111 <= max_wind <= 129):
        cat = 3
    if (130 <= max_wind <= 156):
        cat = 4
    if max_wind >= 157:
        cat = 5
    return cat

def plot_paths_for_year(df, folium_map):
    """
    Plots hurricane path for all hurricanes in the dataframe
    where lines are color coded for the intensity (category) at that
    point in time.
    """
    # Map colors of path based on category level
    path_colors = {1: '#ffb3b3',
                   2: '#ff8080',
                   3: '#e60000',
                   4: '#ff0000',
                   5: '#580808',
                  'TS': '#ffe6e6'}
    
    for name in df['name'].unique():
        name_df = df[df['name']==name]
        for i, dt in name_df.iterrows():
            day_df = name_df.ix[i:i+1, :]
            day_points = plot_path(day_df, folium_map)
            date = name_df.loc[i, 'date']
            time = name_df.loc[i, 'time']
            category = hurr_category(name_df.loc[i, 'max_wind'])
            maxwind = name_df.loc[i, 'max_wind']
            hurr_info = f"""
            Storm:    {name}
            Date:     {date}
            Time:     {time}
            Category: {category}
            Max Wind: {maxwind}mph
            """
            folium.PolyLine(day_points, tooltip=name, popup=hurr_info, color=path_colors[category]).add_to(folium_map)
            
    return

def get_highest_category(input_df):
    """
    Each row is a hurricane at a point in time. Category will vary
    depending on wind speed at the time. We want to have an identifier
    for what each hurricane's max recorded category was.
    """
    df = input_df.copy(deep=True)
    for hurricane in df['id'].unique():
        hurr_df = df[df['id']==hurricane]
        try:
            max_cat = max(hurr_df[hurr_df['category']!='TS']['category'])
        except ValueError:
            max_cat = 'TS'
        df_indexes = hurr_df.index
        df.loc[df_indexes, 'category_highest'] = max_cat
        
    return df

#### Create the hurdat Pandas dataframe

In [None]:
# Parse records for entire hurricane data history into dataframe
all_records = get_all_hurricanes(records)
all_hurricanes_df = create_hurricane_df(all_records)
all_hurricanes_df.to_csv('all_hurricanes_df.csv', index=False)

#### Load data if above is not run

In [2]:
# Load hurdat data formatted into dataframe
hurricanes_df = pd.read_csv("all_hurricanes_df.csv")
hurricanes_df['year'] = pd.DatetimeIndex(hurricanes_df['dt']).year
hurricanes_df.tail()

Unnamed: 0,id,name,date,time,dt,storm_status,lat,lon,max_wind,min_pressure,category,year
50298,AL192017,RINA,20171108,1200,2017-11-08 12:00:00,TS,38.3,-48.8,45.0,994.0,TS,2017
50299,AL192017,RINA,20171108,1800,2017-11-08 18:00:00,TS,40.1,-49.0,45.0,992.0,TS,2017
50300,AL192017,RINA,20171109,0,2017-11-09 00:00:00,TS,41.8,-48.8,45.0,991.0,TS,2017
50301,AL192017,RINA,20171109,600,2017-11-09 06:00:00,LO,43.6,-48.0,40.0,993.0,TS,2017
50302,AL192017,RINA,20171109,1200,2017-11-09 12:00:00,LO,45.5,-47.0,40.0,995.0,TS,2017


#### Add identifier if hurricane made landfall

In [18]:
from mpl_toolkits.basemap import Basemap

bm = Basemap()   # default: projection='cyl'
print(bm.is_land(99.675, 13.104))  #True
print(bm.is_land(100.539, 13.104))  #False

True
False


In [20]:
hurricanes_df['is_land'] = np.nan

for i, row in hurricanes_df.iterrows():
    hurricanes_df.loc[i, 'is_land'] = np.where(bm.is_land(hurricanes_df.loc[i, 'lat'], hurricanes_df.loc[i, 'lon'])==True, 1, 0)

In [21]:
hurricanes_df

Unnamed: 0,id,name,date,time,dt,storm_status,lat,lon,max_wind,min_pressure,category,year,is_land
0,AL011851,UNNAMED,18510625,0,1851-06-25 00:00:00,HU,28.0,-94.8,80.0,-999.0,1,1851,0.0
1,AL011851,UNNAMED,18510625,600,1851-06-25 06:00:00,HU,28.0,-95.4,80.0,-999.0,1,1851,0.0
2,AL011851,UNNAMED,18510625,1200,1851-06-25 12:00:00,HU,28.0,-96.0,80.0,-999.0,1,1851,0.0
3,AL011851,UNNAMED,18510625,1800,1851-06-25 18:00:00,HU,28.1,-96.5,80.0,-999.0,1,1851,0.0
4,AL011851,UNNAMED,18510625,2100,1851-06-25 21:00:00,HU,28.2,-96.8,80.0,-999.0,1,1851,0.0
5,AL011851,UNNAMED,18510626,0,1851-06-26 00:00:00,HU,28.2,-97.0,70.0,-999.0,TS,1851,0.0
6,AL011851,UNNAMED,18510626,600,1851-06-26 06:00:00,TS,28.3,-97.6,60.0,-999.0,TS,1851,0.0
7,AL011851,UNNAMED,18510626,1200,1851-06-26 12:00:00,TS,28.4,-98.3,60.0,-999.0,TS,1851,0.0
8,AL011851,UNNAMED,18510626,1800,1851-06-26 18:00:00,TS,28.6,-98.9,50.0,-999.0,TS,1851,0.0
9,AL011851,UNNAMED,18510627,0,1851-06-27 00:00:00,TS,29.0,-99.4,50.0,-999.0,TS,1851,0.0


### Compare hurricane paths in 2000-2017 and 1900-1917

In [10]:
hurr_2000s = hurricanes_df[hurricanes_df['year']>=2008]
hurr_2000s_map = folium.Map(location=[30, -60],
                            zoom_start = 4)

plot_paths_for_year(hurr_2000s, hurr_2000s_map)

# Save as html
hurr_2000s_map.save('maps/hurricane_paths_2008-2017.html')

In [11]:
hurr_1900s = hurricanes_df[(hurricanes_df['year']>=1918) & (hurricanes_df['year']<=1927)]
hurr_1900s_map = folium.Map(location=[30, -60],
                            zoom_start = 4)
plot_paths_for_year(hurr_1900s, hurr_1900s_map)

# Save as html
hurr_1900s_map.save('maps/hurricane_paths_1918-1927.html')

### TODO: 
* Plot all cat5 hurricanes in the last 10 years
* Pct of storms that made landfall(https://gis.stackexchange.com/questions/235133/checking-if-a-geocoordinate-point-is-land-or-ocean)
* Filter by storm for each state hit

# ---------------

## Analyzing data for all years

In [17]:
# keep named hurricanes only
hurricanes_df_named = hurricanes_df[hurricanes_df['name']!='UNNAMED']

# Get category for each data point
hurricanes_df_named['category'] = [hurr_category(cat) for cat in list(hurricanes_df_named['max_wind'])]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
