In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
# read in cleaned hurricane data
# hurricane_data = pd.read_json("../static/js/test.json")

In [3]:
# hurricane_data.head()

In [4]:
# The csv has no header row, so the column names are supplied here.
# The twelve wind-radii columns all follow "<speed>kt_wind_radii_<quadrant>",
# so they are generated (speed-major, quadrants in NE/SE/SW/NW order).
wind_radii_cols = [
    f"{speed}kt_wind_radii_{quadrant}"
    for speed in (34, 50, 64)
    for quadrant in ("NE", "SE", "SW", "NW")
]
col_names = (
    ["name", "date", "time_UTC", "record_identifier", "status",
     "latitude", "longitude", "max_wind_knots", "min_pressure_millibars"]
    + wind_radii_cols
    + ["radius_max_wind"]
)

# read in csv with column names above
# `date` and `time_UTC` are pinned to str: a later cell slices `date` with the
# .str accessor, which must not depend on pandas' dtype inference.
noaa_data = pd.read_csv(
    "NOAA_reformatted.csv",
    header=None,
    names=col_names,
    dtype={"date": str, "time_UTC": str},
)

In [5]:
# Preview the raw frame. Rows that hold a storm's header line show up with
# NaN in most columns (see index 0 in the output).
noaa_data

Unnamed: 0,name,date,time_UTC,record_identifier,status,latitude,longitude,max_wind_knots,min_pressure_millibars,34kt_wind_radii_NE,...,34kt_wind_radii_NW,50kt_wind_radii_NE,50kt_wind_radii_SE,50kt_wind_radii_SW,50kt_wind_radii_NW,64kt_wind_radii_NE,64kt_wind_radii_SE,64kt_wind_radii_SW,64kt_wind_radii_NW,radius_max_wind
0,,AL011851,UNNAMED,14,,,,,,,...,,,,,,,,,,
1,UNNAMED,18510625,0,,HU,28.0N,94.8W,80.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
2,UNNAMED,18510625,600,,HU,28.0N,95.4W,80.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
3,UNNAMED,18510625,1200,,HU,28.0N,96.0W,80.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
4,UNNAMED,18510625,1800,,HU,28.1N,96.5W,80.0,-999.0,-999.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55432,WANDA,20211107,0,,TS,37.4N,37.4W,35.0,1003.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0
55433,WANDA,20211107,600,,TS,38.1N,36.4W,35.0,1004.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.0
55434,WANDA,20211107,1200,,LO,39.2N,34.9W,35.0,1006.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50.0
55435,WANDA,20211107,1800,,LO,40.9N,32.8W,40.0,1006.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50.0


In [6]:
# Only these columns are needed downstream; everything else is discarded.
wanted_cols = ["name", "date", "time_UTC", "latitude", "longitude", "status", "max_wind_knots"]

# Work on an independent copy with every row containing a NaN removed
# (the row carrying the name of the hurricane has many of these).
noaa_df = noaa_data.loc[:, wanted_cols].dropna().copy()

# Normalise the names in one pass: lowercase everything, capitalise the first
# letter of each word, then strip whitespace from the beginning.
noaa_df["name"] = noaa_df["name"].str.lower().str.title().str.lstrip()

In [7]:
# Inspect the trimmed frame: only the wanted columns remain, the NaN rows are
# gone, and names are title-cased.
noaa_df

Unnamed: 0,name,date,time_UTC,latitude,longitude,status,max_wind_knots
1,Unnamed,18510625,0,28.0N,94.8W,HU,80.0
2,Unnamed,18510625,600,28.0N,95.4W,HU,80.0
3,Unnamed,18510625,1200,28.0N,96.0W,HU,80.0
4,Unnamed,18510625,1800,28.1N,96.5W,HU,80.0
5,Unnamed,18510625,2100,28.2N,96.8W,HU,80.0
...,...,...,...,...,...,...,...
55432,Wanda,20211107,0,37.4N,37.4W,TS,35.0
55433,Wanda,20211107,600,38.1N,36.4W,TS,35.0
55434,Wanda,20211107,1200,39.2N,34.9W,LO,35.0
55435,Wanda,20211107,1800,40.9N,32.8W,LO,40.0


In [8]:
# `date` is an 8-character YYYYMMDD string; carve out year, month and day
# by character position.
date_text = noaa_df["date"]
noaa_df["year"] = date_text.str[0:4]
noaa_df["month"] = date_text.str[4:6]
noaa_df["day"] = date_text.str[6:]

noaa_df

Unnamed: 0,name,date,time_UTC,latitude,longitude,status,max_wind_knots,year,month,day
1,Unnamed,18510625,0,28.0N,94.8W,HU,80.0,1851,06,25
2,Unnamed,18510625,600,28.0N,95.4W,HU,80.0,1851,06,25
3,Unnamed,18510625,1200,28.0N,96.0W,HU,80.0,1851,06,25
4,Unnamed,18510625,1800,28.1N,96.5W,HU,80.0,1851,06,25
5,Unnamed,18510625,2100,28.2N,96.8W,HU,80.0,1851,06,25
...,...,...,...,...,...,...,...,...,...,...
55432,Wanda,20211107,0,37.4N,37.4W,TS,35.0,2021,11,07
55433,Wanda,20211107,600,38.1N,36.4W,TS,35.0,2021,11,07
55434,Wanda,20211107,1200,39.2N,34.9W,LO,35.0,2021,11,07
55435,Wanda,20211107,1800,40.9N,32.8W,LO,40.0,2021,11,07


In [9]:
# Latitude and longitude arrive as text such as "28.0N" / "94.8W".
# For each axis: strip leading whitespace, then split the text into a float
# magnitude (everything but the last character) and a direction letter
# (the last character).
for source_col, prefix in (("latitude", "lat"), ("longitude", "long")):
    trimmed = noaa_df[source_col].str.lstrip()
    noaa_df[source_col] = trimmed
    noaa_df[prefix + "_coord"] = trimmed.str[:-1].astype(float)
    noaa_df[prefix + "_dir"] = trimmed.str[-1:]

noaa_df

Unnamed: 0,name,date,time_UTC,latitude,longitude,status,max_wind_knots,year,month,day,lat_coord,lat_dir,long_coord,long_dir
1,Unnamed,18510625,0,28.0N,94.8W,HU,80.0,1851,06,25,28.0,N,94.8,W
2,Unnamed,18510625,600,28.0N,95.4W,HU,80.0,1851,06,25,28.0,N,95.4,W
3,Unnamed,18510625,1200,28.0N,96.0W,HU,80.0,1851,06,25,28.0,N,96.0,W
4,Unnamed,18510625,1800,28.1N,96.5W,HU,80.0,1851,06,25,28.1,N,96.5,W
5,Unnamed,18510625,2100,28.2N,96.8W,HU,80.0,1851,06,25,28.2,N,96.8,W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55432,Wanda,20211107,0,37.4N,37.4W,TS,35.0,2021,11,07,37.4,N,37.4,W
55433,Wanda,20211107,600,38.1N,36.4W,TS,35.0,2021,11,07,38.1,N,36.4,W
55434,Wanda,20211107,1200,39.2N,34.9W,LO,35.0,2021,11,07,39.2,N,34.9,W
55435,Wanda,20211107,1800,40.9N,32.8W,LO,40.0,2021,11,07,40.9,N,32.8,W


In [10]:
# Build an id of the form "<Name> <YYYY><MM>" for every row so the NOAA data
# can be compared with another dataset.
# NOTE(review): the id is only name + year + month, so two storms sharing a
# name in the same month (e.g. several "Unnamed" storms) get the same id, and a
# storm that crosses a month boundary gets two ids -- confirm this is intended.
year_month = noaa_df["year"] + noaa_df["month"].astype(str)
noaa_df["id"] = noaa_df["name"] + " " + year_month

In [11]:
# hurricane_data.head()

In [12]:
# Quick look at the first rows to check the id column.
noaa_df.head()

Unnamed: 0,name,date,time_UTC,latitude,longitude,status,max_wind_knots,year,month,day,lat_coord,lat_dir,long_coord,long_dir,id
1,Unnamed,18510625,0,28.0N,94.8W,HU,80.0,1851,6,25,28.0,N,94.8,W,Unnamed 185106
2,Unnamed,18510625,600,28.0N,95.4W,HU,80.0,1851,6,25,28.0,N,95.4,W,Unnamed 185106
3,Unnamed,18510625,1200,28.0N,96.0W,HU,80.0,1851,6,25,28.0,N,96.0,W,Unnamed 185106
4,Unnamed,18510625,1800,28.1N,96.5W,HU,80.0,1851,6,25,28.1,N,96.5,W,Unnamed 185106
5,Unnamed,18510625,2100,28.2N,96.8W,HU,80.0,1851,6,25,28.2,N,96.8,W,Unnamed 185106


In [13]:
# create list of hurricane ids from cleaned dataset to compare with noaa dataset
# hurr_names = hurricane_data["Name_year"].unique().tolist()

In [14]:
# # get only the rows with the same hurricanes as in our cleaned dataset
# noaa_match_names = noaa_df.loc[noaa_df["id"].isin(hurr_names)]
# noaa_match_names = noaa_match_names.reset_index(drop=True)
# noaa_match_names

In [15]:
# clean up the dataframe for export

# convert coordinates to pos and neg based on direction:
# "S" latitudes and "W" longitudes become negative, everything else positive.
# The sign is applied to the absolute value so the cell is idempotent --
# running it a second time cannot flip already-negated coordinates back.
lat_magnitude = noaa_df["lat_coord"].abs()
long_magnitude = noaa_df["long_coord"].abs()
noaa_df["lat_coord"] = np.where(noaa_df["lat_dir"] == "S", -lat_magnitude, lat_magnitude)
noaa_df["long_coord"] = np.where(noaa_df["long_dir"] == "W", -long_magnitude, long_magnitude)

In [16]:
# Check the sign conversion: rows with long_dir "W" should now show a
# negative long_coord.
noaa_df

Unnamed: 0,name,date,time_UTC,latitude,longitude,status,max_wind_knots,year,month,day,lat_coord,lat_dir,long_coord,long_dir,id
1,Unnamed,18510625,0,28.0N,94.8W,HU,80.0,1851,06,25,28.0,N,-94.8,W,Unnamed 185106
2,Unnamed,18510625,600,28.0N,95.4W,HU,80.0,1851,06,25,28.0,N,-95.4,W,Unnamed 185106
3,Unnamed,18510625,1200,28.0N,96.0W,HU,80.0,1851,06,25,28.0,N,-96.0,W,Unnamed 185106
4,Unnamed,18510625,1800,28.1N,96.5W,HU,80.0,1851,06,25,28.1,N,-96.5,W,Unnamed 185106
5,Unnamed,18510625,2100,28.2N,96.8W,HU,80.0,1851,06,25,28.2,N,-96.8,W,Unnamed 185106
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55432,Wanda,20211107,0,37.4N,37.4W,TS,35.0,2021,11,07,37.4,N,-37.4,W,Wanda 202111
55433,Wanda,20211107,600,38.1N,36.4W,TS,35.0,2021,11,07,38.1,N,-36.4,W,Wanda 202111
55434,Wanda,20211107,1200,39.2N,34.9W,LO,35.0,2021,11,07,39.2,N,-34.9,W,Wanda 202111
55435,Wanda,20211107,1800,40.9N,32.8W,LO,40.0,2021,11,07,40.9,N,-32.8,W,Wanda 202111


In [17]:
# Columns that end up in the json file.
cols_to_keep = ["name", "time_UTC", "year", "month", "day", "lat_coord", "long_coord", "id", "status", "max_wind_knots"]

# Independent copy restricted to those columns, with a fresh 0..n-1 index.
final_df = noaa_df.loc[:, cols_to_keep].copy().reset_index(drop=True)

# One group per id.
grouped = final_df.groupby("id")

# Hurricane ids as a plain list, in order of first appearance.
hurricane_ids = final_df["id"].drop_duplicates().tolist()

In [18]:
# Assemble one json-ready record per hurricane id. Each record holds the
# storm name plus parallel per-observation lists, and `coords` as a list of
# [lat, long] pairs.
final_dict = {}

for hurricane in hurricane_ids:
    # Look the group up once and reuse it for every field.
    storm = grouped.get_group(hurricane)

    # Take the name from the data rather than the first word of the id, so a
    # name containing whitespace is not truncated.
    hurricane_name = storm["name"].iloc[0]

    coords = [
        [lat, lon]
        for lat, lon in zip(storm["lat_coord"].tolist(), storm["long_coord"].tolist())
    ]

    final_dict[hurricane] = {
        "name": hurricane_name,
        "time_UTC": storm["time_UTC"].tolist(),
        "year": storm["year"].tolist(),
        "month": storm["month"].tolist(),
        "day": storm["day"].tolist(),
        "coords": coords,
        "id": storm["id"].tolist(),
        "status": storm["status"].tolist(),
        "max_wind_knots": storm["max_wind_knots"].tolist(),
    }

In [19]:
# Serialise the per-hurricane dictionary and save it as hurricane_path.json
# in the current working directory (an existing file is overwritten).
with open("hurricane_path.json", "w") as outfile:
    outfile.write(json.dumps(final_dict))

In [20]:
# Display the assembled dictionary to check it.
# NOTE(review): this echoes every record, so the cell output is very long.
final_dict

{'Unnamed 185106': {'name': 'Unnamed',
  'time_UTC': ['0',
   '600',
   '1200',
   '1800',
   '2100',
   '0',
   '600',
   '1200',
   '1800',
   '0',
   '600',
   '1200',
   '1800',
   '0'],
  'year': ['1851',
   '1851',
   '1851',
   '1851',
   '1851',
   '1851',
   '1851',
   '1851',
   '1851',
   '1851',
   '1851',
   '1851',
   '1851',
   '1851'],
  'month': ['06',
   '06',
   '06',
   '06',
   '06',
   '06',
   '06',
   '06',
   '06',
   '06',
   '06',
   '06',
   '06',
   '06'],
  'day': ['25',
   '25',
   '25',
   '25',
   '25',
   '26',
   '26',
   '26',
   '26',
   '27',
   '27',
   '27',
   '27',
   '28'],
  'coords': [[28.0, -94.8],
   [28.0, -95.4],
   [28.0, -96.0],
   [28.1, -96.5],
   [28.2, -96.8],
   [28.2, -97.0],
   [28.3, -97.6],
   [28.4, -98.3],
   [28.6, -98.9],
   [29.0, -99.4],
   [29.5, -99.8],
   [30.0, -100.0],
   [30.5, -100.1],
   [31.0, -100.2]],
  'id': ['Unnamed 185106',
   'Unnamed 185106',
   'Unnamed 185106',
   'Unnamed 185106',
   'Unnamed 185106',