In [None]:
import numpy as np
import os
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
sns.set()

import netCDF4
import time
import xarray

In [None]:
""" Read .nc file into pandas dataframe."""
# Load the IBTrACS storm-track NetCDF file, keep the columns of interest and
# give them the shorter names used by the rest of the notebook (result: ibt2).
# Runtime around 452 seconds using perf_counter.

tic = time.perf_counter()

nc_ibtracs = 'Allstorms.ibtracs_all.v03r10.nc'
path_to_nc = os.path.join('..','deconstruct_cyn',nc_ibtracs)

# Source column -> name used downstream. A single mapping keeps each source
# column next to its new name, so the two lists below cannot drift apart.
columns = {'storm_sn': 'storm_sn',
           'name': 'name',
           'source_time': 'time_recorded',
           'landfall': 'landfall',
           'genesis_basin': 'genesis_basin',
           'season': 'season',
           'source_lat': 'lat',
           'source_lon': 'lon',
           'source_wind': 'max_wind',
           'source_pres': 'min_pres',
           'dist2land': 'dist2land'}

columns_we_want = list(columns)
renamed_columns = list(columns.values())

# Open the dataset in a context manager so the underlying file handle is
# closed as soon as the data has been materialised as a DataFrame.
with xarray.open_dataset(path_to_nc) as ibtracs_ds:
    ibt_all_fields = ibtracs_ds.to_dataframe()

ibt = ibt_all_fields[columns_we_want]

# The full table is no longer needed; drop the reference to free memory.
del ibt_all_fields

# NOTE(review): index=str also converts every index label to a string. It is
# kept to preserve the original result -- confirm whether it is needed.
ibt2 = ibt.rename(index=str, columns = columns)

del ibt

toc = time.perf_counter()

elapsed = toc - tic
print(elapsed)

In [None]:
""" Import landmask."""
# Load the ETOPO1 relief grid into a DataFrame (result: landmask_all_fields).
# Roughly 10 seconds when last measured; that figure was CPU time taken with
# time.process_time(). Wall-clock time is measured here with perf_counter(),
# matching the IBTrACS cell, so that time spent waiting on disk is included.
tic = time.perf_counter()

nc_landmask = 'ETOPO1_Ice_g_gmt4.nc' #Land mask from ETOPO1 Global Relief Model
path_to_nc2 = os.path.join('..','deconstruct_cyn',nc_landmask)

# Context manager closes the file handle once the data is in memory.
with xarray.open_dataset(path_to_nc2) as landmask_ds:
    landmask_all_fields = landmask_ds.to_dataframe()

toc = time.perf_counter()

elapsed = toc - tic
print(elapsed)

# Creating a Boolean Mask for the Philippines

Here we want to create a bounding box for the Philippines. Note that the box made by the previous project team had sloped sides, so the approach below is written to work for an arbitrary quadrilateral.

In [None]:
# Move the index levels of the landmask frame into ordinary columns so the
# coordinates can be used in the comparisons and merges below.
lm2 = landmask_all_fields.reset_index()

In [None]:
# Tolerance added to the latitude limits of band A.
eps = 1e-3

# Each (m, c) pair describes one sloped edge of the box as the straight line
#     lat = m * lon + c
# which the cells below invert to lon = (lat - c) / m.
m1, c1 = 1/5, -5.38        # left edge of band A
m2, c2 = -130/53, 320.65   # right edge of bands A and B
m3, c3 = 27/56, -55.61     # right edge of band gamma
m4, c4 = -73/26, 347.72    # left edge of bands B and gamma

In [None]:
# Band A: grid points whose latitude lies in [18.1, 19.2], widened by eps on
# both sides.
lat_within_a = (lm2['lat'] >= 18.1 - eps) & (lm2['lat'] <= 19.2 + eps)
lat_within_a.value_counts()

In [None]:
# Band A: longitude of the left (m1, c1) and right (m2, c2) edges at each grid
# point's latitude, obtained by inverting lat = m * lon + c.
lon_left_a = (lm2['lat'] - c1) / m1
lon_right_a = (lm2['lat'] - c2) / m2

# A point is inside when its longitude lies between the two edges (inclusive).
lon_within_a = (lm2['lon'] >= lon_left_a) & (lm2['lon'] <= lon_right_a)
lon_within_a.value_counts()

In [None]:
# A grid point belongs to band A only if both its latitude and its longitude
# tests pass.
point_within_a = lat_within_a & lon_within_a
point_within_a.value_counts()

In [None]:
# Band B: grid points whose latitude lies in [6.2, 18.1] (no eps widening).
lat_within_b = (lm2['lat'] >= 6.2) & (lm2['lat'] <= 18.1)
lat_within_b.value_counts()

In [None]:
# Band B: longitude of the left (m4, c4) and right (m2, c2) edges at each grid
# point's latitude, obtained by inverting lat = m * lon + c.
lon_left_b = (lm2['lat'] - c4) / m4
lon_right_b = (lm2['lat'] - c2) / m2

# A point is inside when its longitude lies between the two edges (inclusive).
lon_within_b = (lm2['lon'] >= lon_left_b) & (lm2['lon'] <= lon_right_b)
lon_within_b.value_counts()

In [None]:
# A grid point belongs to band B only if both its latitude and its longitude
# tests pass.
point_within_b = lat_within_b & lon_within_b
point_within_b.value_counts()

In [None]:
# Band gamma: grid points whose latitude lies in [3.484, 6.2].
# (The lower limit was originally 3.5.)
lat_within_gamma = (lm2['lat'] >= 3.484) & (lm2['lat'] <= 6.2)
lat_within_gamma.value_counts()

In [None]:
# Band gamma: longitude of the left (m4, c4) and right (m3, c3) edges at each
# grid point's latitude, obtained by inverting lat = m * lon + c.
lon_left_gamma = (lm2['lat'] - c4) / m4
lon_right_gamma = (lm2['lat'] - c3) / m3

# A point is inside when its longitude lies between the two edges (inclusive).
lon_within_gamma = (lm2['lon'] >= lon_left_gamma) & (lm2['lon'] <= lon_right_gamma)
lon_within_gamma.value_counts()

In [None]:
# A grid point belongs to band gamma only if both its latitude and its
# longitude tests pass.
point_within_gamma = lat_within_gamma & lon_within_gamma
point_within_gamma.value_counts()



In [None]:
# The full box is the union of the three latitude bands (A, B and gamma).
point_within_phillipines_box = (point_within_a | point_within_b | point_within_gamma)
point_within_phillipines_box.value_counts()

In [None]:
# Store the mask on the landmask frame so it can be used for filtering and is
# carried along in merges.
lm2['phil_box'] = point_within_phillipines_box

## Alternate History Branch

Just skip this part and go directly to cyclone events to get testing2 back out. Here I am trying to merge all the etopo data into the ibtracs database before I filter based on phil_box.

In the following section, "Selecting Correct Cyclone Events", I try to filter etopo to only use altitudes within the Philippines bounding box. This leads to 14,999 cyclone measurements being selected within the box, for all non-null z (z can be negative).

Hopefully, reordering the transformations of the data won't alter the final result.

In [None]:
# Preview the storm-track frame before merging.
ibt2.head()

In [None]:
# Preview the landmask frame (now including the phil_box flag) before merging.
lm2.head()

In [None]:
N = 10000

# ibt2['lat'] and ibt2['lon'] columns have a float datatype.
# It is very difficult to run a pd.merge() on these columns: the floats carry
# rounding errors, and a merge needs exact equality (np.allclose() does not
# help here).
#
# To fix this, multiply by a big power of 10, round, and convert to a nullable
# integer type, then pd.merge() on those integer key columns.
#
# The integer keys are built on temporary frames (DataFrame.assign) instead of
# being written into ibt2 / lm2 and divided back out afterwards. That round
# trip left both frames with rounded coordinates and a changed dtype, so later
# cells gave different results depending on whether this optional cell had
# been run.
ibt2_keyed = ibt2.assign(
    lat=np.round(ibt2['lat'] * N).astype(pd.Int64Dtype()),
    lon=np.round(ibt2['lon'] * N).astype(pd.Int64Dtype()),
)
lm2_keyed = lm2.assign(
    lat=np.round(lm2['lat'] * N).astype(pd.Int64Dtype()),
    lon=np.round(lm2['lon'] * N).astype(pd.Int64Dtype()),
)

# As before, flag_merge keeps lat / lon as integers scaled by N.
flag_merge = pd.merge(ibt2_keyed, lm2_keyed, how = 'left', on = ['lat', 'lon'])

# The keyed copies are only needed for the merge; release them.
del ibt2_keyed, lm2_keyed

In [None]:
# Preview the merged frame.
flag_merge.head()

In [None]:
# Storm records that matched a grid point inside the box. The explicit
# `== True` also drops rows whose phil_box is missing because the left merge
# found no matching grid point.
flag_merge[flag_merge.phil_box == True]

In [None]:
# One row (the first occurrence) per storm serial number.
# The previous `.loc[pd.unique(flag_merge['storm_sn']), :]` passed storm_sn
# values as row labels, but the merged frame is indexed by position, so those
# labels do not exist in the index.
flag_merge.drop_duplicates(subset='storm_sn')

In [None]:
# Distinct storm serial numbers. The call parentheses were missing, so the
# cell displayed the bound method instead of the values.
flag_merge['storm_sn'].unique()

# Selecting Correct Cyclone Events

So we have cyclone centres with lat, lon values. We want to measure whether a cyclone has reached land. A simple approach would be to check when the eye of the storm comes into contact with land.

Unfortunately, we can't look up the location of the eye in the etopo model, because our index is not continuous. Perhaps we need to interpolate?

In [None]:
# Keep only the landmask grid points flagged as inside the box.
phil = lm2[lm2.phil_box]

In [None]:
# Preview the grid points inside the box.
phil.head()

In [None]:
# Summary statistics for the grid points inside the box.
phil.describe()

In [None]:
# First few complete records (no missing values) from seasons after 2016.
ibt2[ibt2.season > 2016].dropna().head()

In [None]:
# Left join: keep every storm record and attach z where a grid point inside
# the box has exactly the same lat / lon.
# NOTE(review): the join keys here are float coordinates, so rows only match
# on exact floating-point equality.
ibt3 = pd.merge(ibt2, phil[['lat','lon','z']], how = 'left', on=['lat','lon'])

In [None]:
# Summary statistics of the left join.
ibt3.describe()

In [None]:
# Summary statistics of the storm-track frame itself, for comparison with the
# joined frame.
ibt2.describe()

In [None]:
# Right join: keep every grid point inside the box and attach storm records
# with exactly the same lat / lon.
# NOTE(review): the join keys here are float coordinates, so rows only match
# on exact floating-point equality.
ibt4 = pd.merge(ibt2, phil[['lat','lon','z']], how = 'right', on=['lat','lon'])

In [None]:
# Summary statistics of the right join.
ibt4.describe()

In [None]:
# Number of grid points inside the box.
phil.z.shape

In [None]:
# Outer join: keep every storm record and every grid point inside the box.
# NOTE(review): the join keys here are float coordinates, so rows only match
# on exact floating-point equality.
ibt5 = pd.merge(ibt2, phil[['lat','lon','z']], how = 'outer', on=['lat','lon'])

In [None]:
# Summary statistics of the outer join.
ibt5.describe()

In [None]:
# Independent scratch copies, so that rewriting their lat / lon columns for
# the integer-key merge does not touch ibt2 or phil.
ibhack = ibt2.copy()
phack = phil[["lat","lon","z"]].copy()

In [None]:
# Summary statistics of the scratch storm frame before its coordinates are
# rescaled.
ibhack.describe()

In [None]:
N = 10000

# Turn the float coordinates of both scratch frames into nullable integer
# keys (degrees * N, rounded) so the merge can match them exactly.
for _frame in (ibhack, phack):
    for _coord in ('lat', 'lon'):
        _frame[_coord] = np.round(_frame[_coord] * N).astype(pd.Int64Dtype())

# goodmerge keeps lat / lon as integers scaled by N.
goodmerge = pd.merge(ibhack, phack, how = 'left', on = ['lat', 'lon'])

# Scale the scratch frames' coordinates back to degrees.
for _frame in (ibhack, phack):
    for _coord in ('lat', 'lon'):
        _frame[_coord] = _frame[_coord] / N

del _frame, _coord

In [None]:
# TODO: use lm2 here.

In [None]:
# Summary statistics of the scratch storm frame after its coordinates were
# scaled to integers and back.
ibhack.describe()

In [None]:
# Drop the outer-join result to free memory.
del ibt5

In [None]:
# Drop the right-join result to free memory.
del ibt4

In [None]:
# Show the column dtypes of the scratch grid frame.
# This replaces a leftover IPython help lookup (`phack.astype?`), which is not
# valid Python and opens the help pager every time the notebook is run.
phack.dtypes

In [None]:
# Summary statistics of the storm-track frame, for comparison with the merge
# result.
ibt2.describe()

In [None]:
# Summary statistics of the integer-key merge (lat / lon are scaled by N here).
goodmerge.describe()

In [None]:
# Storm records with no z value, i.e. no matching grid point inside the box.
testing = goodmerge[goodmerge.z.isnull()]

In [None]:
# Of the unmatched records, those that do have a latitude.
testing[testing["lat"].notnull()]

In [None]:
# Non-missing latitude keys of the merge, divided by N to get degrees again.
goodmerge.lat.dropna()/N

In [None]:
# Storm records that matched a grid point inside the box (z is present).
# .copy() makes testing2 an independent frame, so the later column
# assignments on it are unambiguous and do not raise SettingWithCopyWarning.
testing2 = goodmerge[goodmerge.z.notnull()].copy()

In [None]:
# Matched records that have a latitude.
testing2[testing2.lat.notnull()]

In [None]:
# Convert the integer merge keys back to degrees.
# assign() builds a new frame instead of writing columns into testing2, which
# was produced by filtering goodmerge; assigning into such a slice triggers
# SettingWithCopyWarning.
testing2 = testing2.assign(lat=testing2['lat'] / N, lon=testing2['lon'] / N)

In [None]:
# Summary statistics of the matched records, with coordinates back in degrees.
testing2.describe()