This scratch file enables us to find out more about the data structures in Cynthia's code.
Can we save these data structures to disk and massively speed up load time before we split the project up into subfiles?

In [None]:
"""Using 'all' file instead of 'wmo' file"""
import numpy as np 
import pandas as pd
import pickle as pkl
from netCDF4 import Dataset,num2date
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.stats import norm, poisson, lognorm, chisquare, linregress, ttest_ind, power_divergence, ks_2samp, chi2_contingency
from scipy.interpolate import interp1d
from scipy.special import factorial
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
#%matplotlib tk    #Uncomment for interactive figures
import geopy.distance as gd
from sklearn.metrics import mean_squared_error
import time
import os

In [None]:
'''Read .nc file, credits: Theo Rashid'''
tic = time.time()  # Wall-clock start; expect a runtime of ~5 min

# --- IBTrACS storm records (the 'all' file) ---
nc_ibtracs = 'Allstorms.ibtracs_all.v03r10.nc'
path_to_nc_ibtracs = os.path.join('..', '..', 'deconstruct_cyn', nc_ibtracs)
ibt = Dataset(path_to_nc_ibtracs)

# Every variable is read in full ([:]), in the order the names are listed.
# name: name of TC; time_record: time of record
name, storm_sn, time_record, landfall = (
    ibt.variables[key][:]
    for key in ('name', 'storm_sn', 'source_time', 'landfall')
)

genesis_basin, season = (
    ibt.variables[key][:]
    for key in ('genesis_basin', 'season')
)

# lat / lon: latitude / longitude of TC centre; max_wind: max. wind speed;
# min_pres: central pressure; dist2land: distance to land
lat, lon, max_wind, min_pres, dist2land = (
    ibt.variables[key][:]
    for key in ('source_lat', 'source_lon', 'source_wind', 'source_pres', 'dist2land')
)

# --- Land mask from the ETOPO1 Global Relief Model ---
nc_landmask = 'ETOPO1_Ice_g_gmt4.nc'
path_to_nc_landmask = os.path.join('..', '..', 'deconstruct_cyn', nc_landmask)
land = Dataset(path_to_nc_landmask)

# Longitude points, latitude points and altitude of the land mask
landmask_lon, landmask_lat, z = (
    land.variables[key][:]
    for key in ('lon', 'lat', 'z')
)

# Meshgrid for getting and plotting each point
landmask_lat_mesh, landmask_lon_mesh = np.meshgrid(landmask_lat, landmask_lon)

# Indices (along the last axis of lat/lon/max_wind/min_pres) of the record sources to keep:
# 10 is jtwc_wp, 14 is cma, 19 is wmo tokyo, 21 is hko
source_list = [10,14,19,21]

# NaN-initialised holders: the first two axes match lat, the last axis has one slot per
# entry of source_list (slot k holds the data of source_list[k]).
selected_shape = (np.size(lat,0), np.size(lat,1), len(source_list))
LAT      = np.full(selected_shape, np.nan)
LON      = np.full(selected_shape, np.nan)
MAX_WIND = np.full(selected_shape, np.nan)
MIN_PRES = np.full(selected_shape, np.nan)

# enumerate gives the destination slot directly (no repeated source_list.index lookups,
# and no wrong slot if an index were ever listed twice).
# NOTE(review): if lat/lon/max_wind/min_pres are masked arrays, the mask is presumably
# dropped by this assignment into plain ndarrays - confirm that fill values are handled downstream.
for slot, INDEX_RECORD_SOURCE in enumerate(source_list):
    LAT      [:,:,slot] = lat[:,:,INDEX_RECORD_SOURCE]
    LON      [:,:,slot] = lon[:,:,INDEX_RECORD_SOURCE]
    MAX_WIND [:,:,slot] = max_wind[:,:,INDEX_RECORD_SOURCE]
    MIN_PRES [:,:,slot] = min_pres[:,:,INDEX_RECORD_SOURCE]

# These three are not indexed by source, so they are taken once, outside the loop,
# straight from the underlying data of the arrays read from the file.
DIST2LAND   = dist2land.data[:]
TIME_RECORD = time_record.data[:]
LANDFALL    = landfall.data[:].astype(float)

# Index of TCs occurring on or after 2014, needed for rainfall.
# A single pass over season; list(season) is no longer rebuilt for every index.
TCinseason = [tc_index for tc_index, tc_season in enumerate(season) if tc_season >= 2014]

# Elapsed wall-clock time since tic was set with time.time()
toc = time.time()
print(toc - tic)

In [None]:
# Everything produced by the .nc reading cell, gathered for a quick type/length overview
cell_2 = [
    name, storm_sn, time_record, landfall, genesis_basin, season,
    lat, lon, max_wind, min_pres, dist2land,
    landmask_lon, landmask_lat, z, landmask_lat_mesh, landmask_lon_mesh,
    LAT, LON, MAX_WIND, MIN_PRES, DIST2LAND, TIME_RECORD, LANDFALL,
    TCinseason,
]

# One line per object: its type followed by its length
for kind, length in ((type(obj), len(obj)) for obj in cell_2):
    print(kind, length)

In [None]:
tic = time.perf_counter()

# Objects to pickle and, position by position, the file stem each one is saved under
to_save = [time_record, season, landmask_lon, landmask_lat, z, 
           landmask_lat_mesh, landmask_lon_mesh, LAT, LON, MAX_WIND, MIN_PRES, DIST2LAND, TCinseason]

names_to_save = ['time_record','season','landmask_lon','landmask_lat','z',
                 'landmask_lat_mesh','landmask_lon_mesh','LAT','LON','MAX_WIND','MIN_PRES','DIST2LAND','TCinseason']

# zip() would silently drop the unmatched tail if the two lists got out of step
if len(to_save) != len(names_to_save):
    raise ValueError('to_save has {} entries but names_to_save has {}'.format(len(to_save), len(names_to_save)))

# Check every object before writing anything, so a bad entry cannot leave a partial set of pickles
savable_types = (list, np.ma.core.MaskedArray, np.ndarray)
for data_structure in to_save:
    if not isinstance(data_structure, savable_types):
        raise ValueError('Expected masked array, array or list. Got: {}'.format(type(data_structure)))

# open(..., 'wb') does not create missing directories
os.makedirs('data', exist_ok=True)

for data_structure, data_name in zip(to_save, names_to_save):
    path_to_data = os.path.join('data',data_name + '.pickle')
    with open(path_to_data, 'wb') as f:
        pkl.dump(data_structure, f)
    print('saving {} as .pickle'.format(data_name))

toc = time.perf_counter()
print(toc - tic)

In [None]:
#Get daily rainfall amount from .nc file
#Start the timer for this cell; the elapsed time is printed at the end of the cell
tic = time.perf_counter()

def getfile(tcindex, area, shape=(450,379)):
    """Sum the rainfall of every .nc file found in one IMERG directory.

    The directory read is ../../deconstruct_cyn/IMERG/<tcindex><area>, relative to
    the current working directory.

    Parameters
    ----------
    tcindex : int or str
        str(tcindex) is the first part of the directory name.
    area : int or str
        str(area) is appended to str(tcindex) to complete the directory name.
    shape : tuple of int, optional
        Shape of the accumulator that the 'precipitationCal' field of each file is
        added to. Defaults to (450, 379).

    Returns
    -------
    totalprecipitation : numpy.ndarray
        Element-wise sum of 'precipitationCal' over all .nc files in the directory.
    lat_rainfall, lon_rainfall
        The 'lat' and 'lon' variables of the last file read (files are read in
        sorted name order).

    Raises
    ------
    FileNotFoundError
        If the directory does not exist (raised by os.listdir) or holds no .nc file.
    """
    filedirectory = os.path.join('IMERG',str(tcindex) + str(area))
    path_to_imerg = os.path.join('..','..','deconstruct_cyn',filedirectory)

    # Sorted so the summation order, and the file the returned lat/lon come from,
    # do not depend on the arbitrary order of os.listdir.
    nc_files = sorted(entry for entry in os.listdir(path_to_imerg) if entry.endswith('.nc'))
    if not nc_files:
        # Without this guard lat_rainfall/lon_rainfall would be unbound at the return
        raise FileNotFoundError('No .nc files found in {}'.format(path_to_imerg))

    totalprecipitation = np.zeros(shape)

    #Sum daily rainfall amount from each file
    for nc_file in nc_files:
        # The with-block closes each dataset once its variables have been read
        with Dataset(os.path.join(path_to_imerg, nc_file)) as imerg:
            precipitationCal = imerg.variables['precipitationCal'][:] #Rainfall
            lat_rainfall = imerg.variables['lat'][:] #Latitude range
            lon_rainfall = imerg.variables['lon'][:] #Longitude range
        totalprecipitation += precipitationCal

    return totalprecipitation, lat_rainfall, lon_rainfall

# Read the IMERG files once and unpack all three results
# (calling getfile separately for each element would re-read every file three times)
totalprecipitation, lat_rainfall, lon_rainfall = getfile(12733,1)

toc = time.perf_counter()
print(toc - tic)

In [None]:
# The three results of the rainfall cell, gathered for a quick type/shape overview
cell_20 = [totalprecipitation, lat_rainfall, lon_rainfall]

# One line per object: its type followed by its shape
for kind, dims in ((type(obj), obj.shape) for obj in cell_20):
    print(kind, dims)

In [None]:
#Input radii of 18m/s wind (R18) data, also known as radii of 34kts wind (R34)
tic = time.perf_counter()

# Keyed by storm id (string). Each entry is either empty (no data) or a pair of values,
# each the mean of four radii given in nautical miles and converted to km (x 1.852).
# Bracketed expressions kept in comments are partial records that are deliberately not used.
r18dict_GD_jtwc = {
    '11290': [], #Utor 2001, no data, from bwp0602001.txt
    '11298': [], #[float((60+90+120+60)*1.852/4), 'no data'] #Yutu 2001, from bwp102001.txt
    '11318': [float((70+70+40+40)*1.852/4), float((85+85+60+60)*1.852/4)], #Nari 2001, from bwp202001.txt
    '11410': [float((60+120+120+70)*1.852/4), float((70+120+120+70)*1.852/4)], #Kammuri 2002, from bwp162002.txt
    '11417': [float((60+70+70+60)*1.852/4), float((60+70+70+60)*1.852/4)], #Vongfong 2002, from bwp202002.txt
    '11431': [], #Hagupit 2002, no data, from bwp232002.txt
    '11508': [], #[float((160+160+140+140)*1.852/4), 'no data'] #Imbudo 2003, from bwp092003.txt
    '11520': [float((120+105+100+105)*1.852/4), float((120+105+100+105)*1.852/4)], #Krovanh 2003, from bwp122003.txt
    '11528': [float((190+190+170+160)*1.852/4), float((170+170+145+135)*1.852/4)], #Dujuan 2003, from bwp142004.txt
    '11605': [float((40+40+40+40)*1.852/4), float((40+40+40+40)*1.852/4)], #Kompasu 2004, from bwp122004.txt
    '11623': [float((110+135+135+90)*1.852/4), float((110+135+135+90)*1.852/4)], #Aere 2004, from bwp202004.txt
    '11725': [float((80+80+75+70)*1.852/4), float((80+80+75+70)*1.852/4)], #Sanvu 2005, from bwp102005.txt
    '11810': [float((140+140+120+120)*1.852/4), float((140+140+120+100)*1.852/4)], #Chanchu 2006, from bwp022006.txt
    '11814': [], #Jelawat 2006, no data, from bwp032006.txt
    '11826': [float((140+120+120+105)*1.852/4), float((140+120+100+100)*1.852/4)], #Prapiroon 2006, from bwp072006.txt
    '11842': [], #TD0823 2006, no data, from bwp132006.txt
    '11850': [], #TD0912 2006, no data, from bwp152006.txt
    '11999': [float((70+70+60+60)*1.852/4), float((70+65+60+65)*1.852/4)], #Neoguri 2008, from bwp022008.txt
    '12012': [float((90+90+80+75)*1.852/4), float((90+90+80+75)*1.852/4)], #Fengshen 2008, from bwp072008.txt
    '12025': [float((90+75+75+85)*1.852/4), float((80+70+70+80)*1.852/4)], #Kamuri 2008, from bwp102008.txt
    '12034': [float((55+55+55+55)*1.852/4), float((55+55+55+55)*1.852/4)], #Nuri 2008, from bwp132008.txt
    '12048': [float((155+135+130+155)*1.852/4), float((155+135+130+155)*1.852/4)], #Hagupit 2008, from bwp182008.txt
    '12052': [], #Higos 2008, no data, from bwp212008.txt
    '12110': [], #Nangka 2009, no data, from bwp042009.txt
    '12114': [], #Soudelor 2009, no data, from bwp052009.txt
    '12118': [float((85+70+55+60)*1.852/4), float((85+70+55+60)*1.852/4)], #Molave 2009, from bwp072009.txt
    '12121': [], #Goni 2009, no data, from bwp082009.txt
    '12146': [float((130+75+95+80)*1.852/4), float((130+75+95+80)*1.852/4)], #Koppu 2009, from bwp162009.dat
    '12215': [float((105+95+75+95)*1.852/4), float((105+95+75+95)*1.852/4)], #Chanthu 2010, from bwp042010.txt
    '12238': [float((120+130+100+85)*1.852/4), float((120+130+100+85)*1.852/4)], #Fanapi 2010, from bwp122010.txt
    '12245': [], #Fourteen 2010, no data
    '12294': [], #Sarika 2011, no data
    '12296': [], #Haima 2011, no data
    '12393': [], #[float((70+55+55+70)*1.852/4), 'no data'] #Doksuri 2012, from bwp072012.dat
    '12399': [float((110+95+95+119)*1.852/4), float((110+95+95+119)*1.852/4)], #Vincente 2012, from bwp092012.dat
    '12408': [float((80+70+70+90)*1.852/4), float((80+70+70+80)*1.852/4)], #Kai-tak 2012, from bwp142012.dat
    '12485': [float((70+65+60+60)*1.852/4), float((70+65+60+60)*1.852/4)], #Rumbia 2013, from bwp062013.dat
    '12498': [float((125+130+125+110)*1.852/4), float((125+130+125+110)*1.852/4)], #Utor 2013, from bwp112013.dat
    '12520': [float((155+140+160+165)*1.852/4), float((155+140+160+165)*1.852/4)], #Usagi 2013
    '12594': [float((90+90+80+80)*1.852/4), float((90+90+80+80)*1.852/4)], #Hagibis 2014, from bwp072014.dat
    '12600': [float((115+100+100+115)*1.852/4), float((115+100+100+115)*1.852/4)], #Rammasun 2014, from bwp092014.dat
    '12621': [], #Fourteen 2014, from bwp142014.dat, no data
    '12624': [float((155+150+140+150)*1.852/4), float((155+150+140+150)*1.852/4)], #Kalmaegi 2014, from bwp152014.dat
    '12688': [float((75+70+70+75)*1.852/4), float((75+60+60+75)*1.852/4)], #Linfa 2015, from bwp102015.dat
    '12733': [float((135+120+120+135)*1.852/4), float((135+120+120+135)*1.852/4)], #Mujigae 2015, from bwp222015.dat
    '12776': [], #TD0526 2016, from bwp012016.dat, no data
    '12793': [float((150+150+150+150)*1.852/4), float((150+150+150+130)*1.852/4)], #Nida 2016, from bwp062016.dat
    '12837': [float((150+120+120+150)*1.852/4), float((150+120+120+150)*1.852/4)], #Haima 2016, from bwp252016.dat
    '12861': [float((150+115+95+140)*1.852/4), float((100+100+95+95)*1.852/4)], #Hato 2017, from bwp152017.dat
    '12862': [], #Mangkhut 2018, no data
}

# Subset of r18dict_GD_jtwc restricted to the storm ids listed below. The values are
# taken from r18dict_GD_jtwc rather than typed out a second time, so the two dictionaries
# cannot drift apart; list(...) gives each entry its own list object.
r18dict_GD_jtwc_2014 = {
    storm_id: list(r18dict_GD_jtwc[storm_id])
    for storm_id in (
        '12594', #Hagibis 2014, from bwp072014.dat
        '12600', #Rammasun 2014, from bwp092014.dat
        '12621', #Fourteen 2014, from bwp142014.dat, no data
        '12624', #Kalmaegi 2014, from bwp152014.dat
        '12688', #Linfa 2015, from bwp102015.dat
        '12733', #Mujigae 2015, from bwp222015.dat
        '12776', #TD0526 2016, from bwp012016.dat, no data
        '12793', #Nida 2016, from bwp062016.dat
        '12837', #Haima 2016, from bwp252016.dat
        '12861', #Hato 2017, from bwp152017.dat
        '12862', #Mangkhut 2018, no data
    )
}

# Keyed by storm id (string). Values in km: each entry is either empty (no data) or a
# pair of values, each the mean of four radii. Bracketed expressions kept in comments
# are partial records that are deliberately not used.
r18dict_GD_cma = {
    '12594': [], #no data
    '12600': [], #no data
    '12621': [], #no data
    '12624': [], #no data
    '12688': [float((120+150+120+100)/4), float((120+150+120+100)/4)],
    '12733': [float((200+200+200+200)/4), float((200+200+200+200)/4)],
    '12776': [], #no data
    '12793': [float((300+260+260+300)/4), float((260+260+170+170)/4)],
    '12837': [], #[float((380+350+380+320)/4), 'no data']
    '12861': [float((280+220+260+240)/4), float((280+220+260+240)/4)],
    '12862': [], #[float((400+300+300+250)/4), 'no data']
}

# Keyed by storm id (string). Each entry is either empty (no data) or a pair of values,
# each the mean of four radii given in nautical miles and converted to km (x 1.852).
r18dict_PHI_jtwc = {
    '12566': [], #Kajiki 2014, from bwp022014.dat, no data
    '12580': [], #Four 2014, no data
    '12600': [float((80+80+85+85)*1.852/4), float((80+80+85+85)*1.852/4)], #Rammasun 2014, from bwp092014.dat
    '12624': [float((110+120+120+110)*1.852/4),float((110+120+120+110)*1.852/4)], #Kalmaegi 2014, from bwp152014.dat
    '12627': [float((50+70+55+55)*1.852/4), float((55+40+40+55)*1.852/4)], #Fung-wong 2014, from bwp162014.dat
    '12645': [], #Sinlaku 2014, from bwp212014.dat, no data
    '12646': [float((125+110+110+125)*1.852/4), float((125+110+110+125)*1.852/4)], #Hagupit 2014, from bwp222014.dat
    # NOTE(review): the only entry that mixes the string 'no data' with a number - confirm
    # that whatever consumes this dictionary copes with a non-numeric first element.
    '12649': ['no data', float((40+40+40+40)*1.852/4)], #Jangmi 2014, from bwp232014.dat
    '12651': [float((75+70+75+90)*1.852/4), float((75+70+75+80)*1.852/4)], #Mekkhala 2015, from bwp012015.dat
    '12669': [], #Maysak 2015, from bwp042015.dat, no data
    '12688': [float((80+65+65+80)*1.852/4), float((80+65+65+80)*1.852/4)], #Linfa 2015, from bwp102015.dat
    '12733': [], #Mujigae 2015, from bwp222015.dat, no data
    '12738': [float((110+155+95+85)*1.852/4), float((110+155+95+85)*1.852/4)], #Koppu 2015, from bwp242015.dat
    '12754': [float((135+75+75+110)*1.852/4), float((135+75+75+110)*1.852/4)], #Melor 2015, bwp282015.dat
    '12836': [float((150+110+110+150)*1.852/4), float((160+110+120+170)*1.852/4)], #Sarika 2016, from bwp242016.dat
    '12837': [float((220+200+190+200)*1.852/4), float((230+210+190+200)*1.852/4)], #Haima 2016, from bwp252016.dat
    '12846': [float((50+50+50+50)*1.852/4), float((50+50+50+50)*1.852/4)], #Tokage 2016, from bwp292016.dat
    '12852': [float((120+80+90+110)*1.852/4), float((110+80+80+120)*1.852/4)], #Nock-ten 2016
    '12862': [], #Mangkhut 2018, no data
}

# Keyed by storm id (string). Values in km: each entry is either empty (no data) or a
# pair of values, each the mean of four radii.
r18dict_PHI_cma = {
    '12566': [], #no data
    '12580': [], #no data
    '12600': [], #no data
    '12624': [], #no data
    '12627': [], #no data
    '12645': [], #no data
    '12646': [], #no data
    '12649': [], #no data
    '12651': [], #no data
    '12669': [float((200+150+50+100)/4), float((150+130+70+70)/4)],
    '12688': [float((250+220+250+200)/4), float((260+230+260+200)/4)],
    '12733': [], #no data
    '12738': [float((350+350+350+300)/4), float((350+350+350+300)/4)],
    '12754': [float((250+230+200+250)/4), float((250+230+200+250)/4)],
    '12836': [float((260+240+200+240)/4), float((230+230+260+260)/4)],
    '12837': [float((380+350+380+320)/4), float((380+350+380+320)/4)],
    '12846': [], #no data
    '12852': [float((280+200+240+280)/4), float((280+220+220+240)/4)],
    '12862': [float((600+500+350+500)/4), float((550+480+480+400)/4)],
}

#Print the time elapsed since tic was set
toc = time.perf_counter()
print(toc - tic)

In [None]:
#Number of keys (storm ids) in each R18 dictionary, in the order they were defined
tuple(
    len(r18dict)
    for r18dict in (
        r18dict_GD_jtwc,
        r18dict_GD_jtwc_2014,
        r18dict_GD_cma,
        r18dict_PHI_jtwc,
        r18dict_PHI_cma,
    )
)

In [None]:
#Number of storms in r18dict_PHI_cma whose entry is non-empty
count = sum(1 for r18_values in r18dict_PHI_cma.values() if r18_values)
print(count)