# Parsing of ATCF  type files

This is a notebook that outlines the parcing of info files from the ATCF Archive at http://ftp.nhc.noaa.gov/atcf/archive/ or HWRF files at http://ftpprd.ncep.noaa.gov/data/nccf/com/hur/prod/.

It is the first step in providing the u,v,p forcing terms. See also corrsponding Notebooks for parsing other type of info. 

In [1]:
#%matplotlib notebook

In [2]:
%matplotlib inline

### Definitions

In [3]:
import numpy as np

In [4]:
from parameters import *

In [5]:
from netCDF4 import Dataset

In [6]:
import glob

In [7]:
import re

In [8]:
import datetime

In [9]:
import pandas as pd

In [10]:
import matplotlib.pyplot as plt

In [11]:
import mplleaflet

# ANALYSIS STARTS HERE

In [12]:
path='test/'

In [13]:
#filename = 'bal212010.dat'
filename = 'matthew14l.2016092912.trak.hwrf.atcfunix'

In [14]:
atcf_header=['BASIN', 'CY', 'YYYYMMDDHH', 'TECHNUM/MIN', 'TECH', 'TAU', 'LatN/S', 'LonE/W', 'VMAX', 'MSLP', 'TY', 'RAD', 'WINDCODE', 'RAD1', 'RAD2', 'RAD3', 'RAD4', 'POUTER', 'ROUTER', 'RMW', 'GUSTS', 'EYE', 'SUBREGION', 'MAXSEAS', 'INITIALS', 'DIR', 'SPEED', 'STORMNAME', 'DEPTH', 'SEAS', 'SEASCODE', 'SEAS1', 'SEAS2', 'SEAS3', 'SEAS4', 'USERDEFINED', 'userdata']

In [15]:
try:
    data=pd.read_csv(path+filename, header=None, engine='python') # HWRF
except:
    data=pd.read_csv(path+filename, header=None, names=atcf_header, engine='python') #B-files

In [16]:
data=data.iloc[:, :37]

In [17]:
data.columns=atcf_header

In [18]:
data.head()

Unnamed: 0,BASIN,CY,YYYYMMDDHH,TECHNUM/MIN,TECH,TAU,LatN/S,LonE/W,VMAX,MSLP,...,STORMNAME,DEPTH,SEAS,SEASCODE,SEAS1,SEAS2,SEAS3,SEAS4,USERDEFINED,userdata
0,AL,14,2016092912,3,HWRF,0,142N,655W,60,995,...,,,,,0,0,0,0,THERMO PARAMS,-9999
1,AL,14,2016092912,3,HWRF,0,142N,655W,60,995,...,,,,,0,0,0,0,THERMO PARAMS,-9999
2,AL,14,2016092912,3,HWRF,1,142N,657W,57,998,...,,,,,0,0,0,0,THERMO PARAMS,2
3,AL,14,2016092912,3,HWRF,1,142N,657W,57,998,...,,,,,0,0,0,0,THERMO PARAMS,2
4,AL,14,2016092912,3,HWRF,2,143N,659W,66,997,...,,,,,0,0,0,0,THERMO PARAMS,-30


In [19]:
# usually HWRF doen't give the strom name so we take it from the filename
if data.STORMNAME.str.strip().all() == '' : data.STORMNAME = filename.split('.')[0][:-3].upper()

In [20]:
data.iloc[:,6:26]

Unnamed: 0,LatN/S,LonE/W,VMAX,MSLP,TY,RAD,WINDCODE,RAD1,RAD2,RAD3,RAD4,POUTER,ROUTER,RMW,GUSTS,EYE,SUBREGION,MAXSEAS,INITIALS,DIR
0,142N,655W,60,995,XX,34,NEQ,151,146,53,118,-99,-99,37,0,0,,0,,0
1,142N,655W,60,995,XX,50,NEQ,73,0,0,69,-99,-99,37,0,0,,0,,0
2,142N,657W,57,998,XX,34,NEQ,156,154,52,101,-99,-99,25,0,0,,0,,0
3,142N,657W,57,998,XX,50,NEQ,66,0,35,56,-99,-99,25,0,0,,0,,0
4,143N,659W,66,997,XX,34,NEQ,142,136,50,114,-99,-99,31,0,0,,0,,0
5,143N,659W,66,997,XX,50,NEQ,62,42,0,55,-99,-99,31,0,0,,0,,0
6,143N,659W,66,997,XX,64,NEQ,35,0,0,0,-99,-99,31,0,0,,0,,0
7,143N,661W,65,995,XX,34,NEQ,137,150,43,130,-99,-99,26,0,0,,0,,0
8,143N,661W,65,995,XX,50,NEQ,61,0,28,59,-99,-99,26,0,0,,0,,0
9,143N,661W,65,995,XX,64,NEQ,0,0,0,29,-99,-99,26,0,0,,0,,0


In [21]:
lon=data['LonE/W']

In [22]:
lon = [np.float(x[:-1])/10. if x[-1]=='E' else -np.float(x[:-1])/10. for x in lon]

In [23]:
lat=data['LatN/S']

In [24]:
lat = [np.float(x[:-1])/10. if x[-1]=='N' else -np.float(x[:-1])/10. for x in lat]

Check if we cross International Date Line (IDL)

In [25]:
sig=np.sign(lon)
sig1=sig[0]
m=sig != sig1

In [26]:
if sum(m)>0:
# adjust the lon values going from -180:180
        if sig1 > 0:
                lon[lon < 0] += 360.
        elif sig1 < 0:
                lon[lon > 0] -= 360.



In [27]:
plt.plot(lon,lat,'o--')
mplleaflet.display()

In [28]:
vmax = data['VMAX'] # 10 minute wind in Knots

In [29]:
mslp = data['MSLP'] # Minimum sea level pressure, 850 - 1050 mb.

In [30]:
penv = data['POUTER']     # pressure in millibars of the last closed isobar, 900 - 1050 mb.

In [31]:
time = data['YYYYMMDDHH'].apply(pd.to_datetime, format='%Y%m%d%H')+pd.to_timedelta(data['TAU'],'h')

In [32]:
rmw = data['RMW'] # in nautical miles

In [33]:
wradii=pd.DataFrame({'34ne':np.zeros(data.shape[0]), '34se':np.zeros(data.shape[0]), '34sw':np.zeros(data.shape[0]), '34nw':np.zeros(data.shape[0]), '50ne':np.zeros(data.shape[0]), '50se':np.zeros(data.shape[0]), '50sw':np.zeros(data.shape[0]), '50nw':np.zeros(data.shape[0]),'64ne':np.zeros(data.shape[0]), '64se':np.zeros(data.shape[0]), '64sw':np.zeros(data.shape[0]), '64nw':np.zeros(data.shape[0])})

In [34]:
wradii.head()

Unnamed: 0,34ne,34nw,34se,34sw,50ne,50nw,50se,50sw,64ne,64nw,64se,64sw
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
data.RAD=data.RAD.astype(str).str.strip() # convert to text format and delete whitespace

In [36]:
data.WINDCODE = data.WINDCODE.str.strip() # convert to text format and delete whitespace

In [37]:
rwcols=['RAD1', 'RAD2', 'RAD3', 'RAD4']

In [38]:
wcols=['34ne', '34se', '34sw', '34nw','50ne', '50se', '50sw', '50nw','64ne', '64se', '64sw', '64nw']

constract the wind radii matrix

In [39]:
for i in range(data.shape[0]):
    if data.WINDCODE.iloc[i] == 'NEQ' :
        rcols = [s for s in wradii.columns.values.astype(str) if data.RAD.iloc[i] in s]
        wradii.ix[i,rcols] = data.ix[i,rwcols].values

In [40]:
wradii = wradii.set_index(time)

In [41]:
wradii = wradii.groupby(level=0).sum() # merge rows for same time 

create the inpData matrix 

In [42]:
dic={'t':time, 'lat':lat,'lon':lon,'penv': penv, 'pcenter': mslp, 'vmax':vmax, 'rmax':rmw, 'hurName': data.STORMNAME} 

In [43]:
inp=pd.DataFrame(dic)

In [44]:
inp.head()

Unnamed: 0,hurName,lat,lon,pcenter,penv,rmax,t,vmax
0,MATTHEW,14.2,-65.5,995,-99,37,2016-09-29 12:00:00,60
1,MATTHEW,14.2,-65.5,995,-99,37,2016-09-29 12:00:00,60
2,MATTHEW,14.2,-65.7,998,-99,25,2016-09-29 13:00:00,57
3,MATTHEW,14.2,-65.7,998,-99,25,2016-09-29 13:00:00,57
4,MATTHEW,14.3,-65.9,997,-99,31,2016-09-29 14:00:00,66


In [45]:
inp = inp.drop_duplicates()

In [46]:
inp = inp.set_index('t')

In [47]:
inpData = pd.concat([inp,wradii], axis=1)

In [48]:
inpData.head()

Unnamed: 0_level_0,hurName,lat,lon,pcenter,penv,rmax,vmax,34ne,34nw,34se,34sw,50ne,50nw,50se,50sw,64ne,64nw,64se,64sw
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2016-09-29 12:00:00,MATTHEW,14.2,-65.5,995,-99,37,60,151.0,146.0,53.0,118.0,73.0,0.0,0.0,69.0,0.0,0.0,0.0,0.0
2016-09-29 13:00:00,MATTHEW,14.2,-65.7,998,-99,25,57,156.0,154.0,52.0,101.0,66.0,0.0,35.0,56.0,0.0,0.0,0.0,0.0
2016-09-29 14:00:00,MATTHEW,14.3,-65.9,997,-99,31,66,142.0,136.0,50.0,114.0,62.0,42.0,0.0,55.0,35.0,0.0,0.0,0.0
2016-09-29 15:00:00,MATTHEW,14.3,-66.1,995,-99,26,65,137.0,150.0,43.0,130.0,61.0,0.0,28.0,59.0,0.0,0.0,0.0,29.0
2016-09-29 16:00:00,MATTHEW,14.3,-66.3,993,-99,23,66,144.0,144.0,44.0,160.0,66.0,0.0,33.0,79.0,0.0,0.0,27.0,38.0


In [49]:
inpData = inpData.dropna()

In [50]:
inpData = inpData.apply(pd.to_numeric, errors='ignore')

In [51]:
dph = (inpData.penv - inpData.pcenter) * 100 # convert to KP??????

In [52]:
inpData=inpData.assign(dp=dph)

### create inpData file

In [53]:
inpData.head()

Unnamed: 0_level_0,hurName,lat,lon,pcenter,penv,rmax,vmax,34ne,34nw,34se,34sw,50ne,50nw,50se,50sw,64ne,64nw,64se,64sw,dp
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2016-09-29 12:00:00,MATTHEW,14.2,-65.5,995,-99,37,60,151.0,146.0,53.0,118.0,73.0,0.0,0.0,69.0,0.0,0.0,0.0,0.0,-109400
2016-09-29 13:00:00,MATTHEW,14.2,-65.7,998,-99,25,57,156.0,154.0,52.0,101.0,66.0,0.0,35.0,56.0,0.0,0.0,0.0,0.0,-109700
2016-09-29 14:00:00,MATTHEW,14.3,-65.9,997,-99,31,66,142.0,136.0,50.0,114.0,62.0,42.0,0.0,55.0,35.0,0.0,0.0,0.0,-109600
2016-09-29 15:00:00,MATTHEW,14.3,-66.1,995,-99,26,65,137.0,150.0,43.0,130.0,61.0,0.0,28.0,59.0,0.0,0.0,0.0,29.0,-109400
2016-09-29 16:00:00,MATTHEW,14.3,-66.3,993,-99,23,66,144.0,144.0,44.0,160.0,66.0,0.0,33.0,79.0,0.0,0.0,27.0,38.0,-109200


In [54]:
inpData.index[0]

Timestamp('2016-09-29 12:00:00')

In [55]:
inpData['time']=inpData.index-inpData.index[0]

In [56]:
inpData['time']=inpData['time'] / pd.Timedelta('1 hour')

In [57]:
inpData.reset_index(level=0, inplace=True)

In [58]:
inpData=inpData.set_index('time')

In [59]:
inpData.head()

Unnamed: 0_level_0,t,hurName,lat,lon,pcenter,penv,rmax,vmax,34ne,34nw,...,34sw,50ne,50nw,50se,50sw,64ne,64nw,64se,64sw,dp
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,2016-09-29 12:00:00,MATTHEW,14.2,-65.5,995,-99,37,60,151.0,146.0,...,118.0,73.0,0.0,0.0,69.0,0.0,0.0,0.0,0.0,-109400
1.0,2016-09-29 13:00:00,MATTHEW,14.2,-65.7,998,-99,25,57,156.0,154.0,...,101.0,66.0,0.0,35.0,56.0,0.0,0.0,0.0,0.0,-109700
2.0,2016-09-29 14:00:00,MATTHEW,14.3,-65.9,997,-99,31,66,142.0,136.0,...,114.0,62.0,42.0,0.0,55.0,35.0,0.0,0.0,0.0,-109600
3.0,2016-09-29 15:00:00,MATTHEW,14.3,-66.1,995,-99,26,65,137.0,150.0,...,130.0,61.0,0.0,28.0,59.0,0.0,0.0,0.0,29.0,-109400
4.0,2016-09-29 16:00:00,MATTHEW,14.3,-66.3,993,-99,23,66,144.0,144.0,...,160.0,66.0,0.0,33.0,79.0,0.0,0.0,27.0,38.0,-109200


## save inpData file

We save here in original units for  testing purposes

In [60]:
column_order=['lat','lon','vmax','64ne','64se','64sw','64nw','50ne','50se','50sw','50nw','34ne','34se','34sw','34nw']

In [61]:
header=['lat','long','vmax','64ne','64se','64sw','64nw','50ne','50se','50sw','50nw','34ne','34se','34sw','34nw']

In [62]:
inpData.to_csv('tmp/inpData.txt',index=True, columns=column_order, sep='\t', header=header)

## convert to SI

In [63]:
inpData['vmax']=inpData['vmax']*kt2ms

In [64]:
inpData['rmax']=inpData['rmax']*nm2m

In [65]:
inpData[wcols]=inpData[wcols]*nm2m

In [66]:
inpData.head()

Unnamed: 0_level_0,t,hurName,lat,lon,pcenter,penv,rmax,vmax,34ne,34nw,...,34sw,50ne,50nw,50se,50sw,64ne,64nw,64se,64sw,dp
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,2016-09-29 12:00:00,MATTHEW,14.2,-65.5,995,-99,68524.0,30.866667,279652.0,270392.0,...,218536.0,135196.0,0.0,0.0,127788.0,0.0,0.0,0.0,0.0,-109400
1.0,2016-09-29 13:00:00,MATTHEW,14.2,-65.7,998,-99,46300.0,29.323333,288912.0,285208.0,...,187052.0,122232.0,0.0,64820.0,103712.0,0.0,0.0,0.0,0.0,-109700
2.0,2016-09-29 14:00:00,MATTHEW,14.3,-65.9,997,-99,57412.0,33.953333,262984.0,251872.0,...,211128.0,114824.0,77784.0,0.0,101860.0,64820.0,0.0,0.0,0.0,-109600
3.0,2016-09-29 15:00:00,MATTHEW,14.3,-66.1,995,-99,48152.0,33.438889,253724.0,277800.0,...,240760.0,112972.0,0.0,51856.0,109268.0,0.0,0.0,0.0,53708.0,-109400
4.0,2016-09-29 16:00:00,MATTHEW,14.3,-66.3,993,-99,42596.0,33.953333,266688.0,266688.0,...,296320.0,122232.0,0.0,61116.0,146308.0,0.0,0.0,50004.0,70376.0,-109200


## save inpData file

In [67]:
column_order=['lat','lon','dp','vmax','64ne','64se','64sw','64nw','50ne','50se','50sw','50nw','34ne','34se','34sw','34nw']

In [68]:
header=['lat','long','dp','vmax','64ne','64se','64sw','64nw','50ne','50se','50sw','50nw','34ne','34se','34sw','34nw']

In [69]:
inpData.to_csv('tmp/inpDataSI.txt',index=True, columns=column_order, sep='\t', header=header)

### create bulInfo.txt file

In [70]:
tt=pd.to_datetime(inpData.t[0])

In [71]:
tt=datetime.datetime.strftime(tt,'%d %b %Y %H:%M:%S')

In [72]:
dic0={'advNo':[1],'tShift':[0],'$date':tt,'land':[1],'notes':[0]}

In [73]:
bul=pd.DataFrame.from_dict(dic0)

In [74]:
bul

Unnamed: 0,$date,advNo,land,notes,tShift
0,29 Sep 2016 12:00:00,1,1,0,0


In [75]:
bul.to_csv('tmp/bulInfo.txt',index=False, columns=['advNo','tShift','$date','land','notes'], sep='\t')

### create info.xml file

In [76]:
import xml.etree.ElementTree as et 
from xml.dom import minidom

def prettify(elem):
    """Return a pretty-printed XML string for the Element.
    """
    rough_string = et.tostring(elem, 'utf-8')
    reparsed = minidom.parseString(rough_string)
    return reparsed.toprettyxml(indent="  ")

In [77]:
hurName = inpData.hurName.value_counts().index[0].strip()

In [78]:
basin = data.BASIN.value_counts().index[0].strip()

In [79]:
info = et.Element('setexp')
et.SubElement(info, 'source').text = 'Tropical Cyclone Bulletin through GDACS/PDC'
et.SubElement(info, 'hurName').text = hurName
et.SubElement(info, 'hurId').text = hurName
et.SubElement(info, 'basin').text = basin
et.SubElement(info, 'bulNo').text = '1'
et.SubElement(info, 'bulDate').text = tt
et.SubElement(info, 'n').text = '100000'
et.SubElement(info, 'fk').text = '0.81'
et.SubElement(info, 'stormsurge').text = '0'
et.SubElement(info, 'timefactor').text = '1'
et.SubElement(info, 'landfall').text = '1'

In [80]:
xmlf = minidom.parseString(prettify(info))

In [81]:
with open('tmp/info.xml','w') as f:
      xmlf.writexml(f)

## output

We can save the complete dataset for future use.

In [83]:
inpData.to_csv('test/step1.txt',index=True, sep='\t')

The next step is to compute translational and Coriolis velocity. See [Subtract translational and Coriolis velocity.ipynb](./Subtract translational and Coriolis velocity.ipynb)