# Parsing of ATCF Archive files

This is a notebook that outlines the parcing of info files from the ATCF Archive at http://ftp.nhc.noaa.gov/atcf/archive/. 

It is the first step in providing the u,v,p forcing terms. See also corrsponding Notebook for Best Track parsing [BestTrack.ipynb](./BestTrack.ipynb). 

In [1]:
#%matplotlib notebook

In [2]:
%matplotlib inline

### Definitions

In [3]:
import numpy as np
# -------------------------------------------------------------------------
# Const
# -------------------------------------------------------------------------
nm2m=1852. # 1 nautical mile to meters
kt2ms=nm2m/3600.  # knots to m/s
omega=2*np.pi/(3600.*24.) # angular speed omega=2pi*f(=frequency of earth : 1 cycle per day) 2pi* 1 / day in seconds
rhoa=1.15 #air density  Kg/m^3
radius=6378388 #137. # earth's radius according to WGS 84
deg2m=np.pi*radius/180.  # ds on cicle equals ds=r*dth - dth=pi/180
pn=101000.  # Atmospheric pressure [N/m^2] (101KPa - enviromental pressure)

tetaNE=45. #mean angle [degrees] of North Eastern quadrant
tetaNW=135. #        "              North Western
tetaSW=225. #        "              South West
tetaSE=315. #        "              South East

maxR=500.e3  # maximum radius of TC [m] (500Km)

In [4]:
from netCDF4 import Dataset

In [5]:
import glob

In [6]:
import re

In [7]:
import datetime

In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import mplleaflet

In [9]:
def hvel(r):
    x=0.5+(r-rmaxh)/(maxR-rmaxh)*kh
    return (bh/rhoa*(rmaxh/r)**bh*dph*np.exp(-(rmaxh/r)**bh))**x

In [10]:
def pres(r):
    return pch+dph*np.exp(-(rmaxh/r)**bh)

In [11]:
def hvel2d(l1,l2,vtx,vty):
    r=np.sqrt(l1**2+l2**2)
    xh=0.5+(r-rmaxh)/(maxR-rmaxh)*kh
    ur=(bh/rhoa*(rmaxh/r)**bh*dph*np.exp(-(rmaxh/r)**bh))**xh
    theta=np.arctan2(l2,l1)
    ux=-ur*np.sin(theta)
    uy=ur*np.cos(theta)
    return ux+vtx,uy+vty, pres(r)

# ANALYSIS STARTS HERE

In [12]:
path='test/'

In [13]:
atcf_header=['BASIN', 'CY', 'YYYYMMDDHH', 'TECHNUM/MIN', 'TECH', 'TAU', 'LatN/S', 'LonE/W', 'VMAX', 'MSLP', 'TY', 'RAD', 'WINDCODE', 'RAD1', 'RAD2', 'RAD3', 'RAD4', 'POUTER', 'ROUTER', 'RMW', 'GUSTS', 'EYE', 'SUBREGION', 'MAXSEAS', 'INITIALS', 'DIR', 'SPEED', 'STORMNAME', 'DEPTH', 'SEAS', 'SEASCODE', 'SEAS1', 'SEAS2', 'SEAS3', 'SEAS4', 'USERDEFINED', 'userdata']

In [14]:
data=pd.read_csv(path+'bal212010.dat', header=None, names=atcf_header, low_memory=False)

In [15]:
data.head()

Unnamed: 0,BASIN,CY,YYYYMMDDHH,TECHNUM/MIN,TECH,TAU,LatN/S,LonE/W,VMAX,MSLP,...,STORMNAME,DEPTH,SEAS,SEASCODE,SEAS1,SEAS2,SEAS3,SEAS4,USERDEFINED,userdata
0,AL,21,2010102906,,BEST,0,90N,537W,30,1006,...,INVEST,S,,,,,,,,
1,AL,21,2010102912,,BEST,0,98N,553W,35,1003,...,INVEST,M,,,,,,,,
2,AL,21,2010102918,,BEST,0,108N,568W,45,998,...,TOMAS,M,12.0,NEQ,40.0,30.0,0.0,40.0,,
3,AL,21,2010103000,,BEST,0,119N,578W,55,999,...,TOMAS,M,12.0,NEQ,200.0,60.0,0.0,180.0,,
4,AL,21,2010103000,,BEST,0,119N,578W,55,999,...,TOMAS,M,12.0,NEQ,200.0,60.0,0.0,180.0,,


In [16]:
lon=data['LonE/W']

In [17]:
lon = [np.float(x[:-1])/10. if x[-1]=='E' else -np.float(x[:-1])/10. for x in lon]

In [18]:
lat=data['LatN/S']

In [19]:
lat = [np.float(x[:-1])/10. if x[-1]=='N' else -np.float(x[:-1])/10. for x in lat]

Check if we cross International Date Line (IDL)

In [20]:
sig=np.sign(lon)
sig1=sig[0]
m=sig != sig1

In [21]:
if sum(m)>0:
# adjust the lon values going from -180:180
        if sig1 > 0:
                lon[lon < 0] += 360.
        elif sig1 < 0:
                lon[lon > 0] -= 360.



In [22]:
plt.plot(lon,lat,'o--')
mplleaflet.display()

In [23]:
vmax = data['VMAX'] # 10 minute wind in Knots

In [24]:
mslp = data['MSLP'] # Minimum sea level pressure, 850 - 1050 mb.

In [25]:
penv = data['POUTER']     # pressure in millibars of the last closed isobar, 900 - 1050 mb.

In [26]:
time = data['YYYYMMDDHH'].apply(pd.to_datetime, format='%Y%m%d%H')

In [27]:
rmw = data['RMW'] # in nautical miles

In [28]:
wradii=pd.DataFrame({'34ne':np.zeros(data.shape[0]), '34se':np.zeros(data.shape[0]), '34sw':np.zeros(data.shape[0]), '34nw':np.zeros(data.shape[0]), '50ne':np.zeros(data.shape[0]), '50se':np.zeros(data.shape[0]), '50sw':np.zeros(data.shape[0]), '50nw':np.zeros(data.shape[0]),'64ne':np.zeros(data.shape[0]), '64se':np.zeros(data.shape[0]), '64sw':np.zeros(data.shape[0]), '64nw':np.zeros(data.shape[0])})

In [29]:
wradii.head()

Unnamed: 0,34ne,34nw,34se,34sw,50ne,50nw,50se,50sw,64ne,64nw,64se,64sw
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
data.RAD=data.RAD.astype(str).str.strip() # convert to text format and delete whitespace

In [31]:
data.WINDCODE = data.WINDCODE.str.strip() # convert to text format and delete whitespace

In [32]:
rwcols=['RAD1', 'RAD2', 'RAD3', 'RAD4']

In [33]:
wcols=['34ne', '34se', '34sw', '34nw','50ne', '50se', '50sw', '50nw','64ne', '64se', '64sw', '64nw']

constract the wind radii matrix

In [34]:
for i in range(data.shape[0]):
    if data.WINDCODE.iloc[i] == 'NEQ' :
        rcols = [s for s in wradii.columns.values.astype(str) if data.RAD.iloc[i] in s]
        wradii.ix[i,rcols] = data.ix[i,rwcols].values

create the inpData matrix 

In [35]:
dic={'t':time, 'lat':lat,'lon':lon,'penv': penv, 'pcenter': mslp, 'vmax':vmax, 'rmax':rmw, 'hurName': data.STORMNAME} 

In [36]:
inp=pd.DataFrame(dic)

In [37]:
inp.head()

Unnamed: 0,hurName,lat,lon,pcenter,penv,rmax,t,vmax
0,INVEST,9.0,-53.7,1006,1009,120.0,2010-10-29 06:00:00,30
1,INVEST,9.8,-55.3,1003,1009,90.0,2010-10-29 12:00:00,35
2,TOMAS,10.8,-56.8,998,1009,30.0,2010-10-29 18:00:00,45
3,TOMAS,11.9,-57.8,999,1009,30.0,2010-10-30 00:00:00,55
4,TOMAS,11.9,-57.8,999,1009,30.0,2010-10-30 00:00:00,55


In [38]:
inpData = pd.concat([inp,wradii], axis=1)

In [39]:
right = inpData[wcols].groupby(inpData.t).sum()

In [40]:
left = inp.drop_duplicates()

In [41]:
left = left.set_index('t')

In [42]:
inpData = pd.concat([left,right], axis=1)

In [43]:
inpData.head()

Unnamed: 0_level_0,hurName,lat,lon,pcenter,penv,rmax,vmax,34ne,34se,34sw,34nw,50ne,50se,50sw,50nw,64ne,64se,64sw,64nw
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2010-10-29 06:00:00,INVEST,9.0,-53.7,1006,1009,120.0,30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2010-10-29 12:00:00,INVEST,9.8,-55.3,1003,1009,90.0,35,75.0,0.0,75.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2010-10-29 18:00:00,TOMAS,10.8,-56.8,998,1009,30.0,45,90.0,50.0,90.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2010-10-30 00:00:00,TOMAS,11.9,-57.8,999,1009,30.0,55,90.0,50.0,90.0,50.0,30.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0
2010-10-30 06:00:00,TOMAS,12.7,-58.9,997,1009,30.0,60,90.0,50.0,90.0,50.0,30.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0


In [44]:
inpData = inpData.dropna()

In [45]:
inpData = inpData.apply(pd.to_numeric, errors='ignore')

In [46]:
dph = (inpData.penv - inpData.pcenter) * 100 # convert to KP??????

In [47]:
inpData=inpData.assign(dp=dph)

### create inpData file

In [48]:
inpData.head()

Unnamed: 0_level_0,hurName,lat,lon,pcenter,penv,rmax,vmax,34ne,34se,34sw,34nw,50ne,50se,50sw,50nw,64ne,64se,64sw,64nw,dp
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2010-10-29 06:00:00,INVEST,9.0,-53.7,1006,1009,120.0,30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,300
2010-10-29 12:00:00,INVEST,9.8,-55.3,1003,1009,90.0,35,75.0,0.0,75.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,600
2010-10-29 18:00:00,TOMAS,10.8,-56.8,998,1009,30.0,45,90.0,50.0,90.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1100
2010-10-30 00:00:00,TOMAS,11.9,-57.8,999,1009,30.0,55,90.0,50.0,90.0,50.0,30.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,1000
2010-10-30 06:00:00,TOMAS,12.7,-58.9,997,1009,30.0,60,90.0,50.0,90.0,50.0,30.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,1200


In [49]:
inpData.index[0]

Timestamp('2010-10-29 06:00:00')

In [50]:
inpData['time']=inpData.index-inpData.index[0]

In [51]:
inpData['time']=inpData['time'] / pd.Timedelta('1 hour')

In [52]:
inpData.reset_index(level=0, inplace=True)

In [53]:
inpData=inpData.set_index('time')

In [54]:
inpData.head()

Unnamed: 0_level_0,t,hurName,lat,lon,pcenter,penv,rmax,vmax,34ne,34se,...,34nw,50ne,50se,50sw,50nw,64ne,64se,64sw,64nw,dp
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,2010-10-29 06:00:00,INVEST,9.0,-53.7,1006,1009,120.0,30,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,300
6.0,2010-10-29 12:00:00,INVEST,9.8,-55.3,1003,1009,90.0,35,75.0,0.0,...,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,600
12.0,2010-10-29 18:00:00,TOMAS,10.8,-56.8,998,1009,30.0,45,90.0,50.0,...,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1100
18.0,2010-10-30 00:00:00,TOMAS,11.9,-57.8,999,1009,30.0,55,90.0,50.0,...,50.0,30.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,1000
24.0,2010-10-30 06:00:00,TOMAS,12.7,-58.9,997,1009,30.0,60,90.0,50.0,...,50.0,30.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,1200


## save inpData file

We save here in original units for  testing purposes

In [55]:
column_order=['lat','lon','vmax','64ne','64se','64sw','64nw','50ne','50se','50sw','50nw','34ne','34se','34sw','34nw']

In [56]:
header=['lat','long','vmax','64ne','64se','64sw','64nw','50ne','50se','50sw','50nw','34ne','34se','34sw','34nw']

In [57]:
inpData.to_csv('tmp/inpData.txt',index=True, columns=column_order, sep='\t', header=header)

## convert to SI

In [58]:
inpData['vmax']=inpData['vmax']*kt2ms

In [59]:
inpData['rmax']=inpData['rmax']*nm2m

In [60]:
inpData[wcols]=inpData[wcols]*nm2m

In [61]:
inpData.head()

Unnamed: 0_level_0,t,hurName,lat,lon,pcenter,penv,rmax,vmax,34ne,34se,...,34nw,50ne,50se,50sw,50nw,64ne,64se,64sw,64nw,dp
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,2010-10-29 06:00:00,INVEST,9.0,-53.7,1006,1009,222240.0,15.433333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,300
6.0,2010-10-29 12:00:00,INVEST,9.8,-55.3,1003,1009,166680.0,18.005556,138900.0,0.0,...,74080.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,600
12.0,2010-10-29 18:00:00,TOMAS,10.8,-56.8,998,1009,55560.0,23.15,166680.0,92600.0,...,92600.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1100
18.0,2010-10-30 00:00:00,TOMAS,11.9,-57.8,999,1009,55560.0,28.294444,166680.0,92600.0,...,92600.0,55560.0,0.0,55560.0,0.0,0.0,0.0,0.0,0.0,1000
24.0,2010-10-30 06:00:00,TOMAS,12.7,-58.9,997,1009,55560.0,30.866667,166680.0,92600.0,...,92600.0,55560.0,0.0,55560.0,0.0,0.0,0.0,0.0,0.0,1200


## save inpData file

In [62]:
column_order=['lat','lon','dp','vmax','64ne','64se','64sw','64nw','50ne','50se','50sw','50nw','34ne','34se','34sw','34nw']

In [63]:
header=['lat','long','dp','vmax','64ne','64se','64sw','64nw','50ne','50se','50sw','50nw','34ne','34se','34sw','34nw']

In [64]:
inpData.to_csv('tmp/inpDataSI.txt',index=True, columns=column_order, sep='\t', header=header)

### create bulInfo.txt file

In [65]:
tt=pd.to_datetime(inpData.t[0])

In [66]:
tt=datetime.datetime.strftime(tt,'%d %b %Y %H:%M:%S')

In [67]:
dic0={'advNo':[1],'tShift':[0],'$date':tt,'land':[1],'notes':[0]}

In [68]:
bul=pd.DataFrame.from_dict(dic0)

In [69]:
bul

Unnamed: 0,$date,advNo,land,notes,tShift
0,29 Oct 2010 06:00:00,1,1,0,0


In [70]:
bul.to_csv('tmp/bulInfo.txt',index=False, columns=['advNo','tShift','$date','land','notes'], sep='\t')

### create info.xml file

In [71]:
import xml.etree.ElementTree as et 
from xml.dom import minidom

def prettify(elem):
    """Return a pretty-printed XML string for the Element.
    """
    rough_string = et.tostring(elem, 'utf-8')
    reparsed = minidom.parseString(rough_string)
    return reparsed.toprettyxml(indent="  ")

In [72]:
hurName = inpData.hurName.value_counts().index[0].strip()

In [73]:
basin = data.BASIN.value_counts().index[0].strip()

In [74]:
info = et.Element('setexp')
et.SubElement(info, 'source').text = 'Tropical Cyclone Bulletin through GDACS/PDC'
et.SubElement(info, 'hurName').text = hurName
et.SubElement(info, 'hurId').text = hurName
et.SubElement(info, 'basin').text = basin
et.SubElement(info, 'bulNo').text = '1'
et.SubElement(info, 'bulDate').text = tt
et.SubElement(info, 'n').text = '100000'
et.SubElement(info, 'fk').text = '0.81'
et.SubElement(info, 'stormsurge').text = '0'
et.SubElement(info, 'timefactor').text = '1'
et.SubElement(info, 'landfall').text = '1'

In [75]:
xmlf = minidom.parseString(prettify(info))

In [76]:
with open('tmp/info.xml','w') as f:
      xmlf.writexml(f)

## output

We can save the complete dataset for future use.

In [77]:
inpData.to_csv('tmp/step1.txt',index=True, sep='\t')

The next step is to compute translational and Coriolis velocity. See [Subtract translational and Coriolis velocity.ipynb](./Subtract translational and Coriolis velocity.ipynb)