# JTWC bulletin parser

In [1]:
import feedparser

In [2]:
import numpy as np
import pandas as pd

In [3]:
import urllib

In [4]:
import re

In [5]:
import matplotlib.pyplot as plt
import mplleaflet

In [6]:
%matplotlib inline

In [8]:
from Storms.utils import readxml

In [9]:
# Unit-conversion factors used further down.
nm2m = 1852.0            # metres in one nautical mile
kt2ms = nm2m / 3600.0    # knots (nautical miles per hour) -> metres per second

### Retrieve the feed from the JTWC website

In [10]:
# Address of the JTWC RSS feed that is downloaded and parsed below.
url = 'https://metoc.ndbc.noaa.gov/RSSFeeds-portlet/img/jtwc/jtwc.rss'

In [11]:
# Dump the text content of the feed for a quick visual check.
feed_text = readxml(url, 'lxml')
print(feed_text)

JTWC TROPICAL CYCLONE INFORMATION FEED
https://metoc.ndbc.noaa.gov/web/guest/jtwc
JTWC Tropical Cyclone Information
Mon, 08 May 2017 09:04:07 +0000
Mon, 08 May 2017 09:04:07 +0000
ContentFeeder 2.0
https://metoc.ndbc.noaa.gov/web/guest/jtwc
JTWC TROPICAL CYCLONE INFORMATION FEED
https://metoc.ndbc.noaa.gov/web/guest/jtwc
Current Northwest Pacific/North Indian Ocean* Tropical Systems
https://metoc.ndbc.noaa.gov/web/guest/jtwc
* Includes Bay of Bengal and Arabian Sea
]]>
CDO.JTWC.fct@navy.mil (JTWC CDO)
Northwest Pacific/North Indian Ocean* Tropical Systems
Mon, 08 May 17 09:04:07 +0000
Current Central/Eastern Pacific Tropical Systems
https://metoc.ndbc.noaa.gov/web/guest/jtwc
]]>
CDO.JTWC.fct@navy.mil (JTWC CDO)
Central/Eastern Pacific Tropical Systems
Mon, 08 May 17 09:04:07 +0000
Current Southern Hemisphere Tropical Systems
https://metoc.ndbc.noaa.gov/web/guest/jtwc
Tropical Cyclone
Issued at 08/0900Z
JMV 3.0 Data
Google Earth Overlay
Multispectral Satellite Imagery
Satellite Fix Bull

In [12]:
jtwc=feedparser.parse(url) # fetch and parse the RSS feed; its 'entries' item is read further down

In [13]:
jtwc.keys()

['feed',
 'status',
 'updated',
 'updated_parsed',
 'encoding',
 'bozo',
 'headers',
 'etag',
 'href',
 'version',
 'entries',
 'namespaces']

In [14]:
# Collect the summaries (one for each alert present in the webpage).
# Only feed entries whose summary mentions 'tcw' are kept; the title of
# each kept entry is stored alongside its summary.
data = jtwc['entries']
tcw_entries = [entry for entry in data if 'tcw' in entry.summary]
bname = [entry.title_detail.value for entry in tcw_entries]
txt = [entry.summary for entry in tcw_entries]

In [15]:
len(txt)

1

In [16]:
bname

[u'Current Southern Hemisphere Tropical Systems']

In [17]:
# Concatenate all collected summaries into a single string for the splitting steps below.
txt_ = ''.join(txt)

In [18]:
# Cut the combined summaries at every occurrence of 'Tropical' and remove the
# HTML tags from each piece; the pieces give the references for the alerts.
_html_tag = re.compile('<[^<]+>')
elp = [_html_tag.sub("", piece) for piece in txt_.split('Tropical')]

In [19]:
# Keep only the first line of every (whitespace-trimmed) piece; presumably
# this is where the cyclone name sits -- confirm against a feed with active alerts.
hur = [piece.strip().partition('\n')[0] for piece in elp]

In [20]:
hur



In [21]:
# Encode every name as UTF-8 and strip any double-quote characters from it.
hur = [name.encode('utf8').replace('"', '') for name in hur]

In [22]:
# Discard the empty entries left over from the split.
hur = [name for name in hur if name]

In [23]:
hur



In [24]:
# Split the summaries again, this time at every 'href=', and remove the HTML
# tags from each piece so the bulletin URLs can be picked out.
_html_tag = re.compile('<[^<]+>')
elp = [_html_tag.sub("", piece) for piece in txt_.split('href=')]

In [25]:
# Keep only the fragments that mention a 'tcw' bulletin file.
bul = [fragment for fragment in elp if 'tcw' in fragment]

# The URL is the first whitespace-delimited token of each fragment
# (still wrapped in double quotes at this point).
bul_ = [fragment.split()[0] for fragment in bul]

bul_

[u'"https://metoc.ndbc.noaa.gov/ProductFeeds-portlet/img/jtwc/products/sh1817.tcw"']

In [26]:
# Encode every URL as UTF-8 and strip the surrounding double quotes.
bul_ = [link.encode('utf8').replace('"', '') for link in bul_]

In [27]:
bul_

['https://metoc.ndbc.noaa.gov/ProductFeeds-portlet/img/jtwc/products/sh1817.tcw']

## For the first bulletin

In [29]:
# Download the raw text of the first bulletin.
# `det` is reset first so that a failed download can never leave a stale
# bulletin from an earlier run in the namespace (or leave `det` undefined).
det = ''
try:
    det = urllib.urlopen(bul_[0]).read() # download and parse the bulletin file
except (IndexError, IOError):
    # IndexError: the feed listed no bulletin; IOError: the download failed.
    # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
    print('No Bulletins')

In [30]:
print det

WTPS51 PGTW 080900    
2017050806 18P DONNA      019  01 155 07 SATL 020
T000 175S 1651E 115 R064 030 NE QD 035 SE QD 035 SW QD 030 NW QD R050 055 NE QD 045 SE QD 055 SW QD 040 NW QD R034 140 NE QD 180 SE QD 160 SW QD 110 NW QD 
T012 188S 1658E 110 R064 045 NE QD 045 SE QD 045 SW QD 030 NW QD R050 085 NE QD 095 SE QD 075 SW QD 055 NW QD R034 165 NE QD 195 SE QD 160 SW QD 110 NW QD 
T024 203S 1666E 090 R064 030 NE QD 030 SE QD 030 SW QD 015 NW QD R050 065 NE QD 075 SE QD 060 SW QD 040 NW QD R034 160 NE QD 180 SE QD 125 SW QD 095 NW QD 
T036 218S 1677E 065 R064 020 NE QD 025 SE QD 025 SW QD 020 NW QD R050 045 NE QD 045 SE QD 035 SW QD 025 NW QD R034 160 NE QD 170 SE QD 105 SW QD 080 NW QD 
T048 237S 1693E 050 R050 020 NE QD 010 SE QD 015 SW QD 010 NW QD R034 145 NE QD 160 SE QD 085 SW QD 055 NW QD 
T072 282S 1740E 035 R034 165 NE QD 175 SE QD 050 SW QD 055 NW QD 
AMP
    036HR BECOMING EXTRATROPICAL
    048HR BECOMING EXTRATROPICAL
    072HR EXTRATROPICAL
   01 ACTIVE TROPICA

In [31]:
# Issue time of the bulletin: the 10-digit YYYYMMDDHH stamp that opens one of
# the header lines (e.g. '2017050806 18P DONNA ...'). It is located by pattern
# rather than by a fixed line number, so it does not depend on how many lines
# precede it. `tstamp` is None when no such line exists.
_tstamp_match = re.search(r'^[ \t]*(\d{10})\b', det, flags=re.MULTILINE)
tstamp = _tstamp_match.group(1) if _tstamp_match else None

In [32]:
# Select the track lines, where all the info is (conveniently) stored:
# after trimming, they start with 'T' and end with 'QD'.
# startswith/endswith compare by value (the original used `is`, an identity
# test) and are safe on blank lines, where indexing [0] would raise IndexError.
bdata = [
    record
    for record in (line.strip() for line in det.splitlines())
    if record.startswith('T') and record.endswith('QD')
]

In [33]:
# Replace each run of whitespace with a single comma as delimiter. Collapsing
# runs keeps repeated spaces or tabs from producing empty fields that would
# shift every following column.
bdata = [re.sub(r'\s+', ',', elem) for elem in bdata]

In [34]:
# Turn every comma-delimited track line into a list of tokens.
bdata = [record.split(',') for record in bdata]

In [35]:
# Build the DataFrame only when at least one track line was found:
# one row per track line, one column per token.
if bdata:
    df = pd.DataFrame(bdata)

In [36]:
df.head() #check

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,33,34,35,36,37,38,39,40,41,42
0,T000,175S,1651E,115,R064,30,NE,QD,35,SE,...,QD,180.0,SE,QD,160.0,SW,QD,110.0,NW,QD
1,T012,188S,1658E,110,R064,45,NE,QD,45,SE,...,QD,195.0,SE,QD,160.0,SW,QD,110.0,NW,QD
2,T024,203S,1666E,90,R064,30,NE,QD,30,SE,...,QD,180.0,SE,QD,125.0,SW,QD,95.0,NW,QD
3,T036,218S,1677E,65,R064,20,NE,QD,25,SE,...,QD,170.0,SE,QD,105.0,SW,QD,80.0,NW,QD
4,T048,237S,1693E,50,R050,20,NE,QD,10,SE,...,,,,,,,,,,


In [37]:
# Expand the DataFrame to include all possible values.
# A complete track line has 43 tokens: 4 leading ones (time, lat, lon, vmax)
# plus 3 wind-radii groups of 13 tokens each. When no line in the bulletin is
# complete, the missing trailing columns are added, filled with NaN.
# reindex adds them directly instead of relying on the column union of a
# row-wise concat with an empty frame.
if df.shape[1] < 43:
    df = df.reindex(columns=np.arange(43))

In [38]:
df # check

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,33,34,35,36,37,38,39,40,41,42
0,T000,175S,1651E,115,R064,30,NE,QD,35,SE,...,QD,180.0,SE,QD,160.0,SW,QD,110.0,NW,QD
1,T012,188S,1658E,110,R064,45,NE,QD,45,SE,...,QD,195.0,SE,QD,160.0,SW,QD,110.0,NW,QD
2,T024,203S,1666E,90,R064,30,NE,QD,30,SE,...,QD,180.0,SE,QD,125.0,SW,QD,95.0,NW,QD
3,T036,218S,1677E,65,R064,20,NE,QD,25,SE,...,QD,170.0,SE,QD,105.0,SW,QD,80.0,NW,QD
4,T048,237S,1693E,50,R050,20,NE,QD,10,SE,...,,,,,,,,,,
5,T072,282S,1740E,35,R034,165,NE,QD,175,SE,...,,,,,,,,,,


In [39]:
# Drop the text columns [NE,QD,....]: a column is discarded as soon as any of
# its cells holds one of the quadrant labels.
# isin() compares element-wise against the labels; comparing the frame with a
# one-element list (`df != [ref]`) is rejected by current pandas because the
# list length does not match the number of columns.
# NOTE(review): columns that are entirely empty are kept, so the positional
# logic further down assumes at least one line carries all three radii groups.
text_labels = ['NE', 'NW', 'SW', 'SE', 'QD']
has_label = df.isin(text_labels).any(axis=0)
df = df.loc[:, ~has_label]

In [40]:
df = df.set_index(df.columns[0]) # use the first column (the forecast-time labels T000, T012, ...) as index

In [41]:
df.head() #check

Unnamed: 0_level_0,1,2,3,4,5,8,11,14,17,18,21,24,27,30,31,34,37,40
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
T000,175S,1651E,115,R064,30,35,35,30,R050,55,45,55,40,R034,140.0,180.0,160.0,110.0
T012,188S,1658E,110,R064,45,45,45,30,R050,85,95,75,55,R034,165.0,195.0,160.0,110.0
T024,203S,1666E,90,R064,30,30,30,15,R050,65,75,60,40,R034,160.0,180.0,125.0,95.0
T036,218S,1677E,65,R064,20,25,25,20,R050,45,45,35,25,R034,160.0,170.0,105.0,80.0
T048,237S,1693E,50,R050,20,10,15,10,R034,145,160,85,55,,,,,


In [42]:
df.columns=np.arange(df.shape[1]) # renumber the remaining columns consecutively from 0, so positions and labels agree

In [43]:
df.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
T000,175S,1651E,115,R064,30,35,35,30,R050,55,45,55,40,R034,140.0,180.0,160.0,110.0
T012,188S,1658E,110,R064,45,45,45,30,R050,85,95,75,55,R034,165.0,195.0,160.0,110.0
T024,203S,1666E,90,R064,30,30,30,15,R050,65,75,60,40,R034,160.0,180.0,125.0,95.0
T036,218S,1677E,65,R064,20,25,25,20,R050,45,45,35,25,R034,160.0,170.0,105.0,80.0
T048,237S,1693E,50,R050,20,10,15,10,R034,145,160,85,55,,,,,


In [44]:
# Move the values to the appropriate place based on the wind radii in order to
# force the fixed [64,50,34] layout: label+4 values in columns 3-7 (R064),
# 8-12 (R050) and 13-17 (R034). Lines with fewer groups start with their
# strongest available label in column 3, so their values must be shifted right.

for row, first_label in enumerate(list(df.iloc[:, 3])):
    if first_label == 'R034':
        # only the 34 kt group is present: it belongs in the last five columns
        df.iloc[row, 13:] = df.iloc[row, 3:8].values
        df.iloc[row, 3:8] = None
    elif first_label == 'R050':
        # 50 kt and 34 kt groups are present: shift both five columns right
        df.iloc[row, 8:] = df.iloc[row, 3:13].values
        df.iloc[row, 3:8] = None


In [45]:
df #check

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
T000,175S,1651E,115,R064,30.0,35.0,35.0,30.0,R050,55.0,45.0,55.0,40.0,R034,140,180,160,110
T012,188S,1658E,110,R064,45.0,45.0,45.0,30.0,R050,85.0,95.0,75.0,55.0,R034,165,195,160,110
T024,203S,1666E,90,R064,30.0,30.0,30.0,15.0,R050,65.0,75.0,60.0,40.0,R034,160,180,125,95
T036,218S,1677E,65,R064,20.0,25.0,25.0,20.0,R050,45.0,45.0,35.0,25.0,R034,160,170,105,80
T048,237S,1693E,50,,,,,,R050,20.0,10.0,15.0,10.0,R034,145,160,85,55
T072,282S,1740E,35,,,,,,,,,,,R034,165,175,50,55


In [46]:
# Positions 3, 8 and 13 hold the text labels [R064,R050,R034]; with the layout
# fixed they carry no information, so remove them.
radius_label_cols = df.columns[[3, 8, 13]]
df = df.drop(radius_label_cols, axis=1)

In [47]:
# Name the columns: position and intensity first, then one column per
# wind threshold (64/50/34) and quadrant (ne/se/sw/nw).
radii_names = [kt + quad
               for kt in ('64', '50', '34')
               for quad in ('ne', 'se', 'sw', 'nw')]
df.columns = ['lat', 'lon', 'vmax'] + radii_names

In [48]:
df #check

Unnamed: 0_level_0,lat,lon,vmax,64ne,64se,64sw,64nw,50ne,50se,50sw,50nw,34ne,34se,34sw,34nw
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
T000,175S,1651E,115,30.0,35.0,35.0,30.0,55.0,45.0,55.0,40.0,140,180,160,110
T012,188S,1658E,110,45.0,45.0,45.0,30.0,85.0,95.0,75.0,55.0,165,195,160,110
T024,203S,1666E,90,30.0,30.0,30.0,15.0,65.0,75.0,60.0,40.0,160,180,125,95
T036,218S,1677E,65,20.0,25.0,25.0,20.0,45.0,45.0,35.0,25.0,160,170,105,80
T048,237S,1693E,50,,,,,20.0,10.0,15.0,10.0,145,160,85,55
T072,282S,1740E,35,,,,,,,,,165,175,50,55


In [49]:
# Convert the forecast-time labels from string to float ('T012' -> 12.0).
# The builtin float is used because the `np.float` alias no longer exists in
# current NumPy releases.
tidx = [float(label[1:]) for label in df.index.values]

In [50]:
df.index=tidx # replace the 'Txxx' labels with the numeric values computed above

In [51]:
df # check

Unnamed: 0,lat,lon,vmax,64ne,64se,64sw,64nw,50ne,50se,50sw,50nw,34ne,34se,34sw,34nw
0.0,175S,1651E,115,30.0,35.0,35.0,30.0,55.0,45.0,55.0,40.0,140,180,160,110
12.0,188S,1658E,110,45.0,45.0,45.0,30.0,85.0,95.0,75.0,55.0,165,195,160,110
24.0,203S,1666E,90,30.0,30.0,30.0,15.0,65.0,75.0,60.0,40.0,160,180,125,95
36.0,218S,1677E,65,20.0,25.0,25.0,20.0,45.0,45.0,35.0,25.0,160,170,105,80
48.0,237S,1693E,50,,,,,20.0,10.0,15.0,10.0,145,160,85,55
72.0,282S,1740E,35,,,,,,,,,165,175,50,55


In [52]:
df = df.fillna(0) # replace every missing cell (None/NaN) with zero

In [53]:
df #check

Unnamed: 0,lat,lon,vmax,64ne,64se,64sw,64nw,50ne,50se,50sw,50nw,34ne,34se,34sw,34nw
0.0,175S,1651E,115,30,35,35,30,55,45,55,40,140,180,160,110
12.0,188S,1658E,110,45,45,45,30,85,95,75,55,165,195,160,110
24.0,203S,1666E,90,30,30,30,15,65,75,60,40,160,180,125,95
36.0,218S,1677E,65,20,25,25,20,45,45,35,25,160,170,105,80
48.0,237S,1693E,50,0,0,0,0,20,10,15,10,145,160,85,55
72.0,282S,1740E,35,0,0,0,0,0,0,0,0,165,175,50,55


In [54]:
# Helpers that turn the hemisphere-suffixed lat/lon notation into signed
# numeric strings (the conversion to float happens in a later step).
def chlat(x):
    """Drop the trailing hemisphere letter; prefix '-' when it is 'S'."""
    digits = x[:-1]
    if x[-1] == 'S':
        return '-' + digits
    return digits


def chlon(x):
    """Drop the trailing hemisphere letter; prefix '-' when it is 'W'."""
    digits = x[:-1]
    if x[-1] == 'W':
        return '-' + digits
    return digits

In [55]:
# Replace the hemisphere suffix of lat/lon with a sign (S and W become
# negative); the values are still strings in tenths of a degree here.
df['lat'] = df['lat'].map(chlat)
df['lon'] = df['lon'].map(chlon)

In [56]:
df

Unnamed: 0,lat,lon,vmax,64ne,64se,64sw,64nw,50ne,50se,50sw,50nw,34ne,34se,34sw,34nw
0.0,-175,1651,115,30,35,35,30,55,45,55,40,140,180,160,110
12.0,-188,1658,110,45,45,45,30,85,95,75,55,165,195,160,110
24.0,-203,1666,90,30,30,30,15,65,75,60,40,160,180,125,95
36.0,-218,1677,65,20,25,25,20,45,45,35,25,160,170,105,80
48.0,-237,1693,50,0,0,0,0,20,10,15,10,145,160,85,55
72.0,-282,1740,35,0,0,0,0,0,0,0,0,165,175,50,55


In [57]:
# Convert lat/lon to (downcast) floats, then divide by 10 because the
# bulletin gives them in tenths of a degree.
coords = df[['lat','lon']].apply(pd.to_numeric,downcast='float')
df[['lat','lon']] = coords.divide(10)

In [58]:
df = df.apply(pd.to_numeric,downcast='float') # convert every column to numeric, downcast to the smallest float dtype

In [59]:
df #check

Unnamed: 0,lat,lon,vmax,64ne,64se,64sw,64nw,50ne,50se,50sw,50nw,34ne,34se,34sw,34nw
0.0,-17.5,165.100006,115.0,30.0,35.0,35.0,30.0,55.0,45.0,55.0,40.0,140.0,180.0,160.0,110.0
12.0,-18.799999,165.800003,110.0,45.0,45.0,45.0,30.0,85.0,95.0,75.0,55.0,165.0,195.0,160.0,110.0
24.0,-20.299999,166.600006,90.0,30.0,30.0,30.0,15.0,65.0,75.0,60.0,40.0,160.0,180.0,125.0,95.0
36.0,-21.799999,167.699997,65.0,20.0,25.0,25.0,20.0,45.0,45.0,35.0,25.0,160.0,170.0,105.0,80.0
48.0,-23.700001,169.300003,50.0,0.0,0.0,0.0,0.0,20.0,10.0,15.0,10.0,145.0,160.0,85.0,55.0
72.0,-28.200001,174.0,35.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,165.0,175.0,50.0,55.0


In [60]:
# Convert maximum wind speed to m/s.
# Note: re-running this cell on its own scales the column again.
df['vmax'] = df['vmax'] * kt2ms

In [61]:
df['notes']=0 ## add a constant 'notes' column (all zeros) for compatibility with operational

In [62]:
# Display the track: longitude on x, latitude on y, markers joined by a
# dashed line, rendered through mplleaflet.
plt.plot(df['lon'], df['lat'], 'o--')
mplleaflet.display()

## output inpData

In [63]:
# DataFrame columns in the order they are written out: position, intensity,
# the twelve wind-radii columns and the notes flag.
column_order = (
    ['lat', 'lon', 'vmax']
    + [kt + quad for kt in ('64', '50', '34') for quad in ('ne', 'se', 'sw', 'nw')]
    + ['notes']
)

In [64]:
# Header labels for the output file; same as the DataFrame column names except
# that the longitude column is written as 'long'.
header = (
    ['lat', 'long', 'vmax']
    + [kt + quad for kt in ('64', '50', '34') for quad in ('ne', 'se', 'sw', 'nw')]
    + ['notes']
)

In [None]:
#df.to_csv('tmp/inpDataJTWC.txt',index=True, columns=column_order, sep='\t', header=header)