# JTWC bulletin parser into inpData

In [1]:
import feedparser

In [2]:
import numpy as np
import pandas as pd

In [3]:
import urllib

In [4]:
import re

In [5]:
import matplotlib.pyplot as plt
import mplleaflet

In [6]:
%matplotlib inline

In [7]:
from readxml import readxml

In [8]:
# unit conversion factors
nm2m = 1852.0          # metres in one nautical mile
kt2ms = nm2m / 3600.0  # knots (nautical miles per hour) -> metres per second

### Retrieve the feed from the JTWC website

In [9]:
url = 'https://metoc.ndbc.noaa.gov/RSSFeeds-portlet/img/jtwc/jtwc.rss' # address of the JTWC tropical cyclone information RSS feed

In [10]:
# dump the text content of the feed as returned by the local readxml helper
print(readxml(url, 'lxml'))

JTWC TROPICAL CYCLONE INFORMATION FEED
https://metoc.ndbc.noaa.gov/web/guest/jtwc
JTWC Tropical Cyclone Information
Fri, 28 Apr 2017 15:46:08 +0000
Fri, 28 Apr 2017 15:46:08 +0000
ContentFeeder 2.0
https://metoc.ndbc.noaa.gov/web/guest/jtwc
JTWC TROPICAL CYCLONE INFORMATION FEED
https://metoc.ndbc.noaa.gov/web/guest/jtwc
Current Northwest Pacific/North Indian Ocean* Tropical Systems
https://metoc.ndbc.noaa.gov/web/guest/jtwc
Tropical Depression
Issued at 27/2100Z
JMV 3.0 Data
Google Earth Overlay
IR Satellite Imagery
Satellite Fix Bulletin
* Includes Bay of Bengal and Arabian Sea
]]>
CDO.JTWC.fct@navy.mil (JTWC CDO)
Northwest Pacific/North Indian Ocean* Tropical Systems
Fri, 28 Apr 17 15:46:08 +0000
Current Central/Eastern Pacific Tropical Systems
https://metoc.ndbc.noaa.gov/web/guest/jtwc
]]>
CDO.JTWC.fct@navy.mil (JTWC CDO)
Central/Eastern Pacific Tropical Systems
Fri, 28 Apr 17 15:46:08 +0000
Current Southern Hemisphere Tropical Systems
https://metoc.ndbc.noaa.gov/web/guest/jtwc
Tro

In [11]:
jtwc=feedparser.parse(url) # parse the RSS feed; the result is dict-like (see its keys in the next cell)

In [12]:
jtwc.keys() # list the top-level fields of the parsed feed

['feed',
 'status',
 'updated',
 'updated_parsed',
 'encoding',
 'bozo',
 'headers',
 'etag',
 'href',
 'version',
 'entries',
 'namespaces']

In [13]:
# collect the summaries (one for each feed entry whose summary mentions a 'tcw' bulletin)
data = jtwc['entries']
tcw_entries = [entry for entry in data if 'tcw' in entry.summary]
bname = [entry.title_detail.value for entry in tcw_entries]  # titles of the kept entries
txt = [entry.summary for entry in tcw_entries]               # their summaries (still containing markup)

In [14]:
len(txt) # number of summaries that mention a tcw bulletin

2

In [15]:
bname # titles of the feed entries that were kept

[u'Current Northwest Pacific/North Indian Ocean* Tropical Systems',
 u'Current Southern Hemisphere Tropical Systems']

In [16]:
txt_ = ''.join(txt) # concatenate all kept summaries into a single string so it can be split on keywords below

In [17]:
# split the text on the word 'Tropical' to get the references for the alerts,
# removing every <...> markup tag from the resulting pieces
tag_pattern = re.compile('<[^<]+>')
elp = [tag_pattern.sub("", piece) for piece in txt_.split('Tropical')]

In [18]:
# the name of each TC is taken as the first line of the corresponding stripped piece
hur = [piece.strip().partition('\n')[0] for piece in elp]

In [19]:
hur # check: first line of every piece (still unicode, may contain empty strings)

[u'',

In [20]:
# encode every name to a UTF-8 byte string and remove any double quotes
hur = [name.encode('utf8').replace('"', '') for name in hur]

In [21]:
# keep only the non-empty names
hur = [name for name in hur if name]

In [22]:
hur # check the cleaned list of names



In [23]:
# split differently now, on 'href=', so that each piece starts with a link target (the bulletin urls);
# <...> markup tags are removed from every piece as before
tag_pattern = re.compile('<[^<]+>')
elp = [tag_pattern.sub("", piece) for piece in txt_.split('href=')]

In [24]:
# keep the pieces that reference a tcw bulletin ...
bul = [piece for piece in elp if 'tcw' in piece]

# ... and take their first whitespace-delimited token, which is the (still quoted) url
bul_ = [piece.split()[0] for piece in bul]

bul_ # check: the urls are still unicode and wrapped in double quotes

[u'"https://metoc.ndbc.noaa.gov/ProductFeeds-portlet/img/jtwc/products/wp0317.tcw"',
 u'"https://metoc.ndbc.noaa.gov/ProductFeeds-portlet/img/jtwc/products/sh1717.tcw"']

In [25]:
# encode every url to a UTF-8 byte string and remove the surrounding double quotes
bul_ = [link.encode('utf8').replace('"', '') for link in bul_]

In [26]:
bul_ # check: plain, quote-free url strings

['https://metoc.ndbc.noaa.gov/ProductFeeds-portlet/img/jtwc/products/wp0317.tcw',
 'https://metoc.ndbc.noaa.gov/ProductFeeds-portlet/img/jtwc/products/sh1717.tcw']

## For a single bulletin (the code below uses `bul_[1]`, the second URL in the list)

In [49]:
# download the bulletin file (second url of the list)
det = ''  # reset first, so a failed download can never silently reuse the text of an earlier run
try:
    response = urllib.urlopen(bul_[1])
    try:
        det = response.read()
    finally:
        response.close()  # always release the connection
except (IndexError, IOError):
    # IndexError: fewer than two bulletins in the feed; IOError: the download failed
    print('No Bulletins')

In [50]:
# show the raw text of the downloaded bulletin
print(det)

WTXS51 PGTW 281500    
2017042812 17S FRANCES    004  01 250 05 SATL 020
T000 121S 1250E 070 R064 010 NE QD 015 SE QD 010 SW QD 010 NW QD R050 025 NE QD 030 SE QD 025 SW QD 020 NW QD R034 060 NE QD 090 SE QD 090 SW QD 050 NW QD 
T012 126S 1239E 075 R064 000 NE QD 015 SE QD 010 SW QD 000 NW QD R050 010 NE QD 030 SE QD 025 SW QD 015 NW QD R034 065 NE QD 095 SE QD 080 SW QD 040 NW QD 
T024 134S 1230E 070 R064 000 NE QD 020 SE QD 015 SW QD 000 NW QD R050 010 NE QD 035 SE QD 030 SW QD 015 NW QD R034 045 NE QD 085 SE QD 065 SW QD 030 NW QD 
T036 138S 1222E 050 R050 010 NE QD 035 SE QD 030 SW QD 015 NW QD R034 045 NE QD 085 SE QD 065 SW QD 030 NW QD 
T048 139S 1216E 040 R034 030 NE QD 075 SE QD 045 SW QD 020 NW QD 
T072 138S 1200E 030 
AMP
    048HR DISSIPATING AS A SIGNIFICANT TROPICAL CYCLONE OVER WATER
    072HR DISSIPATED AS A SIGNIFICANT TROPICAL CYCLONE OVER WATER
   01 ACTIVE TROPICAL CYCLONE IN SOUTHIO
   MAX SUSTAINED WINDS BASED ON ONE-MINUTE AVERAGE
   WIND RADII VALID

In [51]:
# The issue time is the 10-digit stamp (2017042812 in the bulletin printed above) that opens
# the storm header line. Search for it instead of relying on a fixed line number: in the
# printed bulletin the third line is already the first forecast line (T000 ...).
stamp_match = re.search(r'^[ \t]*(\d{10})\b', det, flags=re.MULTILINE)
tstamp = stamp_match.group(1) if stamp_match else None  # None when the bulletin carries no stamp

In [52]:
#select the forecast lines where all the info is (conveniently) stored:
# they start with 'T' (T000, T012, ...) and end with the 'QD' marker
stripped_lines = (line.strip() for line in det.splitlines())
# startswith/endswith compare by value and are safe on blank lines
bdata = [line for line in stripped_lines if line.startswith('T') and line.endswith('QD')]

In [53]:
# use commas instead of spaces as the token delimiter
bdata = [record.replace(' ', ',') for record in bdata]

In [54]:
# break every record into its list of tokens
bdata = [record.split(',') for record in bdata]

In [55]:
if bdata:  # only when at least one forecast line was selected
    df = pd.DataFrame(bdata)  # one row per forecast line, one column per token

In [56]:
df.head() #check

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,33,34,35,36,37,38,39,40,41,42
0,T000,121S,1250E,70,R064,10,NE,QD,15,SE,...,QD,90.0,SE,QD,90.0,SW,QD,50.0,NW,QD
1,T012,126S,1239E,75,R064,0,NE,QD,15,SE,...,QD,95.0,SE,QD,80.0,SW,QD,40.0,NW,QD
2,T024,134S,1230E,70,R064,0,NE,QD,20,SE,...,QD,85.0,SE,QD,65.0,SW,QD,30.0,NW,QD
3,T036,138S,1222E,50,R050,10,NE,QD,35,SE,...,,,,,,,,,,
4,T048,139S,1216E,40,R034,30,NE,QD,75,SE,...,,,,,,,,,,


In [57]:
#expand the dataFrame to include all possible values
# (the longest forecast lines above split into 43 tokens, columns 0..42)
n_tokens = 43
if df.shape[1] < n_tokens:
    padding = pd.DataFrame(columns=list(np.arange(df.shape[1], n_tokens)))
    df = pd.concat([df, padding])

In [58]:
df # check

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,33,34,35,36,37,38,39,40,41,42
0,T000,121S,1250E,70,R064,10,NE,QD,15,SE,...,QD,90.0,SE,QD,90.0,SW,QD,50.0,NW,QD
1,T012,126S,1239E,75,R064,0,NE,QD,15,SE,...,QD,95.0,SE,QD,80.0,SW,QD,40.0,NW,QD
2,T024,134S,1230E,70,R064,0,NE,QD,20,SE,...,QD,85.0,SE,QD,65.0,SW,QD,30.0,NW,QD
3,T036,138S,1222E,50,R050,10,NE,QD,35,SE,...,,,,,,,,,,
4,T048,139S,1216E,40,R034,30,NE,QD,75,SE,...,,,,,,,,,,


In [59]:
# drop the text columns [NE,QD,....]
text_tokens = ['NE', 'NW', 'SW', 'SE', 'QD']
# a column is text when any of its cells holds one of the tokens ...
has_token = df.isin(text_tokens).any(axis=0).values
# ... or when it sits in a label slot of the fixed line layout: 4 leading tokens, then
# 13-token groups (Rxxx tag followed by value/quadrant/QD four times). The positional test
# also removes the all-empty padding columns of bulletins that carry fewer radii groups,
# so the frame always ends up with the same columns.
label_slots = (2, 3, 5, 6, 8, 9, 11, 12)
in_label_slot = np.array([c >= 4 and (c - 4) % 13 in label_slots for c in df.columns], dtype=bool)
df = df.loc[:, ~(has_token | in_label_slot)]

In [60]:
df = df.set_index(df.columns[0]) # set time as index: the first column holds the forecast tags (T000, T012, ...)

In [61]:
df.head() #check

Unnamed: 0_level_0,1,2,3,4,5,8,11,14,17,18,21,24,27,30,31,34,37,40
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
T000,121S,1250E,70,R064,10,15,10,10,R050,25.0,30.0,25.0,20.0,R034,60.0,90.0,90.0,50.0
T012,126S,1239E,75,R064,0,15,10,0,R050,10.0,30.0,25.0,15.0,R034,65.0,95.0,80.0,40.0
T024,134S,1230E,70,R064,0,20,15,0,R050,10.0,35.0,30.0,15.0,R034,45.0,85.0,65.0,30.0
T036,138S,1222E,50,R050,10,35,30,15,R034,45.0,85.0,65.0,30.0,,,,,
T048,139S,1216E,40,R034,30,75,45,20,,,,,,,,,,


In [62]:
# rename the columns to consecutive integers 0..n-1
df.columns = np.arange(len(df.columns))

In [63]:
df.head() # check: the columns are now labelled 0..17

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
T000,121S,1250E,70,R064,10,15,10,10,R050,25.0,30.0,25.0,20.0,R034,60.0,90.0,90.0,50.0
T012,126S,1239E,75,R064,0,15,10,0,R050,10.0,30.0,25.0,15.0,R034,65.0,95.0,80.0,40.0
T024,134S,1230E,70,R064,0,20,15,0,R050,10.0,35.0,30.0,15.0,R034,45.0,85.0,65.0,30.0
T036,138S,1222E,50,R050,10,35,30,15,R034,45.0,85.0,65.0,30.0,,,,,
T048,139S,1216E,40,R034,30,75,45,20,,,,,,,,,,


In [64]:
#move the values to the appropriate place based on the wind radii in order to force the order [64,50,34].
# Column positions at this point: 0-2 hold lat/lon/vmax, followed by three 5-column groups
# (radius tag + 4 values) at 3-7, 8-12 and 13-17. A line with fewer groups has them packed to the left.

for i in range(df.shape[0]):
    if df.iloc[i,3]=='R034' : # the row only has the R034 group: move it to the last group
        df.iloc[i,13:] = df.iloc[i,3:8].values
        df.iloc[i,3:8] = None # clear the first group it came from
    elif df.iloc[i,3]=='R050' : # the row starts with R050: shift its two groups one group to the right
        df.iloc[i,8:] = df.iloc[i,3:13].values
        df.iloc[i,3:8] = None # clear the now vacant first group


In [65]:
df #check

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
T000,121S,1250E,70,R064,10.0,15.0,10.0,10.0,R050,25.0,30.0,25.0,20.0,R034,60,90,90,50
T012,126S,1239E,75,R064,0.0,15.0,10.0,0.0,R050,10.0,30.0,25.0,15.0,R034,65,95,80,40
T024,134S,1230E,70,R064,0.0,20.0,15.0,0.0,R050,10.0,35.0,30.0,15.0,R034,45,85,65,30
T036,138S,1222E,50,,,,,,R050,10.0,35.0,30.0,15.0,R034,45,85,65,30
T048,139S,1216E,40,,,,,,,,,,,R034,30,75,45,20


In [66]:
# drop text columns [R064,R050,R034], found at positions 3, 8 and 13
tag_columns = df.columns[[3, 8, 13]]
df = df.drop(tag_columns, axis=1)

In [67]:
#set columns: position and maximum wind, then the four quadrant values of each radius group
quadrants = ('ne', 'se', 'sw', 'nw')
df.columns = ['lat', 'lon', 'vmax'] + [radius + quad for radius in ('64', '50', '34') for quad in quadrants]

In [68]:
df #check

Unnamed: 0_level_0,lat,lon,vmax,64ne,64se,64sw,64nw,50ne,50se,50sw,50nw,34ne,34se,34sw,34nw
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
T000,121S,1250E,70,10.0,15.0,10.0,10.0,25.0,30.0,25.0,20.0,60,90,90,50
T012,126S,1239E,75,0.0,15.0,10.0,0.0,10.0,30.0,25.0,15.0,65,95,80,40
T024,134S,1230E,70,0.0,20.0,15.0,0.0,10.0,35.0,30.0,15.0,45,85,65,30
T036,138S,1222E,50,,,,,10.0,35.0,30.0,15.0,45,85,65,30
T048,139S,1216E,40,,,,,,,,,30,75,45,20


In [69]:
# convert time from string to float: drop the leading 'T' of each index label (T012 -> 12.0).
# The builtin float is used because np.float was only an alias of it and newer NumPy releases removed it.
tidx = [float(label[1:]) for label in df.index.values]

In [70]:
df.index=tidx # set float time as index (T000 -> 0.0, T012 -> 12.0, ...)

In [71]:
df # check

Unnamed: 0,lat,lon,vmax,64ne,64se,64sw,64nw,50ne,50se,50sw,50nw,34ne,34se,34sw,34nw
0.0,121S,1250E,70,10.0,15.0,10.0,10.0,25.0,30.0,25.0,20.0,60,90,90,50
12.0,126S,1239E,75,0.0,15.0,10.0,0.0,10.0,30.0,25.0,15.0,65,95,80,40
24.0,134S,1230E,70,0.0,20.0,15.0,0.0,10.0,35.0,30.0,15.0,45,85,65,30
36.0,138S,1222E,50,,,,,10.0,35.0,30.0,15.0,45,85,65,30
48.0,139S,1216E,40,,,,,,,,,30,75,45,20


In [72]:
df = df.fillna(0) # replace None/NaN (radius groups that are absent from a forecast line) with zeros

In [73]:
df #check

Unnamed: 0,lat,lon,vmax,64ne,64se,64sw,64nw,50ne,50se,50sw,50nw,34ne,34se,34sw,34nw
0.0,121S,1250E,70,10,15,10,10,25,30,25,20,60,90,90,50
12.0,126S,1239E,75,0,15,10,0,10,30,25,15,65,95,80,40
24.0,134S,1230E,70,0,20,15,0,10,35,30,15,45,85,65,30
36.0,138S,1222E,50,0,0,0,0,10,35,30,15,45,85,65,30
48.0,139S,1216E,40,0,0,0,0,0,0,0,0,30,75,45,20


In [74]:
#functions for converting the lat lon notation to a signed numeric string
def chlat(x):
    """Drop the trailing hemisphere letter of a latitude token and apply its sign.

    A token ending in 'S' becomes negative ('121S' -> '-121'); for any other
    trailing letter only the letter is removed ('121N' -> '121').
    The result is still a string and keeps the scale used in the bulletin.
    """
    return '-' + x[:-1] if x[-1] == 'S' else x[:-1]


def chlon(x):
    """Drop the trailing hemisphere letter of a longitude token and apply its sign.

    A token ending in 'W' becomes negative ('1250W' -> '-1250'); for any other
    trailing letter only the letter is removed ('1250E' -> '1250').
    The result is still a string and keeps the scale used in the bulletin.
    """
    return '-' + x[:-1] if x[-1] == 'W' else x[:-1]

In [75]:
# replace the hemisphere letters of lat,lon by a sign (the values stay strings until the numeric conversion)
df['lat'] = df['lat'].map(chlat)
df['lon'] = df['lon'].map(chlon)

In [76]:
df # check: lat/lon now carry a sign but are not yet divided by 10

Unnamed: 0,lat,lon,vmax,64ne,64se,64sw,64nw,50ne,50se,50sw,50nw,34ne,34se,34sw,34nw
0.0,-121,1250,70,10,15,10,10,25,30,25,20,60,90,90,50
12.0,-126,1239,75,0,15,10,0,10,30,25,15,65,95,80,40
24.0,-134,1230,70,0,20,15,0,10,35,30,15,45,85,65,30
36.0,-138,1222,50,0,0,0,0,10,35,30,15,45,85,65,30
48.0,-139,1216,40,0,0,0,0,0,0,0,0,30,75,45,20


In [77]:
# convert lat/lon to float and divide by 10;
# downcast='float' selects the smallest float type, hence values such as 123.900002 in the check below
latlon = df[['lat','lon']].apply(pd.to_numeric, downcast='float')
df[['lat','lon']] = latlon.divide(10)

In [78]:
# convert every column to a (downcast) float
df = df.apply(lambda column: pd.to_numeric(column, downcast='float'))

In [79]:
df #check

Unnamed: 0,lat,lon,vmax,64ne,64se,64sw,64nw,50ne,50se,50sw,50nw,34ne,34se,34sw,34nw
0.0,-12.1,125.0,70.0,10.0,15.0,10.0,10.0,25.0,30.0,25.0,20.0,60.0,90.0,90.0,50.0
12.0,-12.6,123.900002,75.0,0.0,15.0,10.0,0.0,10.0,30.0,25.0,15.0,65.0,95.0,80.0,40.0
24.0,-13.4,123.0,70.0,0.0,20.0,15.0,0.0,10.0,35.0,30.0,15.0,45.0,85.0,65.0,30.0
36.0,-13.8,122.199997,50.0,0.0,0.0,0.0,0.0,10.0,35.0,30.0,15.0,45.0,85.0,65.0,30.0
48.0,-13.9,121.599998,40.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,75.0,45.0,20.0


In [80]:
# convert maximum wind speed from knots to m/s
df['vmax'] = df['vmax'] * kt2ms

In [81]:
## add a zero-filled 'notes' column for compatibility with operational
df = df.assign(notes=0)

In [82]:
#display track: plot the positions on a figure and hand that figure to mplleaflet
fig, ax = plt.subplots()
ax.plot(df.lon, df.lat, 'o--')
mplleaflet.display(fig=fig)

## output inpData

In [None]:
# frame columns to export, in output order
column_order = (['lat', 'lon', 'vmax']
                + [radius + quadrant for radius in ('64', '50', '34') for quadrant in ('ne', 'se', 'sw', 'nw')]
                + ['notes'])

In [None]:
# column titles written to the file (same order as column_order, with 'lon' spelled 'long')
header = (['lat', 'long', 'vmax']
          + [radius + quadrant for radius in ('64', '50', '34') for quadrant in ('ne', 'se', 'sw', 'nw')]
          + ['notes'])

In [None]:
#df.to_csv('tmp/inpDataJTWC.txt',index=True, columns=column_order, sep='\t', header=header)