<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Parse-program-info-and-visit-timeline-webpages" data-toc-modified-id="Parse-program-info-and-visit-timeline-webpages-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Parse program info and visit timeline webpages</a></span></li></ul></div>

# Parse program info and visit timeline webpages



In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import requests

In [2]:
# https://stackoverflow.com/questions/24124643/parse-xml-from-url-into-python-object
import traceback
import urllib3
import xmltodict

def getxml(url="https://yoursite/your.xml"):
    """
    Fetch ``url`` with an HTTP GET and parse the response body as XML.

    Parameters
    ----------
    url : str
        Address of the XML document to download.

    Returns
    -------
    data : dict
        Output of ``xmltodict.parse`` on the response body, or an empty
        dict if the body could not be parsed (the traceback is printed).
    """
    http = urllib3.PoolManager()

    response = http.request('GET', url)
    try:
        data = xmltodict.parse(response.data)
    except Exception:
        # Best effort: report the parse failure and return an empty dict.
        # ``Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate to the user.
        print("Failed to parse xml from response (%s)" % traceback.format_exc())
        data = {}

    return data

def visit_xml(proposal_id=1324):
    """
    Download the STScI visit status report of a JWST program.

    Parameters
    ----------
    proposal_id : int
        Program ID inserted into the ``get-visit-status`` query.

    Returns
    -------
    report : dict
        Contents of the ``visitStatusReport`` element when the parsed
        document has one, otherwise whatever `getxml` returned.
    """
    query = f"https://www.stsci.edu/cgi-bin/get-visit-status?id={proposal_id}&markupFormat=xml&observatory=JWST"
    report = getxml(url=query)

    # Unwrap the top-level element only when it is actually there
    return report['visitStatusReport'] if 'visitStatusReport' in report else report

def prop_html(proposal_id=1324):
    """
    Download the STScI proposal-information page of a JWST program.

    Parameters
    ----------
    proposal_id : int
        Program ID inserted into the ``get-proposal-info`` query.

    Returns
    -------
    soup : bs4.BeautifulSoup
        The page parsed with the ``html.parser`` backend.
    """
    from bs4 import BeautifulSoup

    page = requests.get(
        f"https://www.stsci.edu/cgi-bin/get-proposal-info?id={proposal_id}&observatory=JWST"
    )
    return BeautifulSoup(page.text, 'html.parser')

def program_info(proposal_id=1324):
    """
    Collect summary metadata and the list of visits of a JWST program.

    The summary fields are scraped from the proposal-information page
    (`prop_html`) and the visits come from the visit status report
    (`visit_xml`).

    Parameters
    ----------
    proposal_id : int
        Program ID.

    Returns
    -------
    meta : dict
        ``proposal_id``, ``raw`` (the parsed proposal page), ``pi``,
        ``title``, ``cycle`` (int), ``allocation`` (float), ``proptime``
        (float), ``type`` and ``visit`` (always a list, also when the
        report contains a single visit).

    Raises
    ------
    IndexError, TypeError, ValueError
        Can be raised by the positional lookups / numeric conversions
        below when the page does not have the expected layout.
    KeyError
        If the visit status report has no ``visit`` entry.
    """
    soup = prop_html(proposal_id=proposal_id)
    meta = {'proposal_id': proposal_id}
    meta['raw'] = soup

    # The values are picked by position from the first two <p> elements and
    # the first <h1>, so this is tied to the current layout of the page.
    ps = soup.find_all('p')
    meta['pi'] = ps[0].contents[1].strip()
    meta['title'] = ps[1].contents[1].strip()
    meta['cycle'] = int(ps[1].contents[5].strip())
    # Only the leading number of e.g. "<value> <unit>" is kept
    meta['allocation'] = float(ps[1].contents[9].strip().split()[0])
    meta['proptime'] = float(ps[1].contents[-1].strip().split()[0])
    meta['type'] = soup.find_all('h1')[0].contents[1].contents[0]

    visits = visit_xml(proposal_id)

    # A lone <visit> element is not wrapped in a list by the XML parser;
    # normalize so callers can always iterate.
    if isinstance(visits['visit'], list):
        meta['visit'] = visits['visit']
    else:
        meta['visit'] = [visits['visit']]

    return meta

# Try the scraper on a single program before looping over the full list.
#visit = visit_xml(proposal_id=1324)
meta = program_info(proposal_id=1567)

In [8]:
from grizli import utils
import astropy.units as u
import astropy.time

def show_window(v, meta):
    """
    Build the table row of one visit and draw its scheduling-window timeline.

    Parameters
    ----------
    v : dict
        One visit entry. Must have the keys ``@observation``, ``@visit``,
        ``target``, ``configuration`` and ``hours``; ``planWindow`` is
        optional.

    meta : dict
        Program metadata with the keys ``type``, ``proposal_id``, ``title``,
        ``pi``, ``cycle``, ``allocation`` and ``proptime``.

    Returns
    -------
    row : list
        The seven program values, the five visit values, then the window
        dates as ISO date strings and an ``<img>`` tag pointing at the saved
        timeline PNG.  When no window can be parsed the last three entries
        are the placeholders ``'2029-01-01', '-', '-'``.

    fig : matplotlib figure or None
        The timeline figure, also written to
        ``jwst_{proposal_id}_{observation}_{visit}.png`` in the current
        working directory; None when no window can be parsed.

    Raises
    ------
    KeyError
        If ``v`` or ``meta`` lacks one of the required keys.
    """
    
    row = [meta[k] for k in ['type','proposal_id','title', 'pi', 'cycle', 'allocation', 'proptime']]
    row += [v[k] for k in ['@observation', '@visit', 'target', 'configuration', 'hours']]

    # No plan window at all: fill in placeholders and skip the figure
    if 'planWindow' not in v:
        row.extend(['2029-01-01','-','-'])
        return row, None
        
    w = v['planWindow']

    # Take the parenthesized part of the window text and split it on ' - '.
    # NOTE(review): presumably of the form "(YYYY.DDD - YYYY.DDD)" since the
    # pieces are parsed as 'yday' times below -- confirm against the XML.
    if '(2' in w:
        dates = ('2'+w.split('(2')[1].strip()).strip(')').split(' - ')
    else:
        row.extend(['2029-01-01','-','-'])
        return row, None
    
    # First word of the configuration, lower-cased, selects the line color
    inst = v['configuration'].strip().split()[0].lower()

    colors = {'niriss':utils.MPL_COLORS['b'], 
              'nirspec':utils.MPL_COLORS['orange'], 
              'miri':utils.MPL_COLORS['r'],
              'nircam':utils.MPL_COLORS['g']}
    
    # Anything not listed above is drawn in gray
    if inst not in colors:
        colors[inst] = '0.5'
        
    fig, ax = plt.subplots(1,1,figsize=(8,0.4))

    # Fixed x-axis range shared by all timelines
    wlim = astropy.time.Time(['2022:150','2024:300'], format='yday')
    # ax.plot_date(wlim.plot_date, [0,0], color='w', lw=5, linestyle='-')

    # 'YYYY.DDD' -> 'YYYY:DDD' so the strings can be read with format='yday'
    wdate = astropy.time.Time([d.replace('.',':') for d in dates], format='yday')
    ax.plot_date(wdate.plot_date, [0,0], color=colors[inst], lw=2, fmt='s-')

    prop = meta['proptime']
    #prop = 6

    if prop > 0:
        # Shift the window by ``prop`` months (prop/12 years) and draw the
        # shifted copy dotted; white dots mark the original window ends.
        pub = wdate + prop/12.*u.year
        ax.plot_date(wdate.plot_date, [0,0], fmt='o', zorder=100, color='w', markersize=4)
        ax.plot_date(pub.plot_date, [0,0], color=colors[inst], lw=2, linestyle=':', fmt='s-', 
                     alpha=0.5)

    # Quarterly grid positions
    wgrid = astropy.time.Time(['2022-07-01', '2022-10-01', 
                               '2023-01-01', '2023-04-01', '2023-07-01', '2023-10-01', 
                               '2024-01-01', '2024-04-01', '2024-07-01', '2024-10-01'])

    # Extra vertical lines at 2023-01-01 and 2024-01-01 (grid entries 2 and 6)
    ax.vlines(wgrid.plot_date[[2,6]], -1, 1, color='k', alpha=0.2)
    ax.set_ylim(-0.2, 0.2)
    #ax.axis('off')

    #ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    #ax.spines['left'].set_visible(False)

    ax.set_xticks(wgrid.plot_date)
    ax.set_xlim(*wlim.plot_date)

    # Keep the grid but hide tick marks, labels and the y axis
    ax.tick_params(length=0, which='both')

    ax.get_yaxis().set_visible(False)

    ax.set_xticklabels([])
    ax.set_yticklabels([])

    ax.grid()
    
    # Date part only of the ISO timestamps
    iso = [w.iso.split()[0] for w in wdate]
    # dates, meta['visit'][0], meta

    row += iso

    timeline = f"jwst_{meta['proposal_id']}_{v['@observation']}_{v['@visit']}"
    
    row.append(f'<img src="{timeline}.png" />')
    
    # Written relative to the current working directory
    fig.savefig(timeline+'.png')

    return row, fig

In [9]:
# Hand-picked list of JWST program IDs.  The trailing comments are informal
# labels (presumably survey or PI names).
# NOTE(review): the next cell rebinds ``progs`` to the IDs scraped from the
# STScI approved-program pages, so this list is only used if that cell is
# skipped.
progs = [1837, # primer
         1345, # ceers
         1433, # coe
         1727, # cosmos
         2079, # ngdeep
         2426, # bagley
         2659, # beasts bubbles
         2561, # uncover
         1914, # aurora
         1895, # fresco
         1567, # seiji
         1324, # glass
         1208, # canucs
         1180, # eisenstein, gs1
         1286, # gs2
         1287, # gs3
         1181, # gn1
         1210, # ferruit
         1211, # ferruit NRS GTO
         1213, 
         1214, 
         1215, 
         1216,
         1217, 
         1262, 
         1263, # colina
         1284, # colina 2
         1264, # colina 3
         1283, # miri udf
         1243, # lilly
         1176, # windhorst
         1207, # rieke
         2282, # earendel
         1869, # LyC22
         1626, # stefanon
         1657, # harikane
         1671, # maseda
         1740, # harikane 2
         1747, # roberts-borsani
         1758, # finkelstein
         1791, # spilker
         1810, # belli
         1933, # mathee
         1963, # williams udf
         2110, # kriek
         2136, # tucker
         2279, # naidu
         2285, # carnall
         2345, # newman
         2362, # marsan
         2478, # stark
         2484, # mirka
         2516, # hodge
         2555, # sunburst
         2565, # glazebrook, 
         1908, # vanzella
         1764, # fan agn
         1964, # overzier
         1328, # armus
         1355, # rigby
         1199, # stiavelli
         2123, # kassin
         2130, # lee
         2198, # barrufet
         2234, # banados
         2321, # best
         2566, 
         2674,
         1871,
         2078, 
         1678, # ashby
         2107, 
         1717, 
         1554,
         2593, # strom
        ]

In [10]:
from bs4 import BeautifulSoup
import requests

# Rebuild ``progs`` from the STScI approved-program pages: every link inside
# a table body whose text is an integer is taken as a program ID.  IDs are
# appended in page order and are not de-duplicated.
progs = []

for url in ["https://www.stsci.edu/jwst/science-execution/approved-ers-programs", 
            "https://www.stsci.edu/jwst/science-execution/approved-programs/cycle-1-gto", 
            "https://www.stsci.edu/jwst/science-execution/approved-programs/cycle-1-go"]:
    
    html_text = requests.get(url).text
    soup = BeautifulSoup(html_text, 'html.parser')
    tabs = soup.find_all('tbody')
    for tab in tabs:
        for link in tab.find_all('a'):
            try:
                prop = int(link.getText())
            except ValueError:
                # Link text is not a bare program number; skip it.  Only
                # ValueError is caught so that real errors (and Ctrl-C)
                # are not silently swallowed.
                continue

            progs.append(prop)

len(progs)

419

In [11]:
# Accumulators filled by the processing loop.  They live in their own cell
# so that the loop can be interrupted and re-run without losing progress.
rows = []     # one table row per visit
done = []     # program IDs already processed
failed = []   # program IDs whose pages could not be parsed
res = {}      # program ID -> metadata dict

In [12]:
# NOTE(review): absolute, user-specific output directory; the timeline PNGs
# and the HTML table are written relative to it.
os.chdir('/Users/gbrammer/Research/JWST/LRP')

# Column names, in the order of the rows returned by ``show_window``
names = ['type','proposal_id','title', 'pi', 'cycle', 'allocation', 'proptime',
         'observation', 'visit', 'target', 'configuration', 'hours', 
         'win_start', 'win_end', 'timeline']

# progs = [1963]

for i, prog in enumerate(progs):
    print(i, prog)

    # Skip programs handled in a previous (possibly interrupted) run, and
    # IDs that appear more than once in ``progs``
    if prog in done:
        continue

    try:
        m = program_info(proposal_id=prog)
    except (IndexError, TypeError, KeyError, ValueError):
        # The proposal page or visit report does not have the expected
        # layout.  Record the program and move on instead of aborting the
        # whole run; every failure is reported the same way.
        failed.append(prog)
        print('Fail')
        continue

    done.append(prog)
    res[prog] = m

    for v in m['visit']:
        try:
            row, fig = show_window(v, m)
        except KeyError:
            # Visit entry lacks one of the required fields
            continue

        rows.append(row)

        # The PNG is already on disk; release the figure memory
        plt.close('all')
    

0 1324
1 1328
2 1345
3 1355
4 1335
5 1364
6 1366
7 1386
8 1373
9 1288
10 1309
11 1349
12 1334
13 1189




14 1202




15 1209




16 1228




17 1229




18 1230




19 1256




20 1413




21 1414
22 1199




23 1208




24 1183




25 1192




26 1206




27 1282






28 1294




29 1411




30 1180




31 1181




32 1207




33 1210




34 1211




35 1213




36 1214




37 1215




38 1216




39 1217




40 1262




41 1263




42 1283




43 1284




44 1286




45 1287




46 1177




47 1184
48 1185




49 1188




50 1193






51 1194




52 1195
Fail
53 1200




54 1201




55 1224




56 1241




57 1270




58 1274




59 1275




60 1276




61 1277




62 1278




63 1279




64 1280




65 1281




66 1292




67 1312




68 1331




69 1353




70 1412




71 1198




72 1205




73 1218




74 1219




75 1220




76 1222




77 1243




78 1264




79 1179




80 1186




81 1187






82 1235




83 1236




84 1257




85 1258




86 1282
87 1290






88 1293




89 1191




90 1231




91 1244
92 1245
93 1246
94 1247
95 1248
96 1249
97 1250
98 1251
99 1252
100 1253
101 1254




102 1255
Fail
103 1271
104 1272




105 1273




106 1415
107 1182




108 1190




109 1232




110 1237




111 1238




112 1239




113 1240
114 1242




115 1306




116 1176




117 1178




118 1204




119 1223




120 1225




121 1226




122 1227




123 1233




124 1234




125 1260




126 1265




127 1266




128 1267




129 1268




130 1269




131 1304




132 1305




133 1549
134 1556
135 1563




136 1584
137 1618




138 1633




139 1640
140 1647




141 1668




142 1676




143 1729




144 1743




145 1751




146 1759




147 1803




148 1843




149 1846




150 1902
151 1911




152 1935




153 1952




154 1958




155 1977
Fail
156 1981
157 2001




158 2008
159 2021




160 2025




161 2044




162 2053




163 2055




164 2062




165 2084




166 2113




167 2149




168 2153




169 2158




170 2159




171 2232
Fail
172 2243




173 2260




174 2278




175 2297




176 2304




177 2311




178 2319




179 2334




180 2347




181 2358




182 2372




183 2420
184 2437




185 2454




186 2487




187 2488




188 2498




189 2507




190 2508
191 2509
Fail
192 2512
193 2538




194 2540
195 2562




196 2571




197 2589
198 2594




199 2627
200 2667




201 2708




202 2722




203 1433
204 1567




205 1571
206 1572




207 1626




208 1635




209 1657




210 1671




211 1701






212 1712




213 1721
Fail
214 1740




215 1747




216 1758




217 1762




218 1783






219 1791




220 1810




221 1827




222 1837
223 1840




224 1864




225 1865




226 1869




227 1879




228 1883
Fail
229 1893




230 1895
231 1914




232 1933




233 1963
234 1991




235 2079
236 2110
237 2123
238 2130
239 2136
240 2162




241 2186




242 2198




243 2219




244 2234




245 2279




246 2282
247 2285




248 2301




249 2321




250 2344




251 2345




252 2354
Fail
253 2362




254 2368




255 2391




256 2410
Fail
257 2417




258 2424




259 2426




260 2439




261 2452




262 2457




263 2478




264 2484




265 2511




266 2514
267 2516




268 2555




269 2561
270 2565




271 2566




272 2567




273 2581




274 2593




275 2659




276 2674




277 2687
Fail
278 1835




279 2073




280 2180
281 1638




282 1727
283 1794




284 1871




285 1995




286 2046




287 2078




288 2134




289 2608
Fail
290 1424
291 1566
292 1568






293 1585
Fail
294 1598
Fail
295 1604




296 1658




297 1731




298 1786
299 1897
300 2037
Fail
301 2060
302 2127
303 2211
304 2337
305 2361
306 2416




307 2418
308 2524
Fail
309 2537
Fail
310 2550




311 2574




312 1586
313 1621




314 1644




315 1666
316 1706




317 1714




318 1726




319 1741




320 1742




321 1798
322 1802




323 1860




324 1863




325 1874




326 1905




327 1906
328 1936
329 1947




330 1959




331 1960




332 1979




333 1984
334 2050




335 2061
336 2072
337 2091
338 2114
339 2122
340 2124




341 2204




342 2209




343 2288
344 2302








345 2327




346 2348




347 2395
348 2473




349 2559




350 2635




351 2640




352 2666




353 2692




354 1557
Fail
355 1558




356 1591




357 1611




358 1617




359 1619




360 1678
361 1685




362 1854




363 1908




364 1939




365 2045




366 2074
Fail
367 2092




368 2104




369 2107
370 2128




371 2143




372 2151




373 2155




374 2183






375 2221




376 2317




377 2331




378 2352
Fail
379 2402




380 2433




381 2441




382 2459




383 2491




384 2521




385 2526




386 2560




387 2609




388 2662




389 2677




390 1554




391 1670




392 1717
393 1760




394 1764




395 1813




396 1875




397 1928




398 1964




399 1967




400 1970




401 1983




402 2004




403 2016




404 2028




405 2057




406 2064




407 2075




408 2080






409 2108
Fail
410 2177




411 2228




412 2235




413 2249




414 2446
Fail
415 2547




416 2554
Fail
417 2654




418 2701




In [13]:
# Program IDs for which program_info raised an exception in the loop above
failed

[1195,
 1255,
 1977,
 2232,
 2509,
 1721,
 1883,
 2354,
 2410,
 2687,
 2608,
 1585,
 1598,
 2037,
 2524,
 2537,
 1557,
 2074,
 2352,
 2108,
 2446,
 2554]

In [35]:
import getpass

# Assemble the per-visit rows into a table and decorate a few columns with
# HTML before writing the sortable page.
tab = utils.GTable(rows=rows, names=names)
# tab['timeline'] = [f'<img src="{t}.png" />' for t in tab['timeline']]

# Link each program ID to its STScI proposal-information page
tab['proposal_id'] = [f'<a href="https://www.stsci.edu/cgi-bin/get-proposal-info?id={p}&observatory=JWST">{p}</a>'
                      for p in tab['proposal_id']]

# tab['pi'] = [p.strip().split()[-1] for p in tab['pi']]

# Proprietary time: red when non-zero, green otherwise
prop = []
for p in tab['proptime']:
    color = 'red' if p > 0 else 'green'
    prop.append(f'<p style="color:{color};"> {p:.0f} </p>')

tab['proptime'] = prop

# A visit can have several targets (a list); flatten them to one string
tab['target'] = ['; '.join(t) if isinstance(t, list) else t
                 for t in tab['target']]

tab.rename_column('proposal_id','ProgID')
tab.rename_column('proptime','Prop')
tab.rename_column('pi','PI')
tab.rename_column('title','Full_Proposal_Title')
tab.rename_column('allocation', 'Total')
tab.rename_column('hours', 'Hours')

tab['Prop'].description = 'Proprietary time, months'
tab['Hours'].description = 'Visit duration'
tab['Total'].description = 'Total program allocation, hours'
tab['win_start'].description = 'Beginning of LRP scheduling window'
tab['win_end'].description = 'End of LRP scheduling window'

tab.rename_column('observation','Obs')

tab.write_sortable_html('full_timeline.html', use_json=True, 
                        localhost=False, max_lines=10000, filter_columns=['Total', 'Prop','Hours','win_start','win_end'])

# Append a provenance line three lines before the end of the generated file
with open('full_timeline.html') as fp:
    lines = fp.readlines()

# getpass.getuser() rather than os.getlogin(): the latter needs a controlling
# terminal and raises OSError when the kernel was not started from one.
lines.insert(-3, f'<p style="font-size:x-small;"> Generated {astropy.time.Time.now().iso} by {getpass.getuser()} with <a href="./lrp-calendar.ipynb">lrp-calendar.ipynb</a> </p>\n')
with open('full_timeline.html','w') as fp:
    fp.writelines(lines)

In [26]:
# Number of rows in the table
len(tab)

3720

In [23]:
# Inspect the target column
tab['target']

0
ABELL2744
ABEL2744CLS-V12
NGC3256-NUC1-MRS
NGC3256-NUC2-MRS
NGC3256-BK
NGC3256-CENTERED
NGC3256-CENTERED
NGC3256-NUC1
NGC3256-NUC2
NGC-7469-MRS


418