In [1]:
import re
import tqdm
import urllib
from bs4 import BeautifulSoup

import requests

import collections

from astropy import units as u
from astropy.io import ascii

In [2]:
def int_to_roman(num):
    """Convert a non-negative integer to a Roman-numeral string.

    Parameters
    ----------
    num : int
        Value to convert. ``0`` yields the empty string.

    Returns
    -------
    str
        The numeral in subtractive notation, e.g. ``19 -> "XIX"``.
        Values of 4000 and above simply repeat ``"M"``.

    Raises
    ------
    ValueError
        If ``num`` is negative (there is no Roman representation, and the
        greedy conversion would otherwise return a meaningless string).
    """
    if num < 0:
        raise ValueError(
            "cannot convert negative number to a Roman numeral: {!r}".format(num)
        )

    # Largest value first, including the subtractive pairs (CM, CD, XC, ...),
    # so that greedy division produces the conventional form.
    numerals = (
        (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"),
        (100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
        (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I"),
    )

    pieces = []
    for value, symbol in numerals:
        count, num = divmod(num, value)
        pieces.append(symbol * count)
        if num == 0:
            # Nothing left to encode; skip the remaining smaller symbols.
            break

    return "".join(pieces)

# Test run 

In [3]:
# Reference search URL, split into its query components for readability.
# Adjacent string literals are concatenated by Python, so the value is a
# single URL string.
example_url = (
    'https://koa.ipac.caltech.edu/cgi-bin/bgServices/nph-bgExec'
    '?bgApp=/KOA/nph-KOA'
    '&instrument_de=deimos'
    '&filetype=science'
    '&calibassoc=assoc'
    '&locstr=and+xix'
    '&regSize=5'
    '&resolver=ned'
    '&radunits=arcmin'
    '&spt_obj=spatial'
    '&single_multiple=single'
)

In [4]:
# Build the KOA search URL from named query parameters instead of a
# hand-written string: a 5-arcmin DEIMOS science search around "And XIX".
base_url = 'https://koa.ipac.caltech.edu/cgi-bin/bgServices/nph-bgExec'
encoded = urllib.parse.urlencode(dict(
    bgApp='/KOA/nph-KOA',
    instrument_de='deimos',
    filetype='science',
    calibassoc='assoc',
    locstr='And XIX',
    regSize='5',
    radunits='arcmin',
    resolver='ned',
    spt_obj='spatial',
    single_multiple='single',
))
url = '?'.join([base_url, encoded])
url

'https://koa.ipac.caltech.edu/cgi-bin/bgServices/nph-bgExec?spt_obj=spatial&locstr=And+XIX&radunits=arcmin&bgApp=%2FKOA%2Fnph-KOA&calibassoc=assoc&regSize=5&resolver=ned&filetype=science&single_multiple=single&instrument_de=deimos'

In [5]:
# Run the search. Fail here on an HTTP error status (4xx/5xx) rather than
# handing an error page to the HTML parser in the next cell.
res = requests.get(url)
res.raise_for_status()

In [6]:
# Parse the search-results page so its links can be queried.
soup = BeautifulSoup(markup=res.text, features='html.parser')

In [7]:
# The science and calibration tables are both linked from <a target="koa">
# anchors whose text mentions DEIMOS, so collect those anchors once.
deimos_links = soup.find_all('a', {'target':'koa'}, string=re.compile('.*DEIMOS.*'))

# The hrefs are appended to scheme://host of the search URL.
site_root = '://'.join(urllib.parse.urlparse(base_url)[:2])

sci = [tag for tag in deimos_links if 'Science' in tag.text]
assert len(sci)==1
sci_url = site_root + sci[0].get('href')

calib = [tag for tag in deimos_links if 'Calibration' in tag.text]
assert len(calib)==1
# Use the calibration anchor here (this previously reused sci[0], which made
# calib_url identical to sci_url).
calib_url = site_root + calib[0].get('href')

sci_url

'https://koa.ipac.caltech.edu/workspace/TMP_GMiqMN_9368/KOA/sci_de_9368.tbl'

In [8]:
# Fetch the science-file table from the URL found above and parse it.
sci_tab = ascii.read(table=sci_url)

Downloading https://koa.ipac.caltech.edu/workspace/TMP_GMiqMN_9368/KOA/sci_de_9368.tbl [Done]


# Real business 

In [9]:
def target(name, radius, calib=False, verbose=True):
    """Search KOA for DEIMOS data around a named object and return the file table.

    Parameters
    ----------
    name : str
        Object name, sent as the ``locstr`` query parameter (the query asks
        for the ``ned`` resolver).
    radius : astropy.units.Quantity
        Search radius; converted to arcminutes before being sent.
    calib : bool, optional
        If True, follow the "Calibration" link on the results page instead of
        the "Science" one. The query itself always sends
        ``filetype='science'``; this flag only selects which link is read.
    verbose : bool, optional
        If True, print the search URL.

    Returns
    -------
    The table produced by ``astropy.io.ascii.read`` for the linked file.

    Raises
    ------
    requests.HTTPError
        If the search request returns an HTTP error status.
    AssertionError
        If the results page does not contain exactly one matching link.
    """
    amin = radius.to(u.arcmin).value

    base_url = 'https://koa.ipac.caltech.edu/cgi-bin/bgServices/nph-bgExec'
    encoded = urllib.parse.urlencode({'bgApp':'/KOA/nph-KOA',
                            'instrument_de':'deimos',
                            'filetype':'science',
                            'calibassoc':'assoc',
                            'locstr':name,
                            'regSize': amin,
                            'radunits':'arcmin',
                            'resolver':'ned',
                            'spt_obj':'spatial',
                            'single_multiple':'single'
                            })
    url = base_url + '?' + encoded

    if verbose:
        print('search URL:', url)

    res = requests.get(url)
    # Surface HTTP failures directly instead of as a confusing link-count error.
    res.raise_for_status()

    soup = BeautifulSoup(res.text, 'html.parser')

    sciorcalib = 'Calibration' if calib else 'Science'
    metadata = [tag for tag in soup.find_all('a', {'target':'koa'}, string=re.compile('.*DEIMOS.*')) if sciorcalib in tag.text]
    assert len(metadata)==1, (
        'expected exactly one DEIMOS {} link for {!r}, found {}'.format(
            sciorcalib, name, len(metadata)))

    # Use the link found for THIS search. The previous code read the notebook
    # global `sci`, so every call returned the table of an earlier test query
    # (or raised NameError on a fresh kernel) and `calib` had no effect.
    site_root = '://'.join(urllib.parse.urlparse(base_url)[:2])
    metadata_url = site_root + metadata[0].get('href')

    return ascii.read(metadata_url)

In [10]:
# Results cache: target name -> table. Re-running this cell empties it.
scifiles = dict()

In [None]:
# Targets "And I" .. "And XXXIII". And IV is deliberately left out of the
# list (the reason is not recorded in this notebook).
names = ['And ' + int_to_roman(i) for i in range(1, 34) if i != 4]

for nm in tqdm.tqdm_notebook(names):
    print(nm)
    if nm in scifiles:
        # Fetched on an earlier run of this cell: skip the network query.
        # (Without the `continue` the target was downloaded again anyway.)
        print('Already done')
        continue
    try:
        tab = target(nm, 20*u.arcmin)
        scifiles[nm] = tab
        print('Completed')
    except Exception as e:
        # Best effort: report the failure and carry on with the next target.
        print('failed:\n', repr(e))

And I
search URL: https://koa.ipac.caltech.edu/cgi-bin/bgServices/nph-bgExec?spt_obj=spatial&locstr=And+I&radunits=arcmin&bgApp=%2FKOA%2Fnph-KOA&calibassoc=assoc&regSize=20.0&resolver=ned&filetype=science&single_multiple=single&instrument_de=deimos


In [None]:
# One line per retrieved target: its name and the number of rows in its table.
for nm, tab in scifiles.items():
    print('{} {}'.format(nm, len(tab)))