In [96]:
# The main goal here is to produce a new version of the PIC/MAGIC table
# at "http://vobs.magic.pic.es/fits/". That table lists the MAGIC observations
# published in articles. The new version we want to produce adds the
# following features with respect to the current one:
#  - FITS/SPECTRUM data units in units of 'erg/s/cm2'
#  - object positions, besides the name
%connect_info

{
  "stdin_port": 59784, 
  "ip": "127.0.0.1", 
  "control_port": 37545, 
  "hb_port": 34357, 
  "signature_scheme": "hmac-sha256", 
  "key": "e698b5f0-f9e2-414d-8451-6f977daaee00", 
  "shell_port": 42872, 
  "transport": "tcp", 
  "iopub_port": 34588
}

Paste the above JSON into a file, and connect with:
    $> ipython <app> --existing <file>
or, if you are local, you can connect with just:
    $> ipython <app> --existing kernel-b3eeb955-58fd-4890-8c7c-d26a9597d49f.json 
or even just:
    $> ipython <app> --existing 
if this is the most recent IPython session you have started.


In [97]:
# First thing: let's parse the HTML table.
# From that table we want to keep the columns
# 'Source','Article' and to download the FITS files.
from collections import OrderedDict

url = 'http://vobs.magic.pic.es/fits/'

def parse(url):
    """
    Read the (html) table of published results from the given url.

    Rows that do not have exactly five <td> cells are skipped.

    Parameters
    ----------
    url : str
        Address of the page holding the table. The relative path of each
        FITS file is appended to it verbatim.

    Returns
    -------
    OrderedDict
        Three equally long lists under the keys 'source' (text of the first
        cell), 'article' (href of the link in the second cell) and
        'fitsfile' (url + href of the link in the fifth cell, or '_NULL_'
        when that cell has no link).

    Raises
    ------
    ValueError
        If the page contains no table of class 'mytable'.
    """
    from bs4 import BeautifulSoup as bs
    import urllib2

    out = OrderedDict()
    out['source'] = []
    out['article'] = []
    out['fitsfile'] = []

    soup = bs(urllib2.urlopen(url).read())
    # 'attrs' has to be a dictionary; the previous set literal
    # {'class','mytable'} only matched because bs4 interprets a non-dict
    # value as a list of acceptable CSS classes.
    table = soup.find('table', {'class': 'mytable'})
    if table is None:
        raise ValueError("no table of class 'mytable' found at %s" % url)

    for row in table.findAll('tr'):
        cells = row.findAll('td')
        if len(cells) != 5:
            continue
        # Source name column
        src = cells[0].find(text=True)
        src = src.strip()
        # Article electronic reference
        art = cells[1].find('a', href=True)
        art_url = art['href']
        # cells[3] holds an internal reference we do not care about.
        # FITS file relative path; rows without a link get a sentinel value.
        fits_link = cells[4].find('a', href=True)
        if fits_link is not None:
            file_url = url + fits_link['href']
        else:
            file_url = '_NULL_'
        out['source'].append(src)
        out['article'].append(art_url)
        out['fitsfile'].append(file_url)
    assert len(out['source']) == len(out['article'])
    assert len(out['source']) == len(out['fitsfile'])
    return out

# Parse the table published at 'url' and pretty-print the extracted columns.
table_parsed = parse(url)
import pprint as pp
pp.pprint(table_parsed)

OrderedDict([('source', [u'Mrk 421', u'Crab Nebula', u'PG 1553+113', u'1ES 0806+524', u'HESS J1857+026', u'J2001+439', u'Mrk 501', u'1ES 0033+595', u'IC 310', u'PKS 1424+240', u'3C 58', u'AE Aquarii', u'PKS 1510-089', u'3C 279', u'4C +55.17', u'Crab Pulsar', u'NGC 1275', u'1ES 1727+50', u'4C +21.35', u'IC 310', u'Segue 1', u'GRB 090102', u'1ES 2344+514', u'Swift J1644+57', u'4U 0142+61, 1ES 2259+586', u'1ES 1215+303', u'M87', u'PKS 2155-304', u'HESS J0632+057', u'Markarian 421', u'Perseus Cluster', u'W51', u'B3 2247+381', u'NGC 1275', u'LS I +61 303', u'M87', u'Crab Pulsar', u'Crab Nebula', u'Crab Pulsar', u'Scorpius X-1', u'Segue 1', u'Markarian 421', u'PG 1553+113', u'3C279', u'PKS 1222+21', u'3C 66A', u'IC 310', u'1FGL J1954.3+2836, 1FGL J1958.6+2845', u'Cygnus X-3', u'Markarian 501', u'Markarian 501', u'Markarian 421, Markarian 501', u'1ES 0333+595, RGB J117+202, B2 1215+30, ...', u'Pulsar-PWN Systems', u'Markarian 421', u'PG 1553+113', u'LS I +61 303', u'GRB 080430', u'Markarian 5

In [98]:
# Then we should be able to write it down.
def write_csv_from_dict(dtable,output):
    """
    Write down the table represented by the dictionary 'dtable'.

    One line is written per row, formatted as '"<source>"|<article>|<fitsfile>'.

    Parameters
    ----------
    dtable : dict
        Maps column names to lists; must provide the columns 'source',
        'article' and 'fitsfile', all of the same length.
    output : str
        Path of the file to (over)write.

    Returns
    -------
    str or None
        'output' on success. None when the table is empty, a required
        column is missing, the columns have different lengths or the file
        cannot be written.
    """
    def check_table_size(tab):
        """Return the common column length (0 if no columns), None on mismatch."""
        sizes = set(len(column) for column in tab.values())
        if len(sizes) > 1:
            print("error: columns have different sizes! %s" % (tab,))
            return None
        return sizes.pop() if sizes else 0

    required = ('source', 'article', 'fitsfile')
    if any(name not in dtable for name in required):
        return None
    leng = check_table_size(dtable)
    if not leng:
        # Either inconsistent columns (None) or nothing to write (0).
        return None
    try:
        # 'with' closes the file even when a write fails half way.
        with open(output, 'w') as fp:
            for row in zip(dtable['source'], dtable['article'], dtable['fitsfile']):
                fp.write('"%s"|%s|%s\n' % row)
    except (IOError, OSError, UnicodeError):
        return None
    return output

import os
import glob
outdir = 'pic_parsing_files'
# Start from an empty output directory: remove what a previous run left
# behind (isdir() is already False for a path that does not exist).
if os.path.isdir(outdir):
    for f in glob.glob(os.path.join(outdir, '*')):
        os.remove(f)
    os.rmdir(outdir)
os.mkdir(outdir)

# Dump the parsed table next to the files that will be downloaded.
output = os.path.join(os.getcwd(), outdir, 'magic-pic-fits_table.psv')
filepath = write_csv_from_dict(table_parsed, output)
print(filepath)

/home/chbrandt/fido/booq/docs/notebooks/pic_parsing_files/magic-pic-fits_table.psv


In [99]:
# Now we go one step forward and download the FITS files from PIC.
# We also parse the article's url and get its DOI value.

# Same (empty) columns as the parsed table: one table for the rows whose FITS
# file could be downloaded, one for the rows that failed.
table_valid = OrderedDict()
table_fails = OrderedDict()
for k in table_parsed.keys():
    for _target in (table_valid, table_fails):
        _target[k] = []

def download_file(fileurl, destdir=None):
    """
    Download 'fileurl' with curl, keeping the remote file name.

    Parameters
    ----------
    fileurl : str
        Address of the file to fetch.
    destdir : str, optional
        Directory to download into. Defaults to the module-level 'outdir',
        read at call time.

    Returns
    -------
    bool
        True when curl exits with status 0, False otherwise (including
        when curl cannot be started or the directory does not exist).
    """
    import subprocess
    if destdir is None:
        destdir = outdir
    # The url comes from a scraped web page: pass it as a single argument
    # rather than interpolating it into a shell command line.
    try:
        status = subprocess.call(['curl', '-f', '-O', fileurl], cwd=destdir)
    except OSError:
        return False
    return status == 0

def get_doi_url(url):
    """
    Return the address to use as the article reference.

    For addresses containing 'adsabs', 'cdsads' or 'arxiv' the page is
    fetched and the first table row mentioning 'doi' is turned into a
    'http://dx.doi.org/<doi>' address. Any other address is returned with
    its query string ('?...') removed.

    Parameters
    ----------
    url : str
        Address of the article page.

    Returns
    -------
    str
        The DOI-based address, or 'url' itself (unchanged) when the page
        has no row mentioning a DOI, or 'url' without query string for
        other sites.
    """
    import re
    if any(site in url for site in ('adsabs', 'cdsads', 'arxiv')):
        # Only this branch needs the network and the HTML parser.
        from bs4 import BeautifulSoup as bs
        import urllib2
        soup = bs(urllib2.urlopen(url))
        doi_rows = [tr for tr in soup.findAll('tr')
                    if 'doi' in tr.get_text().lower()]
        if not doi_rows:
            # No DOI on the page: keep the reference we already have.
            return url
        doi = doi_rows[0].get_text()
        doi = re.sub('DOI:', '', re.sub('\n', '', doi)).strip()
        return 'http://dx.doi.org/' + doi
    return re.sub(r'\?.*', '', url)

def append_values(table,_s,_a,_f):
    """
    Append one row to 'table': '_s' goes to the 'source' column, '_a' to
    'article' and '_f' to 'fitsfile'.
    """
    for column, value in (('source', _s), ('article', _a), ('fitsfile', _f)):
        table[column].append(value)
    
# Sort every parsed row into 'table_valid' (FITS file downloaded) or
# 'table_fails' (no FITS link, or download error).
leng = len(table_parsed['source'])
for i in range(leng):
    s = table_parsed['source'][i]
    a = table_parsed['article'][i]
    f = table_parsed['fitsfile'][i]
    if f == '_NULL_':
        # The table had no FITS link for this row.
        append_values(table_fails, s, a, f)
    elif not download_file(f):
        f = '_FAIL_'
        append_values(table_fails, s, a, f)
    else:
        # Keep only the local file name and resolve the article reference.
        f = os.path.basename(f)
        a = get_doi_url(a)
        append_values(table_valid, s, a, f)

In [100]:
pp.pprint(table_valid)

OrderedDict([('source', [u'Crab Nebula', u'1ES 0806+524', u'IC 310', u'PKS 1424+240', u'3C 58', u'AE Aquarii', u'3C 279', u'4C +55.17', u'Crab Pulsar', u'NGC 1275', u'IC 310', u'Segue 1', u'GRB 090102', u'1ES 2344+514', u'Swift J1644+57', u'4U 0142+61, 1ES 2259+586', u'1ES 1215+303', u'M87', u'PKS 2155-304', u'Perseus Cluster', u'W51', u'B3 2247+381', u'Scorpius X-1', u'Segue 1', u'PKS 1222+21', u'IC 310', u'1FGL J1954.3+2836, 1FGL J1958.6+2845', u'Cygnus X-3', u'Markarian 421, Markarian 501', u'1ES 0333+595, RGB J117+202, B2 1215+30, ...', u'Markarian 421', u'PG 1553+113', u'Markarian 501', u'Perseus Cluster', u'S5 0716+714', u'OJ 287', u'M13', u'Markarian 421', u'3C 454.3', u'3C 66A/B', u'Markarian 421', u'WR Binary System', u'3C 279', u'LS I +61 303', u'M87', u'LS I +61 303', u'1ES1959+650', u'dSph Draco', u'1ES1011+496', u'Cassiopeia A', u'IC 443', u'Cygnus X-1', u'Markarian 421', u'Markarian 180', u'LS I +61 303', u'HESS J1813-178', u'Galactic Center']), ('article', [u'http://dx.d

In [101]:
pp.pprint(table_fails)

OrderedDict([('source', [u'Mrk 421', u'PG 1553+113', u'HESS J1857+026', u'J2001+439', u'Mrk 501', u'1ES 0033+595', u'PKS 1510-089', u'1ES 1727+50', u'4C +21.35', u'HESS J0632+057', u'Markarian 421', u'NGC 1275', u'LS I +61 303', u'M87', u'Crab Pulsar', u'Crab Nebula', u'Crab Pulsar', u'Markarian 421', u'PG 1553+113', u'3C279', u'3C 66A', u'Markarian 501', u'Markarian 501', u'Pulsar-PWN Systems', u'LS I +61 303', u'GRB 080430', u'Willman 1', u'Crab Pulsar', u'PG 1553+113', u'BL Lac', u'TeV J2032+4130', u'Crab Pulsar', u'PSR B1951+32/CTB 80', u'Markarian 501', u'BL Lacertae', u'1ES 2344+514', u'Arp 220', u'PG 1553+113', u'1ES1218+30.4', u'HESS J1834-087/W41', u'GRB050713a', u'1ES1959+650']), ('article', ['http://cdsads.u-strasbg.fr/abs/2014arXiv1412.3576T', 'http://cdsads.u-strasbg.fr/abs/2014arXiv1408.1975M', 'http://cdsads.u-strasbg.fr/abs/2014A%26A...571A..96A', 'http://cdsads.u-strasbg.fr/abs/2014arXiv1409.3389M', 'http://cdsads.u-strasbg.fr/abs/2014arXiv1410.6391M', 'http://cdsads.u

In [104]:
# And now let's convert the quantities and resolve name/position
from astropy.io import fits
from astropy import units
Unit = units.Unit
import numpy as np
import glob
from collections import OrderedDict

def transf_data(tabled,tableu):
    """
    Convert, in place, energies to Hz and fluxes to erg s-1 cm-2.

    Parameters
    ----------
    tabled : dict
        Column values under 'energy', 'flux' and, optionally, 'Denergy'
        and 'Dflux'.
    tableu : dict
        Unit of each column under '<column>_unit', as a string astropy
        can parse.

    Returns
    -------
    bool
        True when energy and flux were converted; False (nothing modified)
        when the flux unit is 'ph s-1 cm-2', for which no conversion is
        defined. The error columns are converted on a best-effort basis:
        a column that is absent or cannot be converted is left untouched.

    Notes
    -----
    A flux unit outside the conversion table below raises KeyError.
    """
    uEn = Unit('Hz')
    uFn = Unit('erg s-1 cm-2')
    uEc = Unit('TeV')
    # Flux conversions take (flux, energy); 'ph TeV-1 s-1 cm-2' values are
    # multiplied by the squared energy (in TeV) of each point.
    conv = {Unit('ph TeV s-1 cm-2') : lambda x,y: (x/Unit('ph')).to(uFn),
            Unit('ph TeV-1 s-1 cm-2') : lambda x,y: ((y.to(uEc)**2)*(x/Unit('ph'))).to(uFn),
            Unit('ph s-1 cm-2') : lambda x,y: None}

    # 'float' instead of 'np.float': that alias was removed from NumPy.
    uE = Unit(tableu['energy_unit'])
    vE = np.array(tabled['energy'], float) * uE
    vEn = vE.to(uEn, equivalencies=units.spectral())

    uF = Unit(tableu['flux_unit'])
    vF = np.array(tabled['flux'], float) * uF
    vFn = conv[uF](vF, vE)
    if vFn is None:
        print("vFn is None!?!: %s" % str(uF))
        return False

    tableu['energy_unit'] = uEn.to_string(units.format.CDS)
    tabled['energy'] = vEn.value
    tableu['flux_unit'] = uFn.to_string(units.format.CDS)
    tabled['flux'] = vFn.value

    # Optional error columns: compute first, assign only on success, so that
    # unit and values can never get out of sync.
    try:
        uDE = Unit(tableu['Denergy_unit'])
        vDE = np.array(tabled['Denergy'], float) * uDE
        vDEn = vDE.to(uEn, equivalencies=units.spectral())
    except Exception:
        pass
    else:
        tableu['Denergy_unit'] = uEn.to_string(units.format.CDS)
        tabled['Denergy'] = vDEn.value

    try:
        uDF = Unit(tableu['Dflux_unit'])
        vDF = np.array(tabled['Dflux'], float) * uDF
        # Notice we use the energy bin/value of the measurement.
        vDFn_values = conv[uDF](vDF, vE).value
    except Exception:
        pass
    else:
        tableu['Dflux_unit'] = uFn.to_string(units.format.CDS)
        tabled['Dflux'] = vDFn_values
    return True

def write_csv(table,outdir,extras={}):
    """
    Write one spectrum as a pipe-separated text file with a '#' header.

    Parameters
    ----------
    table : dict
        Must provide 'filename', 'EXTNAME', 'OBJECT', 'ra', 'dec', the
        observation date under 'DATE-OBS' or 'DATE_OBS', the four
        '<column>_unit' entries and the columns 'energy', 'Denergy',
        'flux', 'Dflux'. A column set to None is written as '-999'.
    outdir : str
        Directory where the file is created.
    extras : dict, optional
        Additional 'key: value' header lines, written sorted by key.
        The default dictionary is never modified.

    Returns
    -------
    str
        Path of the file written: the file name without its last five
        characters (the '.fits' suffix), plus '.<EXTNAME>.psv'.
    """
    fname = table['filename'][:-5]+'.'+table['EXTNAME']+'.psv'
    fname = os.path.join(outdir,fname)
    _c = '#'
    _s = '|'
    # The rest of the notebook stores the date under 'DATE_OBS'; accept both.
    date_obs = table['DATE-OBS'] if 'DATE-OBS' in table else table['DATE_OBS']
    header = [('filename', table['filename']),
              ('EXTNAME', table['EXTNAME']),
              ('DATE-OBS', date_obs),
              ('OBJECT', table['OBJECT']),
              ('ra', table['ra']),
              ('dec', table['dec'])]
    header.extend((k, extras[k]) for k in sorted(extras.keys()))
    columns = ['energy','Denergy','flux','Dflux']
    _eu = table['energy_unit']
    _deu = table['Denergy_unit']
    _fu = table['flux_unit']
    _dfu = table['Dflux_unit']
    # 'with' closes the file even if a row cannot be formatted.
    with open(fname,'w') as fp:
        for key, value in header:
            fp.write("%s %s: %s\n" % (_c,key,value))
        fp.write("%s energy(%s) %s energy_error(%s) %s flux(%s) %s flux_error(%s) %s\n" % (_s,_eu,_s,_deu,_s,_fu,_s,_dfu,_s))
        for i in range(len(table['energy'])):
            for k in columns:
                vec = table[k]
                val = vec[i] if vec is not None else '-999'
                fp.write("%s %s" % (_s,val))
            fp.write("%s\n" % (_s))
    return fname

def objname2pos(name):
    """
    Resolve an object name into a sky position with SkyCoord.from_name.

    Parameters
    ----------
    name : str
        Object name to resolve.

    Returns
    -------
    dict
        Keys 'coordinates' (name of the coordinate frame), 'ra' and 'dec'
        (values of the resolved coordinates). All three are None when the
        name cannot be resolved.
    """
    from astropy.coordinates import SkyCoord
    out = {'coordinates': None, 'ra': None, 'dec': None}
    try:
        pos = SkyCoord.from_name(name)
        resolved = {'coordinates': pos.frame.name,
                    'ra': pos.ra.value,
                    'dec': pos.dec.value}
    except Exception:
        # Best effort: an unresolved name is reported through None values.
        # 'Exception' (not a bare except) keeps Ctrl-C working during the
        # lookup.
        return out
    out.update(resolved)
    return out

def read_fits(filename):
    """
    Read every 'SPECTRUM' extension of a FITS file into plain dictionaries.

    For each HDU whose name contains 'SPECTRUM', three dictionaries are built:
      - 'variables': filename, EXTNAME, OBJECT, DATE_OBS and the position
        ('coordinates', 'ra', 'dec') resolved from the object name. When
        the header has no OBJECT, the name is guessed from the file name;
      - 'data': the columns 'energy', 'Denergy', 'flux' and 'Dflux'
        (None for a column the extension does not have);
      - 'units': the unit of each of those columns, under '<column>_unit'.

    Parameters
    ----------
    filename : str
        Path of the FITS file.

    Returns
    -------
    list of dict or None
        One {'data', 'variables', 'units'} dictionary per spectrum, or None
        when the file cannot be opened.
    """
    import os
    import re

    wanted_columns = ('energy','Denergy','flux','Dflux')

    def select_dataUnits(hdulist,du_name='SPECTRUM'):
        """Return the HDUs whose name contains 'du_name'."""
        return [hdu for hdu in hdulist if du_name in hdu.name]

    def attempt_fix_objectname(table):
        """Take OBJECT from the file name: 'MAGIC_20xx_' prefix removed, text up to the next '_'."""
        assert table['OBJECT'] is None
        _fn = os.path.basename(table['filename'])[:-5]
        objname = re.sub('MAGIC_20[0-9][0-9]_','',_fn)
        table['OBJECT'] = objname.split('_')[0]
        print('*** OBJECT name got from file name.')

    def read_header(spec,tablev):
        """Copy EXTNAME, OBJECT and DATE-OBS (stored as 'DATE_OBS') from the header."""
        assert isinstance(tablev,dict)
        header = spec.header
        for word in ('EXTNAME','OBJECT'):
            tablev[word] = header.get(word, None)
        tablev['DATE_OBS'] = header.get('DATE-OBS', None)
        if tablev['DATE_OBS']:
            # Drop a trailing '--' from the date string.
            tablev['DATE_OBS'] = re.sub('--$','',tablev['DATE_OBS'])

    def read_data(spec,tabled,tableu):
        """Fill 'tabled'/'tableu' with the wanted columns and their units."""
        assert isinstance(tabled,dict)
        assert isinstance(tableu,dict)
        for n in wanted_columns:
            tableu[n+'_unit'] = None
            tabled[n] = None
        for n,u in zip(spec.columns.names,spec.columns.units):
            if n in wanted_columns:
                tableu[n+'_unit'] = u
                # Copy the values so they stay valid once the file is closed.
                tabled[n] = np.array(spec.data[n])
            else:
                print("Different field: %s" % (n,))

    try:
        hdulist = fits.open(filename)
    except Exception:
        return None

    out_tables = []
    try:
        for spec in select_dataUnits(hdulist):
            tablev = OrderedDict()
            tablev['filename'] = os.path.basename(filename)
            for key in ('EXTNAME','OBJECT','DATE_OBS','coordinates','ra','dec'):
                tablev[key] = None
            tabled = OrderedDict()
            tableu = OrderedDict()

            read_header(spec,tablev)
            if tablev['OBJECT'] is None:
                attempt_fix_objectname(tablev)
            pos = objname2pos(tablev['OBJECT'])
            for key in ('coordinates','ra','dec'):
                tablev[key] = pos[key]
            read_data(spec,tabled,tableu)
            out_tables.append({'data':tabled,'variables':tablev,'units':tableu})
    finally:
        # This function is called once per file in a loop: do not leave
        # the file handles open.
        hdulist.close()

    return out_tables

indir = 'pic_parsing_files'
fp = open('objs_posdate.csv','w')

cnt = -1
for fn in table_valid['fitsfile']:
    cnt+=1
    print
    print "Processing file:",fn,
    out = read_fits(os.path.join(indir,fn))
    if out is None:
        print " ..failed: to open." 
        continue
    print
    for tabs in out:
        td = tabs['data']
        tu = tabs['units']
        tv = tabs['variables']
        transf_data(td,tu)
        reference = table_valid['article'][cnt]
        tv['article'] = str(reference)
        fp.write("%s,%s,%s,%s,%s,%s\n" % (tv['OBJECT'],tv['OBJECT'],tv['article'],tv['DATE_OBS'],tv['ra'],tv['dec']))
fp.close()
print


Processing file: MAGIC_2015_CrabNebula.fits *** OBJECT name got from file name.
*** OBJECT name got from file name.


Processing file: MAGIC_2015_1ES0806.fits *** OBJECT name got from file name.
*** OBJECT name got from file name.
*** OBJECT name got from file name.
*** OBJECT name got from file name.


Processing file: MAGIC_2014_Ic310.fits *** OBJECT name got from file name.
*** OBJECT name got from file name.


Processing file: MAGIC_2014_PKS1424+240.fits *** OBJECT name got from file name.
*** OBJECT name got from file name.
*** OBJECT name got from file name.
*** OBJECT name got from file name.


Processing file: MAGIC_2014_3C58.fits *** OBJECT name got from file name.


Processing file: MAGIC_2014_AEaqr.fits

Processing file: MAGIC_2014_3C279.fits *** OBJECT name got from file name.
*** OBJECT name got from file name.


Processing file: MAGIC_2014_4C+5517.fits *** OBJECT name got from file name.


Processing file: MAGIC_2014_CrabPulsar.fits *** OBJECT name got from file name.
**

In [105]:
# The file 'objs_posdate.csv' written by the previous cell has been cleaned and its
# missing values filled in. The cleaned content is kept below for future use.
# NOTE(review): the cleaned file is referred to as 'objs_posdate_clean.csv', but this
# cell writes it to 'objs_posdate_clean.csv.tmp' -- confirm which name is intended.
posdate_file = 'objs_posdate_clean.csv.tmp'
fp = open(posdate_file,'w')
fp.write('''name_original,name_corrected,article,date-obs,ra,dec,is_upper_limit  
CrabNebula,Crab Nebula,http://dx.doi.org/10.1016/j.jheap.2015.01.002,2009-10,83.6332124,22.0144601,0  
1ES0806,1ES 0806+524,http://dx.doi.org/10.1093/mnras/stv895,2011-01,122.45494471,52.31618075,0  
Ic310,Ic310,http://dx.doi.org/10.1126/science.1256183,2012-11-13,49.1792708,41.3247667,0  
PKS1424+240,PKS1424+240,http://dx.doi.org/10.1051/0004-6361/201423364,2009-10,216.751632458,23.800010444,0  
PKS1424+240,PKS1424+240,http://dx.doi.org/10.1051/0004-6361/201423364,2010-08,216.751632458,23.800010444,0  
3C58,3C58,http://dx.doi.org/10.1051/0004-6361/201424261,2013-11,31.4043,64.8283,0  
3C279,3C279,http://dx.doi.org/10.1051/0004-6361/201323036,2011-02,194.046527375,-5.789312417,0  
3C279,3C279,http://dx.doi.org/10.1051/0004-6361/201323036,2011-06,194.046527375,-5.789312417,0  
4C+5517,4C+55.17,http://dx.doi.org/10.1093/mnras/stu227,2010-12,149.40909379,55.38270378,0  
CrabPulsar,Crab Pulsar,http://dx.doi.org/10.1051/0004-6361/201423664,2009-09,83.63307625,22.01449328,0  
NGC1275,NGC1275,http://dx.doi.org/10.1051/0004-6361/201322951,2009-10,49.950667083,41.511695306,0  
NGC1275,NGC1275,http://dx.doi.org/10.1051/0004-6361/201322951,2010-08,49.950667083,41.511695306,0  
IC310,IC310,http://dx.doi.org/10.1051/0004-6361/201321938,2009-12,49.1792708,41.3247667,0  
Segue1,Segue 1,http://dx.doi.org/10.1088/1475-7516/2014/02/008,2011-01,151.76333,16.07361,0  
GRB090102,GRB090102,http://dx.doi.org/10.1093/mnras/stt2041,2009-01-02,128.247917,33.107139,UL  
1ES2344+514,1ES2344+514,http://dx.doi.org/10.1051/0004-6361/201220714,2008-11,356.770153,51.704967,0  
J1644+57,Sw J1644+57,http://dx.doi.org/10.1051/0004-6361/201321197,2011-03-31,251.205375,57.58077778,UL  
4U 0142+61,4U 0142+61,http://dx.doi.org/10.1051/0004-6361/201220275,2008-08,26.59253,61.75106,0  
1E 2259+586,1E 2259+586,http://dx.doi.org/10.1051/0004-6361/201220275,2010-08,345.28391,58.87904,0  
1ES1215,1ES1215+303,http://dx.doi.org/10.1051/0004-6361/201219133,2011-02,184.46700833,30.11684333,0  
M87,M87,http://dx.doi.org/10.1051/0004-6361/201117827,2006-03,187.70593075,12.391123306,0  
Crab Nebula,Crab Nebula,http://dx.doi.org/10.1051/0004-6361/201218796,2007-10,83.633083,22.0145,0  
PKS 2155-304,PKS 2155-304,http://dx.doi.org/10.1051/0004-6361/201218796,2006-07-28,329.716937958,-30.225588389,0  
Perseus,Perseus,http://dx.doi.org/10.1051/0004-6361/201118502,2011-02,49.950667083,41.511695306,UL  
W51,W51,http://dx.doi.org/10.1051/0004-6361/201218846,2011-06,290.925,14.50917,0  
B32247,B3 2247+381,http://dx.doi.org/10.1051/0004-6361/201117967,2010-10,347.343870254,2.144386368,0  
ScorpiusX1,Sco X-1,http://dx.doi.org/10.1088/2041-8205/735/1/L5,2010-05,244.9794458,-15.6402833,UL  
PKS1222,PKS 1222+21,http://dx.doi.org/10.1088/2041-8205/730/1/L8,2010-06-17,186.22692,21.3795625,0  
IC310,IC310,http://dx.doi.org/10.1088/2041-8205/723/2/L207,2009-10,49.1792708,41.3247667,UL  
1FGL J1954.3+2836,1FGL J1954.3+2836,http://dx.doi.org/10.1088/0004-637X/725/2/1629,2009-08,298.579792,28.60167,0  
1FGL J1958.6+2845,1FGL J1958.6+2845,http://dx.doi.org/10.1088/0004-637X/725/2/1629,2009-08,299.6667,28.765,UL  
1FGL J1958.63+2845,1FGL J1958.6+2845,http://dx.doi.org/10.1088/0004-637X/725/2/1629,2009-08,299.6667,28.765,UL  
CygnusX3,Cygnus X-3,http://dx.doi.org/10.1088/0004-637X/721/1/843,2009-08,308.107417,40.95775,0  
Blazars,(1ES 1011+496),http://dx.doi.org/10.1088/0004-637X/729/2/115,None,None,None,UL  
1ES 0033+595,1ES 0033+595,http://dx.doi.org/10.1088/0004-637X/729/2/115,2006-08,8.968975,59.8344972,UL  
1ES 0120+340,1ES 0120+340,http://dx.doi.org/10.1088/0004-637X/729/2/115,2005-08,20.7860333,34.3468222,UL  
1ES 0229+200,1ES 0229+200,http://dx.doi.org/10.1088/0004-637X/729/2/115,2006-08,38.2025639917,20.2881898889,UL  
RX J0319.8+1845,RX J0319.8+1845,http://dx.doi.org/10.1088/0004-637X/729/2/115,2005-09,49.96585,18.7595556,UL  
1ES 0323+022,1ES 0323+022,http://dx.doi.org/10.1088/0004-637X/729/2/115,2005-09,51.5581177833,2.420743025,UL  
1ES 0414+009,1ES 0414+009,http://dx.doi.org/10.1088/0004-637X/729/2/115,2005-12,64.2187375,1.0899944,UL  
1RXS J044127.8+150455,1RXS J044127.8+150455,http://dx.doi.org/10.1088/0004-637X/729/2/115,2007-10,70.3643917,15.0820944,UL  
1ES 0647+250,1ES 0647+250,http://dx.doi.org/10.1088/0004-637X/729/2/115,2008-02,102.693709246,25.0498953861,UL  
1ES 0806+524,1ES 0806+524,http://dx.doi.org/10.1088/0004-637X/729/2/115,2005-10,122.454944708,52.31618075,UL  
1ES 0927+500,1ES 0927+500,http://dx.doi.org/10.1088/0004-637X/729/2/115,2005-12,142.6566542,49.8404528,UL  
1ES 1028+511,1ES 1028+511,http://dx.doi.org/10.1088/0004-637X/729/2/115,2007-03,157.827188208,50.8932831306,UL  
RGB J1117+202,RGB J1117+202,http://dx.doi.org/10.1088/0004-637X/729/2/115,2007-01,169.276046717,20.2353848833,UL  
RX J1136.5+6737,RX J1136.5+6737,http://dx.doi.org/10.1088/0004-637X/729/2/115,2007-02,174.125362892,67.6178736833,UL  
B2 1215+30,B2 1215+30,http://dx.doi.org/10.1088/0004-637X/729/2/115,2007-03,184.46700833,30.11684333,UL  
2E 1415.6+2557,2E 1415.6+2557,http://dx.doi.org/10.1088/0004-637X/729/2/115,2007-02,214.4861,25.7240111,UL  
PKS 1424+240,PKS 1424+240,http://dx.doi.org/10.1088/0004-637X/729/2/115,2006-05,216.751632458,23.800010444,UL  
RX J1725.0+1152,RX J1725.0+1152,http://dx.doi.org/10.1088/0004-637X/729/2/115,2007-03,261.268086958,11.870963556,UL  
1ES 1727+502,1ES 1727+502,http://dx.doi.org/10.1088/0004-637X/729/2/115,2006-05,262.07759958,50.219575,UL  
1ES 1741+196,1ES 1741+196,http://dx.doi.org/10.1088/0004-637X/729/2/115,2006-07,265.99096917,19.58583806,UL  
B3 2247+381,B3 2247+381,http://dx.doi.org/10.1088/0004-637X/729/2/115,2006-08,342.52395075,38.410332111,UL  
Mrk421,Mrk421,http://dx.doi.org/10.1051/0004-6361/200913945,2006-04-22,166.113808083,38.208833083,0  
Mrk421,Mrk421,http://dx.doi.org/10.1051/0004-6361/200913945,2006-04-24,166.113808083,38.208833083,0  
Mrk421,Mrk421,http://dx.doi.org/10.1051/0004-6361/200913945,2006-04-25,166.113808083,38.208833083,0  
Mrk421,Mrk421,http://dx.doi.org/10.1051/0004-6361/200913945,2006-04-26,166.113808083,38.208833083,0  
Mrk421,Mrk421,http://dx.doi.org/10.1051/0004-6361/200913945,2006-04-27,166.113808083,38.208833083,0  
Mrk421,Mrk421,http://dx.doi.org/10.1051/0004-6361/200913945,2006-04-28,166.113808083,38.208833083,0  
Mrk421,Mrk421,http://dx.doi.org/10.1051/0004-6361/200913945,2006-04-29,166.113808083,38.208833083,0  
Mrk421,Mrk421,http://dx.doi.org/10.1051/0004-6361/200913945,2006-04-30,166.113808083,38.208833083,0  
Mrk421,Mrk421,http://dx.doi.org/10.1051/0004-6361/200913945,2006-06-14,166.113808083,38.208833083,0  
PG1553,PG 1553+113,http://dx.doi.org/10.1051/0004-6361/200913607,2008-04,238.92935,11.19010167,0  
Mrk501,Mrk501,http://dx.doi.org/10.1088/0004-637X/705/2/1624,2006-07,253.4675695,39.760169139,0  
Perseus,Perseus,http://dx.doi.org/10.1088/0004-637X/710/1/634,2008-12,49.950667083,41.511695306,UL  
S50716,S5 0716+714,http://dx.doi.org/10.1088/0004-637X/704/2/L129,2008-04-22,110.47270192,71.34343428,0  
OJ287,OJ287,http://dx.doi.org/10.1093/pasj/61.5.1011,2007-11-09,133.703645542,20.108511389,0  
M13,M13,http://dx.doi.org/10.1088/0004-637X/702/1/266,2007-07,250.423475,36.4613194,UL  
Mrk421,Mrk421,http://dx.doi.org/10.1088/0004-637X/703/1/169,2006-04-30,166.113808083,38.208833083,0  
3C454,3C454,http://dx.doi.org/10.1051/0004-6361/200811326,2007-12,342.89473458,18.81112944,UL  
3C66A,3C66A,http://dx.doi.org/10.1088/0004-637X/692/1/L29,2007-12,35.66505,43.0355222,0  
WR 147,WR 147,http://dx.doi.org/10.1086/592433,2007-08-11,309.18156,40.35223,0  
WR 146,WR 146,http://dx.doi.org/10.1086/592433,2005-01-01,308.946167,41.379056,0  
3C279,3C279,http://www.sciencemag.org/content/320/5884/1752.full,2006-02-23,194.046527375,-5.789312417,0  
LSI61303,LS I +61 303,http://dx.doi.org/10.1088/0004-637X/693/1/303,2006-12,40.13193816,61.22933651,0  
M87,M87,http://dx.doi.org/10.1086/592348,2008-02-11,187.70593075,12.391123306,0  
LSI61303,LS I +61 303,http://dx.doi.org/10.1086/590332,2006-11-19,40.13193816,61.22933651,0  
1ES1959,1ES 1959+650,http://dx.doi.org/10.1086/586731,2006-05-27,299.99938375,65.14851472,0  
1ES1011,1ES 1011+496,http://dx.doi.org/10.1086/521982,2007-05,153.76724917,49.43352908,0  
CassiopeiaA,CassiopeiaA,http://dx.doi.org/10.1051/0004-6361:20078168,2007-01,350.8664167,58.8117778,0  
IC443,IC443,http://dx.doi.org/10.1086/520957,2007-01,94.51125,22.66,0  
CygnusX1,Cygnus X-1,http://dx.doi.org/10.1086/521145,2006-09-24,299.59031645,35.20160509,0  
CygnusX1,Cygnus X-1,http://dx.doi.org/10.1086/521145,2006-07-26,299.59031645,35.20160509,0  
Mrk421,Mrk421,http://dx.doi.org/10.1086/518221,2005-04,166.113808083,38.208833083,0  
Mrk180,Mrk180,http://dx.doi.org/10.1086/508020,2006-03-30,174.110035042,70.157585278,0  
LSI61303,LS I +61 303,http://dx.doi.org/10.1126/science.1128177,2006-03,40.13193816,61.22933651,0  
HESS1813,HESS J1813-178,http://dx.doi.org/10.1086/500364,2005-07,273.36292,-17.84889,0  
GC,Galactic Center,http://dx.doi.org/10.1086/501164,2005-06,266.416833,-29.007806,0  
''')
fp.close()

In [107]:
# And now we redo the processing fixing the date-obs and position values
from astropy.table import Table
# Support table with the cleaned names, dates and positions written above.
suptab = Table.read(posdate_file,format='ascii.csv')

def write_ipac(tabled,tableu,tablev,outdir):
    """
    Write one spectrum as an IPAC table, with 'tablev' as header keywords.

    Parameters
    ----------
    tabled : dict
        Column name -> values; None for a column without data, which is
        written as a placeholder column created with Column(length=...).
    tableu : dict
        Unit of each column under '<column>_unit'.
    tablev : dict
        Metadata of the spectrum; must provide 'filename' and 'EXTNAME'.
        Every entry is stored in the table's 'keywords' metadata.
    outdir : str
        Directory where the file is created.

    Returns
    -------
    str
        Path of the file written: the file name without its last five
        characters (the '.fits' suffix), plus '.<EXTNAME>.dat'.
    """
    from astropy.table import Table
    from astropy.io.ascii import masked

    # The IPAC writer expects each keyword as {'value': ...}.
    metav = OrderedDict((k, {'value': v}) for k, v in tablev.items())
    t = Table()
    t.meta.update({'keywords':metav})

    # Length for the placeholder columns. Computed up front so that a missing
    # column is sized correctly even when it comes before the columns that
    # do have data.
    nrows = max([len(d) for d in tabled.values() if d is not None] or [0])

    fill = []
    for k in tabled.keys():
        d = tabled[k]
        u = tableu[k+'_unit']
        if d is not None:
            col = t.Column(d,name=k,unit=u)
        else:
            col = t.Column(name=k,unit=u,length=nrows)
        t.add_column(col)
        fill.append( (masked,0.0,k) )

    fname = tablev['filename'][:-5]+'.'+tablev['EXTNAME']+'.dat'
    fname = os.path.join(outdir,fname)
    t.write(fname,format='ascii.ipac',fill_values=fill)
    return fname

def fix_posdate_values(table,table_support):
    """
    Complete the metadata of one spectrum from the support table, in place.

    The rows of 'table_support' whose 'name_original' and 'article' match
    table['OBJECT'] and table['article'] are selected; the first of them
    provides 'ra', 'dec', the corrected 'OBJECT' name and the 'upper_limit'
    flag (0 when 'is_upper_limit' is '0', 1 otherwise). 'DATE_OBS' is only
    filled in when it is None, which requires exactly one matching row.

    Parameters
    ----------
    table : dict
        Metadata of the spectrum; needs 'OBJECT', 'article' and 'DATE_OBS'.
    table_support : table
        Must provide the columns 'name_original', 'name_corrected',
        'article', 'date-obs', 'ra', 'dec' and 'is_upper_limit', and
        support selection with a boolean mask.

    Raises
    ------
    AssertionError
        If 'DATE_OBS' is None and there is not exactly one matching row.
    IndexError
        If no row matches; 'table' is left unmodified in that case.
    """
    same_name = table_support['name_original']==table['OBJECT']
    same_article = table_support['article']==table['article']
    objtab = table_support[same_name & same_article]

    needs_date = table['DATE_OBS'] is None
    if needs_date:
        assert len(objtab)==1, "Object %s, has more than one or none entries in support table:\n%s" % (table['OBJECT'],objtab)
    if len(objtab)==0:
        raise IndexError("Object %s (article %s) has no entry in support table" % (table['OBJECT'],table['article']))

    if needs_date:
        table['DATE_OBS'] = objtab['date-obs'][0]
    # Compare as stripped text so that the flag is also recognised when the
    # column was read as integers or carries surrounding blanks.
    flag = str(objtab['is_upper_limit'][0]).strip()
    table['upper_limit'] = 0 if flag=='0' else 1
    table['ra'] = objtab['ra'][0]
    table['dec'] = objtab['dec'][0]
    table['OBJECT'] = objtab['name_corrected'][0]
    return

indir = 'pic_parsing_files'
outdir = 'magic_spectrum_files'
if os.path.exists(outdir) and os.path.isdir(outdir):
    for f in glob.glob(os.path.join(outdir,'*')):
        os.remove(f)
    os.rmdir(outdir)
os.mkdir(outdir)
outdir = os.path.join(os.getcwd(),outdir)

cnt = -1
for fn in table_valid['fitsfile']:
    cnt+=1
    print
    print "Processing file:",fn,
    out = read_fits(os.path.join(indir,fn))
    if out is None:
        print " ..failed: to open." 
        continue
    print
    for tabs in out:
        td = tabs['data']
        tu = tabs['units']
        tv = tabs['variables']
        transf_data(td,tu)
        reference = table_valid['article'][cnt]
        tv['article'] = str(reference)
        print "Object:",tv['OBJECT']
        fix_posdate_values(tv,suptab)
        filename = write_ipac(td,tu,tv,outdir=outdir)
        print "-> output file",filename,"created." 
print


Processing file: MAGIC_2015_CrabNebula.fits *** OBJECT name got from file name.
*** OBJECT name got from file name.

Object: CrabNebula
-> output file /home/chbrandt/fido/booq/docs/notebooks/magic_spectrum_files/MAGIC_2015_CrabNebula.SPECTRUM.dat created.
Object: CrabNebula
-> output file /home/chbrandt/fido/booq/docs/notebooks/magic_spectrum_files/MAGIC_2015_CrabNebula.SPECTRUM_1.dat created.

Processing file: MAGIC_2015_1ES0806.fits *** OBJECT name got from file name.
*** OBJECT name got from file name.
*** OBJECT name got from file name.
*** OBJECT name got from file name.

Object: 1ES0806
-> output file /home/chbrandt/fido/booq/docs/notebooks/magic_spectrum_files/MAGIC_2015_1ES0806.SPECTRUM.dat created.
Object: 1ES0806
-> output file /home/chbrandt/fido/booq/docs/notebooks/magic_spectrum_files/MAGIC_2015_1ES0806.SPECTRUM_1.dat created.
Object: 1ES0806
-> output file /home/chbrandt/fido/booq/docs/notebooks/magic_spectrum_files/MAGIC_2015_1ES0806.SPECTRUM_2.dat created.
Object: 1ES0

In [193]:
# Now let's reprocess each one of these files with the goal of creating a single, flat table
def planify_table(table):
    """
    Turn the header keywords of a table into ordinary columns, in place.

    Every entry of table.meta['keywords'] becomes a column named after the
    keyword, holding the keyword's 'value' repeated once per row.

    Parameters
    ----------
    table : table
        Object with a length, a 'meta' dictionary holding 'keywords' and
        item assignment for new columns.

    Returns
    -------
    table
        The same object that was passed in.
    """
    nrows = len(table)
    for keyword, entry in table.meta['keywords'].items():
        table[keyword] = [entry['value']] * nrows
    return table

def append_table(master_table,current_table):
    """
    Return a copy of 'master_table' with the rows of 'current_table' added.

    Only the columns of the master table are taken from 'current_table'.
    The operation is all-or-nothing: when a column of 'current_table' has a
    unit that is neither empty nor equivalent to the master's unit, or when
    a row cannot be added, 'master_table' itself is returned unchanged.

    NOTE(review): units are only checked for equivalence; the values are
    appended as they are, without conversion to the master's unit --
    confirm that the inputs are always already in the master's units.

    Parameters
    ----------
    master_table : table
        Table to extend; it is never modified.
    current_table : table
        Table providing the new rows; must have every column of the master.

    Returns
    -------
    table
        The extended copy, or 'master_table' when nothing was appended.
    """
    mt = master_table.copy()
    nrows = len(current_table)
    if nrows == 0:
        return mt

    # The unit of a column does not depend on the row: check each column once.
    du = Unit('')
    columns = []
    for cname in mt.colnames:
        col = current_table[cname]
        u = col.unit
        mu = mt[cname].unit
        if not (not u or u==du or u.is_equivalent(mu)):
            return master_table
        columns.append(col)

    for i in range(nrows):
        try:
            mt.add_row([col[i] for col in columns])
        except Exception:
            return master_table
    return mt

indir = 'magic_spectrum_files'
datfiles = glob.glob(os.path.join(indir,'*.dat'))

# Empty master table: one column per keyword/quantity of the spectrum files.
master = Table(
    names=['OBJECT', 'ra', 'dec', 'energy', 'Denergy', 'flux', 'Dflux',
           'upper_limit', 'DATE_OBS', 'article'],
    dtype=('object', 'f8', 'f8', 'f8', 'f8', 'f8', 'f8', 'bool', 'object', 'object'),
)
for _colname, _unitname in (('ra', 'deg'),
                            ('dec', 'deg'),
                            ('energy', 'Hz'),
                            ('Denergy', 'Hz'),
                            ('flux', 'erg/(s cm2)'),
                            ('Dflux', 'erg/(s cm2)')):
    master[_colname].unit = str(Unit(_unitname))

# Flatten every spectrum file and stack it onto the master table.
cnt = 0
for f in datfiles:
    t = Table.read(f, format='ascii.ipac')
    pt = planify_table(t)
    master = append_table(master, pt)


In [246]:
# Save the merged master table as a single IPAC file.
datafile = 'table_all.dat'
master.write(datafile,format='ascii.ipac')

In [219]:
# Now let me try to generate an XML document with the metadata available in this (master) table


In [272]:
import xml.etree.ElementTree as ET

def define_resource(name):
    """
    Build the skeleton of a <resource> element.

    The element gets a <schema> child whose text is 'name', followed by one
    <meta name="..."> child per known metadata entry. All entries are
    unset, so the text of every <meta> child is the string 'None'.

    Returns the <resource> element.
    """
    resource = ET.Element('resource')
    ET.SubElement(resource, 'schema').text = name

    # Known metadata entries; none of them has a value yet.
    metadata = {'title' : None,
                'shortName' : None,
                'subject' : None,
                'source' : None,
                'referenceURL' : None,
                'type' : None,
                'creationDate' : None,
                'creator.name' : None,
                'description' : None,
                'facility' : None,
                'instrument' : None,
                'coverage.waveband' : None,
                'copyright' : None,
                'service' : None}
    for key in metadata:
        ET.SubElement(resource, 'meta', name=key).text = str(metadata[key])
    return resource

def define_table(table,parent=None):
    """
    Build the <table id="main"> element describing the columns of `table`.

    `table` must expose `colnames` and return, when indexed by a column
    name, an object with `name` and `unit` attributes (presumably an
    astropy Table -- confirm against the caller).

    When `parent` is given the new element is attached to it as a child;
    otherwise a free-standing element is created. The element receives an
    <index columns=""/> child followed by one <column name=... unit=.../>
    child per column; a falsy unit is written as an empty string.

    Returns the <table> element.
    """
    attributes = {'id':'main','onDisk':'True','mixin':"//scs#q3cindex"}
    if parent is None:
        xtable = ET.Element('table',attributes)
    else:
        xtable = ET.SubElement(parent,'table',attributes)

    # Index declaration; no columns are listed in it.
    ET.SubElement(xtable,'index',{'columns':''})

    for column_name in table.colnames:
        column = table[column_name]
        unit_text = ''
        if column.unit:
            unit_text = str(column.unit)
        ET.SubElement(xtable,'column',{'name':column.name,
                                      'unit':unit_text})
    return xtable

def define_data(datafile,format='ascii.ipac',**kwargs):
    """
    Build the <data id="import"> element describing how `datafile` is read.

    The element holds a grammar child (selected by `format`) followed by a
    <make table="main"> child containing a <rowmaker idmaps="*"/>.

    Parameters
    ----------
    datafile : str
        Path of the data file. It is opened to read the column layout from
        its first line starting with '|', and it is written verbatim into
        the grammar's <source> child.
    format : str
        Key selecting the grammar builder. Only 'ascii.ipac' is
        implemented; 'ascii.csv' and 'fits.table' are placeholders.
    **kwargs
        Extra attributes for the grammar element. Only 'topIgnoredLines'
        is recognised; any other key is ignored. Values are converted to
        strings so the element can be serialised.

    Returns
    -------
    xml.etree.ElementTree.Element
        The <data> element.

    Raises
    ------
    KeyError
        If `format` is not one of the known keys.
    NotImplementedError
        If `format` selects one of the placeholder grammars.
    ValueError
        If no line of `datafile` starts with '|'.
    """
    def get_columnDefs(datafile):
        """
        Map each column name to its (first, last) character position,
        read from the first line of `datafile` that starts with '|'.
        """
        from collections import OrderedDict
        sep = "|"
        header = None
        # 'with' guarantees the file is closed, also on error.
        with open(datafile,'r') as fp:
            for line in fp:
                if line.startswith(sep):
                    header = line
                    break
        if header is None:
            raise ValueError("no header line starting with '{0}' found in {1}"
                             .format(sep,datafile))
        sep_position = [ pos for pos,char in enumerate(header) if char == sep ]
        colnames = [ cn.strip() for cn in header.split(sep) if cn.strip()!="" ]
        # N columns must be delimited by N+1 separators.
        assert len(colnames)+1 == len(sep_position), \
            "header separators and column names do not match"
        outDefs = OrderedDict()
        for i,colname in enumerate(colnames):
            # Positions are 0-based string indices of the characters
            # strictly between two consecutive separators.
            # NOTE(review): confirm whether the consumer of <colDefs>
            # expects 1-based columns; if so both bounds are off by one.
            inipos = sep_position[i]+1
            finpos = sep_position[i+1]-1
            outDefs[colname] = (inipos,finpos)
        return outDefs

    def define_columnGrammar(datafile,**kwargs):
        """Grammar element for fixed-width ('ascii.ipac') files."""
        properties = ['topIgnoredLines']
        grammar_element = ET.Element('columnGrammar')
        for k,v in kwargs.items():
            if k in properties:
                # Attribute values must be strings to be serialisable.
                grammar_element.set(k,str(v))
        coldef_element = ET.SubElement(grammar_element,'colDefs')
        colDefsText = '\n'
        for c,d in get_columnDefs(datafile).items():
            # One 'name:first-last' entry per column.
            colDefsText += '\t{0}:{1}-{2}\n'.format(c,d[0],d[1])
        coldef_element.text = colDefsText
        return grammar_element

    def define_csvGrammar(datafile,**kwargs):
        """Placeholder: no CSV grammar yet."""
        raise NotImplementedError("grammar for format 'ascii.csv' is not implemented")

    def define_fitsTableGrammar(datafile,**kwargs):
        """Placeholder: no FITS table grammar yet."""
        raise NotImplementedError("grammar for format 'fits.table' is not implemented")

    grammars = {'ascii.ipac':define_columnGrammar,
                'ascii.csv' :define_csvGrammar,
                'fits.table':define_fitsTableGrammar}

    def define_grammar(datafile,format,**kwargs):
        """Build the grammar element for `format` and attach its <source>."""
        ge = grammars[format](datafile,**kwargs)
        source = ET.SubElement(ge,'source')
        source.text = datafile
        return ge

    def define_make():
        """Build the <make table="main"> element with a catch-all rowmaker."""
        make_element = ET.Element('make',{'table':'main'})
        ET.SubElement(make_element,'rowmaker',{'idmaps':'*'})
        return make_element

    xdata = ET.Element('data',{'id':'import'})
    # Forward both the format and the extra attributes explicitly.
    eg = define_grammar(datafile,format,**kwargs)
    em = define_make()
    xdata.extend([eg,em])
    return xdata
    
def define_service(resource_element):
    """
    Attach a <service id="cone"> element to `resource_element`.

    The service receives, in this order:
      * a <dbCore queriedTable="main"> child that itself contains
        a <FEED source="//scs#coreDescs"/> element;
      * one <publish sets="local"/> child per renderer
        ('scs.xml', then 'form').

    Returns the newly created <service> element.
    """
    service_attributes = {'id':'cone','allowed':'scs.xml,form,static'}
    service_element = ET.SubElement(resource_element,'service',service_attributes)

    # Core: queries the 'main' table.
    core_element = ET.SubElement(service_element,'dbCore',{'queriedTable':'main'})
    ET.SubElement(core_element,'FEED',{'source':"//scs#coreDescs"})

    # One publish entry per renderer.
    for renderer in ('scs.xml','form'):
        ET.SubElement(service_element,'publish',{'render':renderer,'sets':'local'})

    return service_element

def prettify(elem):
    """Serialise `elem` and return it as an indented XML string.

    The element is first dumped with ElementTree (utf-8), then re-parsed
    with minidom, whose pretty-printer applies a two-space indent.
    """
    from xml.dom import minidom
    serialized = ET.tostring(elem, 'utf-8')
    return minidom.parseString(serialized).toprettyxml(indent="  ")

# Assemble the descriptor: root <resource>, then the table and data
# definitions, then the service (which attaches itself to the root).
xroot = define_resource('teste')
xtable = define_table(master)
xdata = define_data(datafile)
xroot.append(xtable)
xroot.append(xdata)

xserv = define_service(xroot)

# Show the resulting document.
print(prettify(xroot))

#tree = open('table_meta.xml','w')
#tree.write(prettify(xroot))
#tree.close()


<?xml version="1.0" ?>
<resource>
  <schema>teste</schema>
  <meta name="description">None</meta>
  <meta name="facility">None</meta>
  <meta name="shortName">None</meta>
  <meta name="creationDate">None</meta>
  <meta name="subject">None</meta>
  <meta name="service">None</meta>
  <meta name="copyright">None</meta>
  <meta name="title">None</meta>
  <meta name="referenceURL">None</meta>
  <meta name="source">None</meta>
  <meta name="coverage.waveband">None</meta>
  <meta name="instrument">None</meta>
  <meta name="creator.name">None</meta>
  <meta name="type">None</meta>
  <table id="main" mixin="//scs#q3cindex" onDisk="True">
    <index columns=""/>
    <column name="OBJECT" unit=""/>
    <column name="ra" unit="deg"/>
    <column name="dec" unit="deg"/>
    <column name="energy" unit="Hz"/>
    <column name="Denergy" unit="Hz"/>
    <column name="flux" unit="erg / (cm2 s)"/>
    <column name="Dflux" unit="erg / (cm2 s)"/>
    <column name="upper_limit" unit=""/>
    <column name="D