In [1]:
import math
import numpy as np
from astropy.io.votable import parse_single_table
import astropy.units as u
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
import glob
from scipy import stats

import seaborn as sns
import pandas as pd
sns.set()

## Verify a new version of a catalogue

Check the contents of a new catalogue against an old one and report on any changes.

In [118]:
def get_changes(orig, new):
    """Classify the entries of ``new`` relative to ``orig``.

    Both inputs are sorted and converted to NumPy arrays, so every returned
    array is in sorted order.

    Parameters
    ----------
    orig : iterable
        Entries (e.g. column names or row ids) of the original catalogue.
    new : iterable
        Entries of the new catalogue.

    Returns
    -------
    tuple of numpy.ndarray
        ``(added, removed, same)`` where ``added`` holds the entries only in
        ``new``, ``removed`` the entries only in ``orig`` and ``same`` the
        entries of ``new`` that are also in ``orig``.
    """
    new_cols = np.array(sorted(new))
    orig_cols = np.array(sorted(orig))
    # np.isin replaces np.in1d, which is deprecated in recent NumPy releases;
    # for 1-D input the two give the same boolean mask.
    is_added = ~np.isin(new_cols, orig_cols)
    is_removed = ~np.isin(orig_cols, new_cols)
    return new_cols[is_added], orig_cols[is_removed], new_cols[~is_added]

    
def compare_tables(orig_cat, new_cat):
    """Print a report of the structural differences between two catalogues.

    The report lists the columns added to and removed from ``new_cat``
    relative to ``orig_cat``, the change in the number of rows and, for each
    column present in both tables, any change in its unit or metadata.

    Parameters
    ----------
    orig_cat : table
        The original catalogue. It must provide ``colnames``, ``len()`` and
        column lookup by name; each column must expose ``unit`` and ``meta``.
    new_cat : table
        The new catalogue, with the same interface as ``orig_cat``.

    Returns
    -------
    None
        The report is written to standard output.
    """
    added, removed, same = get_changes(orig_cat.colnames, new_cat.colnames)

    print("Comparing table structure...")
    print(" Added column(s)", added)
    print(" Removed column(s)", removed)
    print(" Matching column(s)", len(same))

    # Positive when the new catalogue has gained rows relative to the
    # original, negative when it has lost rows.
    row_diff = len(new_cat) - len(orig_cat)
    if row_diff > 0:
        print(" Added {} rows".format(row_diff))
    elif row_diff < 0:
        print(" Removed {} rows".format(-row_diff))
    else:
        print(" Same number of rows ({})".format(len(orig_cat)))

    print('')

    print("Comparing column definitions...")
    for colname in same:
        orig_col = orig_cat[colname]
        new_col = new_cat[colname]

        # NOTE(review): units that print identically can still compare as
        # different (the recorded output shows "chan to chan") - confirm
        # whether such cases should be reported.
        if orig_col.unit != new_col.unit:
            print(" Unit for {} changed from {} to {}".format(colname, orig_col.unit, new_col.unit))
        # The UCD is stored under the 'ucd' key of the column meta, so a UCD
        # change is reported by this comparison as well.
        if orig_col.meta != new_col.meta:
            print(" Meta for {} changed from {} to {}".format(colname, orig_col.meta, new_col.meta))

    print('')
        
def get_ucd_col(table):
    """Return the identifier column of ``table``, chosen by its UCD.

    A column is a candidate when its ``meta`` has a ``'ucd'`` entry that
    contains ``'meta.id'``. The first candidate is kept unless a later
    candidate's UCD also contains ``'meta.main'``, in which case that later
    one replaces it. Returns ``None`` when no column qualifies.
    """
    chosen = None
    for column in table.itercols():
        if 'ucd' not in column.meta:
            continue
        ucd = column.meta['ucd']
        if 'meta.id' not in ucd:
            continue
        # Take the first id column found, but let a main id column override it.
        if chosen is None or 'meta.main' in ucd:
            chosen = column
    return chosen
        
    
def compare_values(orig_cat, new_cat, max_diff=20):
    """Print a report of the value differences between two catalogues.

    Rows are matched on the identifier column that ``get_ucd_col`` finds in
    each catalogue. The ids added to and removed from ``new_cat`` are listed,
    then every row whose id is in both catalogues is compared column by
    column over the columns the two catalogues share.

    Parameters
    ----------
    orig_cat : table
        The original catalogue.
    new_cat : table
        The new catalogue.
    max_diff : int, optional
        Maximum number of individual value changes to print. Further changes
        are still counted but are summarised by a ``...`` line.

    Returns
    -------
    None
        The report is written to standard output.

    Raises
    ------
    ValueError
        If either catalogue has no identifier column to match rows on.
    """
    print("Comparing table contents..")
    _, _, same_cols = get_changes(orig_cat.colnames, new_cat.colnames)

    # Each catalogue is searched for its own id column so that ids which were
    # added or removed are detected and rows are matched by id, not position.
    orig_id_col = get_ucd_col(orig_cat)
    new_id_col = get_ucd_col(new_cat)
    if orig_id_col is None or new_id_col is None:
        raise ValueError("Both catalogues need a column with a 'meta.id' UCD to match rows on")

    # Extract the id arrays once rather than on every loop iteration.
    orig_ids = orig_id_col.value.data
    new_ids = new_id_col.value.data

    added, removed, same = get_changes(orig_ids, new_ids)

    print(" Added id(s)", added)
    print(" Removed id(s)", removed)
    print(" Same id(s)", len(same))

    num_diff = 0
    for row_id in same:
        orig_row = orig_cat[orig_ids == row_id]
        new_row = new_cat[new_ids == row_id]

        for colname in same_cols:
            # Index 0 takes the first row carrying this id in each catalogue.
            orig_val = orig_row[colname].value[0]
            new_val = new_row[colname].value[0]

            if orig_val != new_val:
                num_diff += 1
                if num_diff <= max_diff:
                    print(" In record {} {} changed from {} to {}".format(row_id, colname, orig_val, new_val))

    if num_diff > max_diff:
        print(' ...')
    print(" Found {} differences in {} rows.".format(num_diff, len(same)))



def compare_catalogs(orig_cat_name, new_cat_name):
    """Load two VOTable catalogue files and print a comparison report.

    Each file is read with ``parse_single_table`` and converted with
    ``to_table()``; the resulting tables are then passed to
    ``compare_tables`` and ``compare_values``.

    Parameters
    ----------
    orig_cat_name : str
        Path of the original catalogue file.
    new_cat_name : str
        Path of the new catalogue file.
    """
    print ("Comparing {} with {}".format(orig_cat_name, new_cat_name))

    # The original catalogue is loaded first, then the new one.
    old_table, fresh_table = (
        parse_single_table(path).to_table()
        for path in (orig_cat_name, new_cat_name)
    )

    compare_tables(old_table, fresh_table)
    compare_values(old_table, fresh_table)
    print ('')


    

In [119]:

# Each pair is (original catalogue file, new catalogue file).
catalogue_pairs = [
    ('sb10944/v3a/askap_spectra.vot', 'sb10944/v3b/askap_spectra.vot'),
    ('sb10944/v3a/askap_absorption.vot', 'sb10944/v3b/askap_absorption.vot'),
]
for old_path, new_path in catalogue_pairs:
    compare_catalogs(old_path, new_path)





Comparing sb10944/v3a/askap_spectra.vot with sb10944/v3b/askap_spectra.vot
Comparing table structure...
 Added column(s) ['flux_int' 'glat' 'glon']
 Removed column(s) []
 Matching column(s) 23
 Same number of rows (373)

Comparing column definitions...
 Unit for flux_peak changed from mJy/beam to mJy / beam
 Unit for mean_cont changed from mJy/beam to mJy / beam

Comparing table contents..
 Added id(s) []
 Removed id(s) []
 Same id(s) 373
 Found 0 differences in 373 rows.

Comparing sb10944/v3a/askap_absorption.vot with sb10944/v3b/askap_absorption.vot
Comparing table structure...
 Added column(s) ['e_ew' 'ew']
 Removed column(s) []
 Matching column(s) 24
 Same number of rows (134)

Comparing column definitions...
 Unit for flux_peak changed from mJy/beam to mJy / beam
 Unit for length changed from chan to chan
 Unit for mean_cont changed from mJy/beam to mJy / beam

Comparing table contents..
 Added id(s) []
 Removed id(s) []
 Same id(s) 134
 Found 0 differences in 134 rows.

