# Compare atomic files

In [1]:
import pandas as pd
from carsus.util import parse_selected_species

In [2]:
path_a = '/home/epassaro/Downloads/tardis-data/kurucz_cd23_latest.h5'
path_b = '/home/epassaro/Downloads/tardis-data/kurucz_cd23_latest_chianti_Si_2.h5'

## Global comparison

In [3]:
pd.read_hdf(path_a, key='atom_data').equals(pd.read_hdf(path_b, key='atom_data'))

True

In [4]:
pd.read_hdf(path_a, key='ionization_data').equals(pd.read_hdf(path_b, key='ionization_data'))

True

In [5]:
pd.read_hdf(path_a, key='levels').equals(pd.read_hdf(path_b, key='levels'))

False

In [6]:
pd.read_hdf(path_a, key='lines').equals(pd.read_hdf(path_b, key='lines'))

False

In [7]:
pd.read_hdf(path_a, key='zeta_data').equals(pd.read_hdf(path_b, key='zeta_data'))

True

In [8]:
pd.read_hdf(path_a, key='macro_atom_data').equals(pd.read_hdf(path_b, key='macro_atom_data'))

False

In [9]:
pd.read_hdf(path_a, key='macro_atom_references').equals(pd.read_hdf(path_b, key='macro_atom_references'))

False

## Detailed levels and lines comparison

In [10]:
def highlight_values(val):
    if val == True:
        return 'background-color: #BCF5A9'
    else:
        return 'background-color: #F5A9A9'

def highlight_diff(val):
    if val == 0:
        return 'background-color: #BCF5A9'
    else:
        return 'background-color: #F5A9A9'

In [11]:
def compare_levels_lines(path_a, path_b, ions='H-Zn'):

    # Read data
    levels_a = pd.read_hdf(path_a, key='levels')
    levels_b = pd.read_hdf(path_b, key='levels')
    lines_a = pd.read_hdf(path_a, key='lines')
    lines_b = pd.read_hdf(path_b, key='lines')

    # Get ions list
    ions = parse_selected_species(ions)

    lvl_eq = []
    lns_eq = []
    for ion in ions:

        # How many levels per ion in A
        try:
            num_lvl_a = len(levels_a.loc[ion])

        except (KeyError, TypeError, ValueError):
            num_lvl_a = 0

        # How many levels per ion in B
        try:
            num_lvl_b = len(levels_b.loc[ion])

        except (KeyError, TypeError, ValueError):
            num_lvl_b = 0

        # If level number is the same in A and B (and not zero)
        # then compare cell against cell. `True` means all cells
        # are equal in both dataframes.
        if num_lvl_a == num_lvl_b:
            eq_lvl = True

            if num_lvl_a != 0:
                try:
                    k = levels_a.loc[ion].eq(levels_b.loc[ion]).sum().sum()
                    if num_lvl_a*3 != k:  # x3 because this df has three columns!
                        eq_lvl = False

                except (KeyError, TypeError, ValueError):
                    pass

        else:
            eq_lvl = False

        # Append the results
        lvl_eq.append((ion, num_lvl_a, num_lvl_b, eq_lvl))


        # Same for lines
        try:
            num_lns_a = len(lines_a.loc[ion])

        except (KeyError, TypeError, ValueError):
            num_lns_a = 0

        try:
            num_lns_b = len(lines_a.loc[ion])

        except (KeyError, TypeError, ValueError):
            num_lns_b = 0

        if num_lns_a == num_lns_b:
            eq_lns = True

            if num_lns_a != 0:
                try:
                    k = lines_a.loc[ion].eq(lines_b.loc[ion]).sum().sum()
                    if num_lns_a*8 != k:
                        eq_lvl = False

                except (KeyError, TypeError, ValueError):
                    pass

        else:
            eq_lns = False

        lns_eq.append((ion, num_lns_a, num_lns_b, eq_lns))

    df_lvl = pd.DataFrame(lvl_eq, columns=['ion', 'num_lvl_a',
                                           'num_lvl_b', 'eq_lvl'])
    df_lns = pd.DataFrame(lns_eq, columns=['ion', 'num_lns_a',
                                           'num_lns_b', 'eq_lns'])
    df = pd.merge(df_lvl, df_lns).set_index('ion')

    df['lvl_diff'] = abs(df['num_lvl_b'] - df['num_lvl_a'])
    df['diff_lns'] = abs(df['num_lns_b'] - df['num_lns_a'])
    df = df[['num_lvl_a', 'num_lvl_b', 'lvl_diff', 'eq_lvl',
          'num_lns_a', 'num_lns_b', 'diff_lns', 'eq_lns']]

    return df

In [12]:
tt = compare_levels_lines(path_a, path_b)
summary = pd.DataFrame(tt.sum().astype(int), columns=['Total'])
summary.loc['ions'] = len(tt)
summary = summary.loc[['ions', 'eq_lvl', 'num_lvl_a', 'num_lvl_b', 'lvl_diff',
                       'eq_lns', 'num_lns_a', 'num_lns_b', 'diff_lns']]



In [13]:
summary

Unnamed: 0,Total
ions,465
eq_lvl,464
num_lvl_a,25193
num_lvl_b,25050
lvl_diff,143
eq_lns,465
num_lns_a,272068
num_lns_b,272068
diff_lns,0


In [14]:
tt.style.applymap(highlight_values, subset=['eq_lvl', 'eq_lns']).applymap(
    highlight_diff, subset=['lvl_diff', 'diff_lns'])

Unnamed: 0_level_0,num_lvl_a,num_lvl_b,lvl_diff,eq_lvl,num_lns_a,num_lns_b,diff_lns,eq_lns
ion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"(1, 0)",1,1,0,True,0,0,0,True
"(2, 0)",744,744,0,True,605,605,0,True
"(2, 1)",9,9,0,True,7,7,0,True
"(3, 0)",67,67,0,True,403,403,0,True
"(3, 1)",55,55,0,True,135,135,0,True
"(3, 2)",1,1,0,True,0,0,0,True
"(4, 0)",28,28,0,True,39,39,0,True
"(4, 1)",39,39,0,True,185,185,0,True
"(4, 2)",17,17,0,True,27,27,0,True
"(4, 3)",1,1,0,True,0,0,0,True
