# Compare atomic files

This notebook shows how to compare the attributes `levels_prepared` and `lines_prepared` generated by Carsus and stored in HDF5 format.

In [1]:
import pandas as pd
from carsus.util import parse_selected_species

  return f(*args, **kwds)
  from tqdm.autonotebook import tqdm


## Set paths

In [2]:
path_a = 'kurucz_H-Zn_chianti_H-He_sql.h5'
path_b = 'kurucz_H-Zn_chianti_H-He_pandas.h5'

## Define functions

In [3]:
def highlight_values(val):
    if val == True:
        return 'background-color: #BCF5A9'
    else:
        return 'background-color: #F5A9A9'
    
def highlight_diff(val):
    if val == 0:
        return 'background-color: #BCF5A9'
    else:
        return 'background-color: #F5A9A9'

In [4]:
def compare_levels_lines(path_a, path_b, ions='H-Zn'):
    
    # Read data
    levels_a = pd.read_hdf(path_a, key='levels')
    levels_b = pd.read_hdf(path_b, key='levels')
    lines_a = pd.read_hdf(path_a, key='lines')
    lines_b = pd.read_hdf(path_b, key='lines')
    
    # Get ions list
    ions = parse_selected_species(ions)
    
    lvl_eq = []
    lns_eq = []
    for ion in ions:
        
        # How many levels per ion in A
        try:
            num_lvl_a = len(levels_a.loc[ion])
            
        except (KeyError, TypeError, ValueError):
            num_lvl_a = 0
            
        # How many levels per ion in B
        try:
            num_lvl_b = len(levels_b.loc[ion])
            
        except (KeyError, TypeError, ValueError):
            num_lvl_b = 0

        # If level number is the same in A and B (and not zero) 
        # then compare cell against cell. `True` means all cells 
        # are equal in both dataframes.
        if num_lvl_a == num_lvl_b:
            val_lvl = True
            
            if num_lvl_a != 0:
                try:
                    k = levels_a.loc[ion].eq(levels_b.loc[ion]).sum().sum()
                    if num_lvl_a*3 != k:  # x3 because this df has three columns!
                        val_lvl = False
                        
                except (KeyError, TypeError, ValueError):
                    pass

        else:
            val_lvl = False
            
        # Append the results
        lvl_eq.append((ion, num_lvl_a, num_lvl_b, val_lvl))
        
        
        # Same for lines
        try:
            num_lns_a = len(lines_a.loc[ion])
            
        except (KeyError, TypeError, ValueError):
            num_lns_a = 0
            
        try:
            num_lns_b = len(lines_a.loc[ion])
            
        except (KeyError, TypeError, ValueError):
            num_lns_b = 0

        if num_lns_a == num_lns_b:
            val_lns = True
            
            if num_lns_a != 0:
                try:
                    k = lines_a.loc[ion].eq(lines_b.loc[ion]).sum().sum()
                    if num_lns_a*8 != k:
                        val_lvl = False
            
                except (KeyError, TypeError, ValueError):
                    pass
            
        else:
            val_lns = False
        
        lns_eq.append((ion, num_lns_a, num_lns_b, val_lns))
            
    df_lvl = pd.DataFrame(lvl_eq, columns=['ion', 'num_lvl_a', 'num_lvl_b', 'val_lvl'])
    df_lns = pd.DataFrame(lns_eq, columns=['ion', 'num_lns_a', 'num_lns_b', 'val_lns'])
    df = pd.merge(df_lvl, df_lns).set_index('ion')
    
    df['diff_lvl'] = abs(df['num_lvl_b'] - df['num_lvl_a'])
    df['diff_lns'] = abs(df['num_lns_b'] - df['num_lns_a'])
    df = df[['num_lvl_a', 'num_lvl_b', 'diff_lvl', 'val_lvl', 
          'num_lns_a', 'num_lns_b', 'diff_lns', 'val_lns']]

    return df

In [5]:
def lvl_diff(path_a, path_b, ion):
    
    # Read data
    levels_a = pd.read_hdf(path_a, key='levels').loc[ion]
    levels_b = pd.read_hdf(path_b, key='levels').loc[ion]
    
    df = levels_a.join(levels_b, how='outer', lsuffix='_a', rsuffix='_b')
    df['energy_diff'] = abs(df['energy_b'] - df['energy_a'])
    df.loc[df['g_a'] == df['g_b'], 'g_values'] = True
    df.loc[df['g_a'] != df['g_b'], 'g_values'] = False
    df.loc[df['metastable_a'] == df['metastable_b'], 'metastable_values'] = True
    df.loc[df['metastable_a'] != df['metastable_b'], 'metastable_values'] = False
    df = df[['energy_a', 'energy_b', 'energy_diff', 'g_a', 'g_b', 'g_values',
             'metastable_a', 'metastable_b', 'metastable_values']]

    return df

## Summary and test table

In [6]:
tt = compare_levels_lines(path_a, path_b)
summary = pd.DataFrame(tt.sum().astype(int), columns=['Total'])



In [7]:
summary

Unnamed: 0,Total
num_lvl_a,24537
num_lvl_b,24538
diff_lvl,1
val_lvl,464
num_lns_a,271771
num_lns_b,271771
diff_lns,0
val_lns,465


- **num_???:** _(int)_ total number of levels/lines.
- **diff_???:** _(int)_ difference in total number of levels/lines between A and B.
- **val_???:** _(bool)_ means every single cell in dataframe A is equal to the one on B.

In [8]:
tt.style.applymap(highlight_values, subset=['val_lvl', 'val_lns']).applymap(
    highlight_diff, subset=['diff_lvl', 'diff_lns'])

Unnamed: 0_level_0,num_lvl_a,num_lvl_b,diff_lvl,val_lvl,num_lns_a,num_lns_b,diff_lns,val_lns
ion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"(1, 0)",25,25,0,True,74,74,0,True
"(2, 0)",49,49,0,True,174,174,0,True
"(2, 1)",25,25,0,True,67,67,0,True
"(3, 0)",67,67,0,True,403,403,0,True
"(3, 1)",55,55,0,True,135,135,0,True
"(3, 2)",1,1,0,True,0,0,0,True
"(4, 0)",28,28,0,True,39,39,0,True
"(4, 1)",39,39,0,True,185,185,0,True
"(4, 2)",17,17,0,True,27,27,0,True
"(4, 3)",1,1,0,True,0,0,0,True


## Check specific ion levels

This function is specially useful when you have same number of levels for a specific ion and `val_lvl` is `False`. Otherwise the color scheme could be confusing, so focus on energy values.

In [9]:
ion = (18,1)

In [10]:
lvl_diff(path_a, path_b, ion).style.applymap(
    highlight_diff, subset=['energy_diff']).applymap(
        highlight_values, subset=['g_values', 'metastable_values'])

Unnamed: 0_level_0,energy_a,energy_b,energy_diff,g_a,g_b,g_values,metastable_a,metastable_b,metastable_values
level_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,0.0,0.0,0.0,2.0,2,True,True,True,True
1,0.177493,0.0,0.177493,4.0,4,True,True,True,True
2,13.4798,0.177493,13.3023,2.0,4,False,False,False,True
3,16.4065,13.4798,2.92675,8.0,2,False,True,True,True
4,16.4256,16.4065,0.0190743,6.0,8,False,True,True,True
5,16.4441,16.4256,0.0185382,4.0,6,False,True,True,True
6,16.4574,16.4441,0.0132633,2.0,4,False,True,True,True
7,16.6439,16.4574,0.186477,6.0,2,False,False,False,True
8,16.7485,16.6439,0.104674,4.0,6,False,False,False,True
9,16.8125,16.7485,0.0639432,2.0,4,False,False,False,True
