# Compare Atomic Files

This notebook shows how to compare the `levels_prepared` and `lines_prepared` DataFrames of the atomic files generated by Carsus.


Let's create two different TARDIS atomic files to use as examples.

In [1]:
from carsus.io.nist import NISTWeightsComp, NISTIonizationEnergies
from carsus.io.kurucz import GFALLReader
from carsus.io.zeta import KnoxLongZeta
from carsus.io.chianti_ import ChiantiReader
from carsus.io.output import TARDISAtomData

 ChiantiPy version 0.8.4 
 found PyQt5 widgets
 using PyQt5 widgets


For the first atomic file we grab species `H-C` from GFALL and `H-He` from Chianti.

In [2]:
# Atomic weights and ionization energies come from the NIST readers.
atomic_weights = NISTWeightsComp()
ionization_energies = NISTIonizationEnergies('H-C')
# Species selection for the first file: H-C from GFALL, H-He from Chianti.
gfall_reader = GFALLReader(ions='H-C')
chianti_reader = ChiantiReader(ions='H-He', collisions=True, priority=20)
zeta_data = KnoxLongZeta()

[[1m carsus.io.nist.weightscomp[0m][[1;37mINFO[0m] - Downloading data from the NIST Atomic Weights and Isotopic Compositions Database. ([1mweightscomp.py[0m:49)


[[1m  carsus.io.nist.ionization[0m][[1;37mINFO[0m] - Downloading ionization energies from the NIST Atomic Spectra Database. ([1mionization.py[0m:65)




In [3]:
# Bundle all readers into the first atomic dataset (Chianti reader: H-He).
atom_data_a = TARDISAtomData(
    atomic_weights,
    ionization_energies,
    gfall_reader,
    zeta_data,
    chianti_reader,
)

[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Ingesting energy levels. ([1mbase.py[0m:291)


[[1m     carsus.io.kurucz.gfall[0m][[1;37mINFO[0m] - Parsing GFALL from: https://media.githubusercontent.com/media/tardis-sn/carsus-db/master/gfall/gfall_latest.dat ([1mgfall.py[0m:148)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - GFALL selected species: Li 0, Li 1, Be 0, Be 1, Be 2, B 0, B 1, B 2, B 3, C 0, C 1, C 2, C 3. ([1mbase.py[0m:325)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Chianti selected species: H 0, He 0, He 1. ([1mbase.py[0m:329)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Ingesting transition lines. ([1mbase.py[0m:381)


[[1m     carsus.io.kurucz.gfall[0m][[1;37mINFO[0m] - Extracting line data: atomic_number, ion_charge, energy_lower, j_lower, energy_upper, j_upper, wavelength, loggf. ([1mgfall.py[0m:352)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Matching lines and levels. ([1mbase.py[0m:420)


  result = getattr(ufunc, method)(*inputs, **kwargs)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Ingesting collisional strengths. ([1mbase.py[0m:562)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Matching collisions and levels. ([1mbase.py[0m:574)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Finished. ([1mbase.py[0m:77)


In [4]:
# Write the first dataset to disk; the comparison further down reads it back by path.
atom_data_a.to_hdf('A.h5')

[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Signing TARDISAtomData. ([1mbase.py[0m:933)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - MD5: d74e41cc613b73f604ecfdde781786eb ([1mbase.py[0m:934)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - UUID1: 5a874f1ee75e11ebbe2a00224821fa3a ([1mbase.py[0m:935)


For the second atomic file we grab species `H-C` from GFALL and `C` from Chianti.

In [5]:
# Rebind `chianti_reader` so that only carbon is requested from Chianti for the second file.
chianti_reader = ChiantiReader(ions='C', collisions=True, priority=20)

In [6]:
# Same inputs as the first dataset, except for the carbon-only Chianti reader.
atom_data_b = TARDISAtomData(
    atomic_weights,
    ionization_energies,
    gfall_reader,
    zeta_data,
    chianti_reader,
)

[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Ingesting energy levels. ([1mbase.py[0m:291)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - GFALL selected species: He 0, He 1, Li 0, Li 1, Be 0, Be 1, Be 2, B 0, B 1, B 2, B 3. ([1mbase.py[0m:325)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Chianti selected species: C 0, C 1, C 2, C 3, C 4, C 5. ([1mbase.py[0m:329)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Ingesting transition lines. ([1mbase.py[0m:381)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Matching lines and levels. ([1mbase.py[0m:420)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Ingesting collisional strengths. ([1mbase.py[0m:562)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Matching collisions and levels. ([1mbase.py[0m:574)


  x = (kt / delta_e) / (kt / delta_e + c)


  x = (kt / delta_e) / (kt / delta_e + c)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Finished. ([1mbase.py[0m:77)


In [7]:
# Write the second dataset to disk; the comparison further down reads it back by path.
atom_data_b.to_hdf('B.h5')

[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - Signing TARDISAtomData. ([1mbase.py[0m:933)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - MD5: ff14b661fc5865ddd846984209c85057 ([1mbase.py[0m:934)


[[1m      carsus.io.output.base[0m][[1;37mINFO[0m] - UUID1: 6842a996e75e11ebbe2a00224821fa3a ([1mbase.py[0m:935)


Define the following functions to compare both dataframes.

In [8]:
import pandas as pd
from carsus.util import parse_selected_species

In [9]:
def highlight_values(val):
    """Return the CSS background for a boolean cell.

    Cells that compare equal to `True` get the green background, every
    other value gets the red one.
    """
    # Equality is used on purpose (not identity or plain truthiness), so
    # values such as NumPy booleans match while e.g. `2` does not.
    is_true = val == True  # noqa: E712
    return 'background-color: #BCF5A9' if is_true else 'background-color: #F5A9A9'
    
def highlight_diff(val):
    """Return the CSS background for a difference cell.

    A value equal to zero gets the green background, any other value gets
    the red one.
    """
    css_by_match = {
        True: 'background-color: #BCF5A9',
        False: 'background-color: #F5A9A9',
    }
    return css_by_match[bool(val == 0)]

In [10]:
def _count_rows(frame, ion):
    """Return the number of rows stored for `ion` in `frame` (0 if there are none)."""
    try:
        return len(frame.loc[ion])
    except (KeyError, TypeError, ValueError):
        # The ion cannot be looked up in this table.
        return 0


def _same_values(frame_a, frame_b, ion):
    """Return `True` if every cell stored for `ion` is equal in both frames."""
    try:
        # `eq` aligns on index and columns, and labels present in only one
        # frame compare as `False`, so no column count has to be hard-coded.
        return bool(frame_a.loc[ion].eq(frame_b.loc[ion]).all().all())
    except (KeyError, TypeError, ValueError):
        # Best effort: a comparison that cannot be carried out is not
        # reported as a mismatch.
        return True


def compare_levels_lines(path_a, path_b, ions='H-Zn'):
    """Compare the `levels` and `lines` tables of two atomic files ion by ion.

    Parameters
    ----------
    path_a, path_b : str
        Paths of the HDF5 files. The keys `levels` and `lines` are read
        from each of them.
    ions : str, optional
        Species selection handed to `parse_selected_species`.

    Returns
    -------
    pandas.DataFrame
        One row per selected ion (index `ion`) with the columns
        `num_lvl_a`, `num_lvl_b`, `diff_lvl`, `val_lvl`, `num_lns_a`,
        `num_lns_b`, `diff_lns` and `val_lns`:

        - `num_*_a` / `num_*_b`: number of levels/lines in each file,
        - `diff_*`: absolute difference between both numbers,
        - `val_*`: `True` if both files hold the same number of rows and
          all of their cells are equal.
    """
    # (tag, table from A, table from B), in the order of the output columns.
    tables = (
        ('lvl', pd.read_hdf(path_a, key='levels'), pd.read_hdf(path_b, key='levels')),
        ('lns', pd.read_hdf(path_a, key='lines'), pd.read_hdf(path_b, key='lines')),
    )

    columns = ['ion']
    for tag, _, _ in tables:
        columns += ['num_' + tag + '_a', 'num_' + tag + '_b',
                    'diff_' + tag, 'val_' + tag]

    records = []
    for ion in parse_selected_species(ions):
        record = {'ion': ion}

        for tag, frame_a, frame_b in tables:
            num_a = _count_rows(frame_a, ion)
            num_b = _count_rows(frame_b, ion)

            # Different row counts can never match. Equal counts are compared
            # cell by cell, unless there is nothing to compare.
            same = num_a == num_b and (
                num_a == 0 or _same_values(frame_a, frame_b, ion))

            record['num_' + tag + '_a'] = num_a
            record['num_' + tag + '_b'] = num_b
            record['diff_' + tag] = abs(num_b - num_a)
            record['val_' + tag] = same

        records.append(record)

    return pd.DataFrame(records, columns=columns).set_index('ion')

Then, the comparison is straightforward:

In [11]:
# Compare only H-C, the species range requested when both files were built.
tt = compare_levels_lines('A.h5', 'B.h5', ions='H-C')





```
num_xxx_y (int) : number of levels (xxx = lvl) or lines (xxx = lns) in file y (a or b).
diff_xxx (int) : absolute difference in the number of levels/lines between both files.
val_xxx (bool) : `True` if the levels/lines have the same values in both files.
```

In [12]:
# Colour the boolean columns and the difference columns cell by cell.
(
    tt.style
    .applymap(highlight_values, subset=['val_lvl', 'val_lns'])
    .applymap(highlight_diff, subset=['diff_lvl', 'diff_lns'])
)

Unnamed: 0_level_0,num_lvl_a,num_lvl_b,diff_lvl,val_lvl,num_lns_a,num_lns_b,diff_lns,val_lns
ion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"(1, 0)",25,1,24,False,74,0,74,False
"(2, 0)",49,744,695,False,174,605,431,False
"(2, 1)",25,9,16,False,67,7,60,False
"(3, 0)",67,67,0,True,403,403,0,True
"(3, 1)",55,55,0,True,135,135,0,True
"(3, 2)",1,1,0,True,0,0,0,True
"(4, 0)",28,28,0,True,39,39,0,True
"(4, 1)",39,39,0,True,185,185,0,True
"(4, 2)",17,17,0,True,27,27,0,True
"(4, 3)",1,1,0,True,0,0,0,True


In [13]:
# nbsphinx hidden cell
# Remove the example HDF5 files written earlier in this notebook.
!rm A.h5 B.h5