# Compare Atomic Files

This notebook shows how to compare the `levels_prepared` and `lines_prepared` DataFrames of the atomic files generated by Carsus.

In [1]:
import os
import pandas as pd
from carsus.util import parse_selected_species
from collections import defaultdict
from carsus.io.nist import NISTWeightsComp, NISTIonizationEnergies
from carsus.io.kurucz import GFALLReader
from carsus.io.zeta import KnoxLongZeta
from carsus.io.chianti_ import ChiantiReader
from carsus.io.output import TARDISAtomData

 ChiantiPy version 0.8.4 
 found PyQt5 widgets
 using PyQt5 widgets


In [2]:
ATOM1_PATH = os.environ.get("ATOM1_PATH", None)
ATOM2_PATH = os.environ.get("ATOM2_PATH", None)

Define the following functions to compare both dataframes.

In [3]:
import pandas as pd
from carsus.util import parse_selected_species

In [4]:
def highlight_values(val):
    if val == True:
        return 'background-color: #BCF5A9'
    else:
        return 'background-color: #F5A9A9'
    
def highlight_diff(val):
    if val == 0:
        return 'background-color: #BCF5A9'
    else:
        return 'background-color: #F5A9A9'

In [64]:
class AtomDataCompare:
    def __init__(self, d1_path=None, d2_path=None):
        self.d1_path = d1_path
        self.d2_path = d2_path
        self.setup()

    def setup(self):
        self.d1 = pd.HDFStore(self.d1_path)
        self.d2 = pd.HDFStore(self.d2_path) 

    def teardown(self):
        self.d1.close()
        self.d2.close()

    def comparision_table(self):
        d1_keys = self.d1.keys()
        d2_keys = self.d2.keys()
        self.d1_df = pd.DataFrame(index=d1_keys, columns=['exists'])
        self.d2_df = pd.DataFrame(index=d2_keys, columns=['exists'])
        self.d1_df['exists'] = True
        self.d2_df['exists'] = True
        joined_df = self.d1_df.join(self.d2_df, how='outer', lsuffix='_1', rsuffix='_2')
        joined_df = joined_df.fillna(False)
        self.tt = joined_df
    
    def compare(self, exclude_correct_matches=True):
        self.tt['match'] = None
        for index, row in self.tt.iterrows():
            if row[['exists_1', 'exists_2']].all():
                row1_df = self.d1[index]
                row2_df = self.d2[index]
                if row1_df.equals(row2_df):
                    self.tt.at[index, "match"] = True
                else:
                    self.tt.at[index, "match"] = False
            else:
                self.tt.at[index, "match"] = False
        if exclude_correct_matches:
            self.tt = self.tt[self.tt.match==False]
    
    @property
    def tt_stylized(self):
        return self.tt.style.applymap(highlight_values, subset=['exists_1','exists_2', 'match'])
                
            

In [65]:
pat = "/home/atharva/workspace/code/tardis-main/carsus/kurucz_cd23_chianti_He_cmfgen_H_Si_I-II.h5"
pat2 = "/home/atharva/workspace/code/tardis-main/refdata/atom_data/kurucz_cd23_chianti_H_He.h5"
atc=AtomDataCompare(pat, pat2)
atc.setup()
atc.comparision_table()
atc.compare()

# atc.teardown()

atc.tt

Unnamed: 0,exists_1,exists_2,match
/atom_data,True,True,False
/collision_data,False,True,False
/collision_data_temperatures,False,True,False
/collisions_data,True,False,False
/collisions_metadata,True,False,False
/ionization_data,True,True,False
/levels,False,True,False
/levels_data,True,False,False
/lines,False,True,False
/lines_data,True,False,False


In [66]:
atc.tt_stylized

Unnamed: 0,exists_1,exists_2,match
/atom_data,True,True,False
/collision_data,False,True,False
/collision_data_temperatures,False,True,False
/collisions_data,True,False,False
/collisions_metadata,True,False,False
/ionization_data,True,True,False
/levels,False,True,False
/levels_data,True,False,False
/lines,False,True,False
/lines_data,True,False,False


In [84]:
atc.d1['ionization_data'].rename("d1_ion")

atomic_number  ion_number
1              1                13.598434
2              1                24.587389
               2                54.417765
3              1                 5.391715
               2                75.640097
                                 ...     
30             26             2491.500000
               27             2669.900000
               28             2781.996000
               29            11864.939900
               30            12388.929000
Name: d1_ion, Length: 465, dtype: float64

In [85]:
concat = pd.concat([atc.d1['ionization_data'].rename("d1_ion"), atc.d2['ionization_data'].rename("d2_ion")], axis=1)

In [86]:
concat

Unnamed: 0_level_0,Unnamed: 1_level_0,d1_ion,d2_ion
atomic_number,ion_number,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,13.598434,13.598434
2,1,24.587389,24.587388
2,2,54.417765,54.417763
3,1,5.391715,5.391715
3,2,75.640097,75.640094
...,...,...,...
30,26,2491.500000,2491.500000
30,27,2669.900000,2669.900000
30,28,2781.996000,2781.996000
30,29,11864.939900,11864.939400


In [88]:
concat["match"]= True
for index, item in concat.iterrows():
    if item["d1_ion"] != item["d2_ion"]:
        item.match = False

In [90]:
concat[concat.match==False]

Unnamed: 0_level_0,Unnamed: 1_level_0,d1_ion,d2_ion,match
atomic_number,ion_number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [28]:
atc.tt

Unnamed: 0,exists_1,exists_2,match
/atom_data,True,True,False
/collision_data,False,True,False
/collision_data_temperatures,False,True,False
/collisions_data,True,False,False
/collisions_metadata,True,False,False
/ionization_data,True,True,False
/levels,False,True,False
/levels_data,True,False,False
/lines,False,True,False
/lines_data,True,False,False


In [42]:
df = atc.tt

In [50]:
df[df.match==False]

Unnamed: 0,exists_1,exists_2,match
/atom_data,True,True,False
/collision_data,False,True,False
/collision_data_temperatures,False,True,False
/collisions_data,True,False,False
/collisions_metadata,True,False,False
/ionization_data,True,True,False
/levels,False,True,False
/levels_data,True,False,False
/lines,False,True,False
/lines_data,True,False,False


In [10]:
atc.d1_df

Unnamed: 0,exists
/atom_data,True
/collisions_data,True
/collisions_metadata,True
/ionization_data,True
/levels_data,True
/lines_data,True
/macro_atom_data,True
/macro_atom_references,True
/metadata,True
/photoionization_data,True


In [11]:
atc.d2_df

Unnamed: 0,exists
/atom_data,True
/collision_data,True
/collision_data_temperatures,True
/ionization_data,True
/levels,True
/lines,True
/macro_atom_data,True
/macro_atom_references,True
/zeta_data,True


In [1]:
pat = "/home/atharva/workspace/code/tardis-main/carsus/kurucz_cd23_chianti_He_cmfgen_H_Si_I-II.h5"
pat2 = "/home/atharva/workspace/code/tardis-main/refdata/atom_data/kurucz_cd23_chianti_H_He.h5"

In [11]:
df = pd.HDFStore(pat)
df

HDF5ExtError: HDF5 error back trace

  File "H5F.c", line 509, in H5Fopen
    unable to open file
  File "H5Fint.c", line 1567, in H5F_open
    unable to lock the file
  File "H5FD.c", line 1640, in H5FD_lock
    driver lock request failed
  File "H5FDsec2.c", line 959, in H5FD_sec2_lock
    unable to lock file, errno = 11, error message = 'Resource temporarily unavailable'

End of HDF5 error back trace

Unable to open/create file '/home/atharva/workspace/code/tardis-main/carsus/kurucz_cd23_chianti_He_cmfgen_H_Si_I-II.h5'

In [10]:
df.close

SyntaxError: invalid syntax (1922207284.py, line 1)

In [54]:
ref1_df = pd.read_hdf(pat, "collisions_data")

AttributeError: 'DataFrame' object has no attribute 'close'

In [50]:
def compare_levels_lines(path_a, path_b, ions='H-Zn'):
    
    # Read data
    levels_a = pd.read_hdf(path_a, key='levels_data')
    levels_b = pd.read_hdf(path_b, key='levels_data')
    lines_a = pd.read_hdf(path_a, key='lines_data')
    lines_b = pd.read_hdf(path_b, key='lines_data')
    
    # Get ions list
    ions = parse_selected_species(ions)
    
    lvl_eq = []
    lns_eq = []
    for ion in ions:
        
        # How many levels per ion in A
        try:
            num_lvl_a = len(levels_a.loc[ion])
            
        except (KeyError, TypeError, ValueError):
            num_lvl_a = 0
            
        # How many levels per ion in B
        try:
            num_lvl_b = len(levels_b.loc[ion])
            
        except (KeyError, TypeError, ValueError):
            num_lvl_b = 0

        # If level number is the same in A and B (and not zero) 
        # then compare cell against cell. `True` means all cells 
        # are equal in both dataframes.
        if num_lvl_a == num_lvl_b:
            val_lvl = True
            
            if num_lvl_a != 0:
                try:
                    k = levels_a.loc[ion].eq(levels_b.loc[ion]).sum().sum()
                    if num_lvl_a*3 != k:  # x3 because this df has three columns!
                        val_lvl = False
                        
                except (KeyError, TypeError, ValueError):
                    pass

        else:
            val_lvl = False
            
        # Append the results
        lvl_eq.append((ion, num_lvl_a, num_lvl_b, val_lvl))
        
        
        # Same for lines
        try:
            num_lns_a = len(lines_a.loc[ion])
            
        except (KeyError, TypeError, ValueError):
            num_lns_a = 0
            
        try:
            num_lns_b = len(lines_b.loc[ion])
            
        except (KeyError, TypeError, ValueError):
            num_lns_b = 0

        if num_lns_a == num_lns_b:
            val_lns = True
            
            if num_lns_a != 0:
                try:
                    k = lines_a.loc[ion].eq(lines_b.loc[ion]).sum().sum()
                    if num_lns_a*8 != k:
                        val_lvl = False
            
                except (KeyError, TypeError, ValueError):
                    pass
            
        else:
            val_lns = False
        
        lns_eq.append((ion, num_lns_a, num_lns_b, val_lns))
            
    df_lvl = pd.DataFrame(lvl_eq, columns=['ion', 'num_lvl_a', 'num_lvl_b', 'val_lvl'])
    df_lns = pd.DataFrame(lns_eq, columns=['ion', 'num_lns_a', 'num_lns_b', 'val_lns'])
    df = pd.merge(df_lvl, df_lns).set_index('ion')
    
    df['diff_lvl'] = abs(df['num_lvl_b'] - df['num_lvl_a'])
    df['diff_lns'] = abs(df['num_lns_b'] - df['num_lns_a'])
    df = df[['num_lvl_a', 'num_lvl_b', 'diff_lvl', 'val_lvl', 
          'num_lns_a', 'num_lns_b', 'diff_lns', 'val_lns']]

    return df

In [3]:
tt = compare_levels_lines('A.h5', 'B.h5', ions='H-C')

NameError: name 'compare_levels_lines' is not defined

### Custom Atomic Data

To generate and compare atomic data files locally, please uncomment the code below.<br>
For the first atomic file we grab species `H-C` from GFALL and `H-He` from Chianti.

In [None]:
# atomic_weights = NISTWeightsComp()
# ionization_energies = NISTIonizationEnergies('H-C')
# gfall_reader = GFALLReader(ions='H-C')
# chianti_reader = ChiantiReader(ions='H-He', collisions=True, priority=20)
# zeta_data = KnoxLongZeta()

In [None]:
# atom_data_a = TARDISAtomData(atomic_weights,
#                              ionization_energies,
#                              gfall_reader,
#                              zeta_data,
#                              chianti_reader)

In [None]:
# atom_data_a.to_hdf('A.h5')

For the second atomic file we grab species `H-C` from GFALL and `C` from Chianti.

In [None]:
# chianti_reader = ChiantiReader(ions='C', collisions=True, priority=20)

In [None]:
# atom_data_b = TARDISAtomData(atomic_weights,
#                              ionization_energies,
#                              gfall_reader,
#                              zeta_data,
#                              chianti_reader)

In [None]:
# atom_data_b.to_hdf('B.h5')

In [None]:
# tt = compare_levels_lines('A.h5', 'B.h5', ions='H-C')

```
num_xxx_y (int) : number of levels/lines.
diff_xxx (int) : difference in number of levels/lines.
val_xxx (bool) : `True` if levels/lines have the same value.
```

In [None]:
tt.style.applymap(highlight_values, subset=['val_lvl', 'val_lns']).applymap(
                    highlight_diff, subset=['diff_lvl', 'diff_lns'])