In [78]:
import re
import time
from io import StringIO

import pandas as pd
import requests



In [79]:
# Space-separated header row that is prepended to every downloaded table.
columns = 'Wavelength Lower_Level Lower_Type Lower_J Upper_Level Upper_Type Upper_J log_gf gA CF'

# Directory that holds one text file per ion.
target_url = 'https://hosting.umons.ac.be/html/agif/databases/tables/'

# Atomic number -> element symbol for Z = 57 (La) through Z = 71 (Lu).
ions = dict(zip(
    range(57, 72),
    ['La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd',
     'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu'],
))

# Stage index -> Roman-numeral suffix used when building table file names.
ion_stages = dict(enumerate(('I', 'II', 'III', 'IV')))

# Highest stage index that has a file-name suffix defined above.
maximum_ion_stage = max(ion_stages)


In [238]:
def get_ion_data(Z, stage, timeout=30):
    """Download and parse the line table for a single ion.

    The file ``<symbol><roman stage>.txt`` is requested from ``target_url``;
    if the server answers 404, the ``_cor.txt`` variant of the same name is
    tried before giving up.

    Parameters
    ----------
    Z : int
        Atomic number; must be a key of ``ions``.
    stage : int
        Stage index; must be a key of ``ion_stages``.
    timeout : float, optional
        Seconds passed to ``requests.get`` so that a stalled server cannot
        block the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame or None
        The table with the column names from ``columns``, indexed by
        ``(Z, C)`` where ``C`` is ``stage``. ``None`` when neither file
        exists on the server (both requests returned 404).

    Raises
    ------
    KeyError
        If ``Z`` or ``stage`` is not a configured key.
    requests.HTTPError
        If the server answers with an error status other than 404. Without
        this check the error page would be parsed as if it were table data.
    """
    name = ions[Z] + ion_stages[stage]

    response = requests.get(target_url + name + '.txt', timeout=timeout)
    if response.status_code == 404:
        # Fall back to the "_cor" variant of the file name.
        response = requests.get(target_url + name + '_cor.txt', timeout=timeout)
        if response.status_code == 404:
            print(name, '\t not found')
            return None
    # 404 is the only status treated as "table absent"; anything else that
    # is an error must not be silently parsed.
    response.raise_for_status()

    # Discard the first six lines of the file and substitute our own header.
    raw_data = response.text.split('\n', 6)[6]
    raw_data = columns + '\n' + raw_data
    # Collapse runs of spaces so a single space can serve as the delimiter,
    # then remove trailing spaces together with any blank lines after them.
    raw_data = re.sub(' +', ' ', raw_data)
    raw_data = re.sub(' \n+', '\n', raw_data)
    # Tabs become spaces, and the last four characters of the text are cut.
    # NOTE(review): the 4-character cut and skipfooter=1 below presumably trim
    # trailing non-data text in the source files -- confirm against a raw table.
    raw_data = raw_data.replace('\t', ' ')[:-4]

    virt_file = StringIO(raw_data)
    # skipfooter is only supported by the python parser engine.
    df = pd.read_csv(virt_file, delimiter=' ', skipfooter=1, engine='python')
    df['Z'] = Z
    df['C'] = stage
    df = df.set_index(['Z', 'C'])
    print(name, '\t loaded')
    return df

In [239]:
def get_all_ion_between(Z_min, Z_max, stage_min, stage_max):
    """Load every available ion table in an inclusive (Z, stage) rectangle.

    ``get_ion_data`` is called once for each combination of
    ``Z_min <= Z <= Z_max`` and ``stage_min <= stage <= stage_max``; ions for
    which it returns ``None`` are skipped.

    Parameters
    ----------
    Z_min, Z_max : int
        Inclusive range of atomic numbers.
    stage_min, stage_max : int
        Inclusive range of stage indices.

    Returns
    -------
    pandas.DataFrame
        All loaded tables stacked in iteration order, indexed by ``(Z, C)``.
        When nothing could be loaded, an empty frame with the same index
        names and the column names from ``columns`` is returned.
    """
    frames = []
    for Z in range(Z_min, Z_max + 1):
        for stage in range(stage_min, stage_max + 1):
            print(Z, stage)
            ion_df = get_ion_data(Z, stage)
            if ion_df is not None:
                frames.append(ion_df)

    if not frames:
        # pd.concat refuses an empty list, so build the empty result directly.
        empty = pd.DataFrame(columns=['Z', 'C'] + columns.split(' '))
        return empty.set_index(['Z', 'C'])

    # One concat at the end avoids re-copying the accumulated rows on every
    # iteration and keeps the numeric dtypes of the individual tables.
    return pd.concat(frames)

In [245]:
# Crawl every element and stage declared in the configuration cell instead of
# repeating the bounds as literals here. Ions without a table on the server
# are reported as "not found" and skipped.
atomic_data = get_all_ion_between(
    Z_min=min(ions),
    Z_max=max(ions),
    stage_min=min(ion_stages),
    stage_max=maximum_ion_stage,
)

57 0
LaI 	 loaded
57 1
LaII 	 not found
57 2
LaIII 	 loaded
57 3
LaIV 	 not found
58 0
CeI 	 not found
58 1
CeII 	 loaded
58 2
CeIII 	 loaded
58 3
CeIV 	 not found
59 0
PrI 	 not found
59 1
PrII 	 loaded
59 2
PrIII 	 loaded
59 3
PrIV 	 loaded
60 0
NdI 	 not found
60 1
NdII 	 loaded
60 2
NdIII 	 loaded
60 3
NdIV 	 loaded
61 0
PmI 	 not found
61 1
PmII 	 not found
61 2
PmIII 	 not found
61 3
PmIV 	 not found
62 0
SmI 	 not found
62 1
SmII 	 loaded
62 2
SmIII 	 loaded
62 3
SmIV 	 not found
63 0
EuI 	 not found
63 1
EuII 	 not found
63 2
EuIII 	 loaded
63 3
EuIV 	 not found
64 0
GdI 	 not found
64 1
GdII 	 not found
64 2
GdIII 	 loaded
64 3
GdIV 	 not found
65 0
TbI 	 not found
65 1
TbII 	 not found
65 2
TbIII 	 loaded
65 3
TbIV 	 not found
66 0
DyI 	 not found
66 1
DyII 	 not found
66 2
DyIII 	 loaded
66 3
DyIV 	 not found
67 0
HoI 	 not found
67 1
HoII 	 not found
67 2
HoIII 	 loaded
67 3
HoIV 	 not found
68 0
ErI 	 not found
68 1
ErII 	 loaded
68 2
ErIII 	 loaded
68 3
ErIV 	 not found
6

In [246]:
# Show the combined table (indexed by Z and C) as the cell's rich output.
atomic_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Wavelength,Lower_Level,Lower_Type,Lower_g,Upper_Level,Upper_Type,Upper_g,log_gf,gA,CF
Z,C,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
57,0,3175.982,0,(e),1.5,31477,(o),2.5,-0.74,121000000.0,0.047
57,0,3215.810,1053,(e),2.5,32141,(o),3.5,-0.45,232000000.0,0.052
57,0,3235.643,0,(e),1.5,30897,(o),2.5,-1.94,7420000.0,0.060
57,0,3247.034,0,(e),1.5,30788,(o),2.5,-0.99,63400000.0,0.064
57,0,3285.926,1053,(e),2.5,31477,(o),2.5,-1.74,11400000.0,0.018
...,...,...,...,...,...,...,...,...,...,...,...
71,2,7310.129,86681,(e),0.5,100357,(o),0.5,-0.01,122000000.0,1.000
71,2,7534.288,92322,(e),1.5,105591,(o),2.5,0.44,321000000.0,1.000
71,2,7936.530,93108,(e),2.5,105704,(o),3.5,0.57,393000000.0,1.000
71,2,8008.692,93108,(e),2.5,105591,(o),2.5,-0.74,19100000.0,1.000


In [253]:
# Stamp the file name with the local date and time (minute resolution), so
# runs in different minutes do not overwrite each other.
timestr = time.strftime("%Y%m%d-%H%M")
output_name = 'DREAM_atomic_data_' + timestr + '.h5'
print(output_name)
# `key` is passed by keyword: newer pandas releases deprecate positional
# arguments to to_hdf other than the path.
atomic_data.to_hdf(output_name, key='atomic_data')

DREAM_atomic_data_20210217-1633.h5
