In [1]:
#@title # Run this cell to get all dependencies and packages ready
!pip install roman
from google.colab import output
from itertools import compress
from sklearn.preprocessing import StandardScaler
from urllib import request, parse

import pandas as pd
import numpy as np
import xgboost
import roman

!git clone -b Ions https://github.com/alezakuskin/Stark_ML
from Stark_ML.utils.terms import *
output.clear()

In [4]:
import os
import shutil
if os.path.isdir('Stark_ML'):
    import Stark_ML
    def remove_readonly(func, path, _):
            os.chmod(path, stat.S_IWRITE)
            func(path)
    shutil.rmtree(Stark_ML.__path__.__dict__['_path'][0], onerror = remove_readonly)

!git clone -b Ions https://github.com/alezakuskin/Stark_ML

Cloning into 'Stark_ML'...
remote: Enumerating objects: 1704, done.[K
remote: Counting objects: 100% (588/588), done.[K
remote: Compressing objects: 100% (321/321), done.[K
remote: Total 1704 (delta 313), reused 529 (delta 266), pack-reused 1116[K
Receiving objects: 100% (1704/1704), 75.48 MiB | 21.74 MiB/s, done.
Resolving deltas: 100% (943/943), done.


In [5]:
#@title #Request data from NIST
spectra = 'All spectra' #@param {type: "string"}
#@markdown Examples of allowed spectra:
#@markdown **Ar I** or **Mg I-IV** or **All spectra** or **Fe I; Si IX,XI; Ni Co-like**

#@markdown or **H-Ar I-II** or **Mg Li-like; Al Li-like-Be-like** or **Sc-Fe K-like-Ca-like** or **198Hg I**

#@markdown

#@markdown ###Enter wavelength in *nm*:
lower = 400 #@param {type: "number"}
upper = 410 #@param {type: "number"}

#@markdown

#@markdown ###Would you like to save lines that cannot be encoded automatically to a separate file

save_for_manual_check = True #@param {type: "boolean"}

nist_params = { # error if not commented and equals 0
    'spectra': spectra,
    'limits_type': 0,
    'low_w': lower,
    'upp_w': upper,
    'unit': 1,
    'de': 0,
    'I_scale_type': 1,
    'format': 3,
    'line_out': 0,
    'en_unit': 0,
    'output': 0,
    #'bibrefs': 1,
    'page_size': 15,
    'show_obs_wl': 1,
    'show_calc_wl': 1,
    #'unc_out': 0,
    'order_out': 0,
    'max_low_enrg': '',
    'show_av': 2,
    'max_upp_enrg': '',
    'tsb_value': 0,
    'min_str': '',
    #'A_out': 0,
    #'intens_out': 'off',
    'max_str': '',
    'allowed_out': 1,
    'forbid_out': 1,
    'min_accur': '',
    'min_intens': '',
    'conf_out': 'on',
    'term_out': 'on',
    'enrg_out': 'on',
    'J_out': 'on',
    #'g_out': 'on',
    #'remove_js': 'on',
    #'no_spaces': 'on',
    #'show_diff_obs_calc': 0,
    #'show_wn': 1,
    #'f_out': 'off',
    #'S_out': 'off',
    #'loggf_out': 'off',
    'submit': 'Retrieve Data',
}

url = 'https://physics.nist.gov/cgi-bin/ASD/lines1.pl?'
data = parse.urlencode(nist_params)
req =  request.Request(url+data)
with request.urlopen(req) as resp:
    df = pd.read_csv(resp, sep='\t')
if 'sp_num' in list(df.columns):
    df = df.drop(df.loc[df['sp_num'] == 'sp_num'].index)

data_i = pd.read_excel(Stark_ML.__path__.__dict__['_path'][0] + '/Source_files/Stark_data.xlsx',
                       sheet_name='Ions',
                       usecols='A:BM',
                       nrows = 2
                   )

request_df = split_OK_check(NIST_to_StarkML(df, data_i, spectra), save_manual_check = save_for_manual_check)

2767it [00:27, 102.17it/s]


1560 lines could not be encoded correctly. Please, check them manually in for_manual_check.txt
1207 lines were encoded correctly.


In [6]:
#@title #The main part
#@markdown Currently your will get results on the NIST query above.

#@markdown You can upload you own *.txt* file or manually sanitized *for_manual_check.txt* to the panel on the left and specify the filename:

filename = 'requested_lines.txt' #@param {type:"string"}
filename = 'Stark_ML/' + filename

#@markdown Select whether you would like to get predictions for a single tempeature value or for a temperature range
Temperature_mode = 'single' #@param ['single', 'range']

#@markdown If you selected *range* in the previous field, specify all three parameters here:
Low_T = 8000   #@param {type: "number"}
High_T = 0 #@param {type: "number"}
T_step = 0  #@param {type: "number"}

#Importing pretrained model
model = xgboost.XGBRegressor()
model.load_model('Stark_ML/XGBoost_Pretrained.json')

#Constructing Standard Scaler
scaler = StandardScaler()
scaler.mean_  = np.loadtxt('Stark_ML/scaler_mean.txt')
scaler.scale_ = np.loadtxt('Stark_ML/scaler_scale.txt')


#Loading linelist
try:
    data_predictions = pd.read_csv(filename,
                                   index_col = 0
                                   )
except:
    data_predictions = pd.read_csv(filename[9:],
                                     index_col = 0
                                     )


#Data preprocessing
#data_predictions['Term'] = term_to_number(data_predictions['Term'])
#data_predictions['Term.1'] = term_to_number(data_predictions['Term.1'])
data_predictions.insert(data_predictions.columns.get_loc('E upper')+1, 'Gap to ion', 0)
data_predictions['Gap to ion'] = gap_to_ion(data_predictions, 'E upper')

if Temperature_mode == 'single':
    print('here')
    dtypes = data_predictions.dtypes.to_dict()
    for index, row in data_predictions.iterrows():
        data_predictions.loc[index, 'T'] = Low_T
    data_predictions = data_predictions.astype(dtypes)

if Temperature_mode == 'range':
    dtypes = data_predictions.dtypes.to_dict()
    Ts = np.arange(Low_T, High_T + 1, T_step)
    for index, row in data_predictions.iterrows():
        for T in Ts:
            row['T'] = T
            data_predictions = pd.concat([data_predictions, row.to_frame().T], ignore_index=True)
    data_predictions = data_predictions.astype(dtypes)

#Getting predictions
epsilon = 1e-3
preds = model.predict(pd.DataFrame(scaler.transform(data_predictions.drop(columns=['Element', 'Wavelength', 'Z number', 'w (A)', 'd (A)']))))
preds = (np.exp(preds) - 1) * epsilon


#building output file
columns = ['Element', 'Charge', 'Wavelength', 'T', 'w (A)']
#@markdown

#@markdown ###Select additional transition parameters you would like to include in output file
Element_symbol = True  #@param {type: 'boolean'}
Wavelength     = True  #@param {type: 'boolean'}
Temperature    = True  #@param {type: 'boolean'}
Charge         = True #@param {type: 'boolean'}

results = pd.DataFrame(columns = list(compress(columns, [Element_symbol, Charge, Wavelength, Temperature, True])))
results['w (A)'] = preds
for i in results.columns[:-1]:
    results[i] = data_predictions[i]
results.to_csv(f'PREDICTED_{filename[9:-4]}.csv', index = False)

Please find and insert to '/Source_files/E_ion.csv' ionization energy value for Nd with charge 1
Please find and insert to '/Source_files/E_ion.csv' ionization energy value for Fe with charge 4
Please find and insert to '/Source_files/E_ion.csv' ionization energy value for V with charge 1
Please find and insert to '/Source_files/E_ion.csv' ionization energy value for Nb with charge 3


  gap.at[index] = float(ion_Es.loc[ion_Es['Element'] == data.loc[index]['Element']][str(data.loc[index]['Charge'])]) - data.loc[index][column_name]


TypeError: ignored

In [11]:
%debug

> [0;32m/usr/local/lib/python3.10/dist-packages/pandas/core/series.py[0m(206)[0;36mwrapper[0;34m()[0m
[0;32m    204 [0;31m        [0;32mif[0m [0mlen[0m[0;34m([0m[0mself[0m[0;34m)[0m [0;34m==[0m [0;36m1[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    205 [0;31m            [0;32mreturn[0m [0mconverter[0m[0;34m([0m[0mself[0m[0;34m.[0m[0miloc[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 206 [0;31m        [0;32mraise[0m [0mTypeError[0m[0;34m([0m[0;34mf"cannot convert the series to {converter}"[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    207 [0;31m[0;34m[0m[0m
[0m[0;32m    208 [0;31m    [0mwrapper[0m[0;34m.[0m[0m__name__[0m [0;34m=[0m [0;34mf"__{converter.__name__}__"[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> u
> [0;32m/content/Stark_ML/utils/terms.py[0m(39)[0;36mgap_to_ion[0;34m()[0m
[0;32m     37 [0;31m    [0mgap[0m [0;34m=[0m [0mpd[0m[0;34m.[0m[0mSer

In [13]:
data_predictions = pd.read_csv('requested_lines.txt', index_col=0)
data_predictions.loc[[5]]

Unnamed: 0,Element,Wavelength,Z number,Charge,1s,2s,2p,3s,3p,3d,...,10s,11s,Multiplicity.1,Term.1,Parity.1,J.1,E upper,T,w (A),d (A)
5,Nd,400.0493,60,1,2,2,6,2,6,10,...,0,0,6,5,0,3.5,26640.08,0,0,0


In [23]:
data_predictions = pd.read_csv('PREDICTED_requested_lines.csv', index_col=0)
data_predictions

Unnamed: 0_level_0,Wavelength,T,w (A)
Element,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Fe,400.02520,8000,1.440522
Fe,400.04570,8000,0.493120
Fe,400.16615,8000,0.997748
Fe,400.26606,8000,0.556991
Fe,400.37616,8000,0.283672
...,...,...,...
Fe,409.66900,8000,0.268325
Fe,409.69500,8000,0.647354
Fe,409.70170,8000,0.115923
Fe,409.70831,8000,1.504202


## Congratulations! If the previous cell finished execution without errors, you can now download <filename.csv> file with predicted values of Stark broadening parameter.

### For more details refer to 'paper' or contact us: ale-zakuskin@laser.chem.msu.ru