<a href="https://colab.research.google.com/github/alezakuskin/Stark_ML/blob/Ions/Predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title # Run this cell to get all dependencies and packages ready
!pip install roman

RunInColab = 'google.colab' in str(get_ipython())

from itertools import compress
from sklearn.preprocessing import StandardScaler
from urllib import request, parse

import pandas as pd
import numpy as np
import xgboost
import catboost
import roman
import joblib

# !git clone -b Ions https://github.com/alezakuskin/Stark_ML
from Stark_ML.utils.terms import *

if RunInColab:
    from google.colab import output
    def clear_output():
        output.clear()
else:
    from IPython import display
    def clear_output():
        display.clear_output()

clear_output()

In [29]:
#@title #Request data from NIST
spectra = 'TI II' #@param {type: "string"}
#@markdown Examples of allowed spectra:
#@markdown **Ar I** or **Mg I-IV** or **All spectra** or **Fe I; Si IX,XI; Ni Co-like**

#@markdown or **H-Ar I-II** or **Mg Li-like; Al Li-like-Be-like** or **Sc-Fe K-like-Ca-like** or **198Hg I**

#@markdown

#@markdown ###Enter wavelength in *nm*:
lower = 240 #@param {type: "number"}
upper = 260 #@param {type: "number"}

#@markdown

#@markdown ###Would you like to save lines that cannot be encoded automatically to a separate file

save_for_manual_check = True #@param {type: "boolean"}

nist_params = { # error if not commented and equals 0
    'spectra': spectra,
    'limits_type': 0,
    'low_w': lower,
    'upp_w': upper,
    'unit': 1,
    'de': 0,
    'I_scale_type': 1,
    'format': 3,
    'line_out': 0,
    'en_unit': 0,
    'output': 0,
    #'bibrefs': 1,
    'page_size': 15,
    'show_obs_wl': 1,
    'show_calc_wl': 1,
    #'unc_out': 0,
    'order_out': 0,
    'max_low_enrg': '',
    'show_av': 2,
    'max_upp_enrg': '',
    'tsb_value': 0,
    'min_str': '',
    #'A_out': 0,
    #'intens_out': 'off',
    'max_str': '',
    'allowed_out': 1,
    'forbid_out': 1,
    'min_accur': '',
    'min_intens': '',
    'conf_out': 'on',
    'term_out': 'on',
    'enrg_out': 'on',
    'J_out': 'on',
    #'g_out': 'on',
    #'remove_js': 'on',
    #'no_spaces': 'on',
    #'show_diff_obs_calc': 0,
    #'show_wn': 1,
    #'f_out': 'off',
    #'S_out': 'off',
    #'loggf_out': 'off',
    'submit': 'Retrieve Data',
}

url = 'https://physics.nist.gov/cgi-bin/ASD/lines1.pl?'
data = parse.urlencode(nist_params)
req =  request.Request(url+data)
with request.urlopen(req) as resp:
    df = pd.read_csv(resp, sep='\t')
if 'sp_num' in list(df.columns):
    df = df.drop(df.loc[df['sp_num'] == 'sp_num'].index)

data_i = pd.read_excel(Stark_ML.__path__.__dict__['_path'][0] + '/Source_files/Stark_data.xlsx',
                       sheet_name='Ions',
                       usecols='A:BQ',
                       nrows = 2
                   )
print(df)
request_df = split_OK_check(NIST_to_StarkML(df, data_i, spectra), save_manual_check = save_for_manual_check)

    obs_wl_air(nm)  ritz_wl_air(nm)  Aki(s^-1)  Acc    Ei(cm-1)    Ek(cm-1)  \
0       240.211648       240.211647        NaN  NaN  32332.9031  73950.1912   
1       240.726830       240.726849        NaN  NaN  32025.5915  73553.8175   
2       241.077268       241.077266        NaN  NaN  32332.9031  73800.7705   
3       241.749540       241.749556        NaN  NaN  32275.4863  73628.0430   
4       242.365846       242.365844        NaN  NaN  32275.4863  73522.8997   
..             ...              ...        ...  ...         ...         ...   
79      259.260496       259.260496        NaN  NaN  32332.9031  70892.6160   
80      259.530580       259.530600        NaN  NaN  39233.4153  77753.0000   
81      259.610260       259.610263        NaN  NaN  39233.4153  77741.1809   
82      259.918453       259.918444        NaN  NaN  43780.9533  82243.0637   
83      259.964890       259.964902        NaN  NaN  39602.8645  78058.1017   

         conf_i term_i  J_i               conf_k te

84it [00:00, 479.99it/s]

0 lines could not be encoded correctly. Please, check them manually in for_manual_check.txt





84 lines were encoded correctly.


In [24]:
#@title #The main part
#@markdown Currently your will get results on the NIST query above.

#@markdown You can upload you own *.txt* file or manually sanitized *for_manual_check.txt* to the panel on the left and specify the filename:

filename = 'requested_lines.txt' #@param {type:"string"}
filename = 'Stark_ML/' + filename

#@markdown Select whether you would like to get predictions for a single tempeature value or for a temperature range
Temperature_mode = 'range' #@param ['single', 'range']

#@markdown If you selected *range* in the previous field, specify all three parameters here:
Low_T = 8000   #@param {type: "number"}
High_T = 10000 #@param {type: "number"}
T_step = 100  #@param {type: "number"}

#Importing pretrained models
model1 = xgboost.XGBRegressor()
model1.load_model('Stark_ML/XGB_A+I_Eraw_Raw_No.json')

model2 = xgboost.XGBRegressor()
model2.load_model('Stark_ML/XGB_A+I_Enorm_Aug_No.json')

model3 = catboost.CatBoostRegressor()
model3.load_model('Stark_ML/CatBoost_A+I_Enorm_Raw_No.json')

model4 = joblib.load('Stark_ML/LightGBM_A+I_Eraw_Raw_No.pkl')

model5 = joblib.load('Stark_ML/LightGBM_A+I_Enorm_Raw_Scaler.pkl')

#Loading Standard Scaler
scaler = joblib.load('Stark_ML/scaler_width.pkl')

#Loading linelist
try:
    data_predictions = pd.read_csv(filename,
                                   index_col = 0
                                   )
except:
    data_predictions = pd.read_csv(filename[9:],
                                     index_col = 0
                                     )
    
#Data preprocessing
data_predictions.insert(data_predictions.columns.get_loc('E upper')+1, 'Gap to ion', 0)
data_predictions['Gap to ion'] = gap_to_ion(data_predictions, 'E upper')
data_predictions = data_predictions

if Temperature_mode == 'single':
    print('here')
    dtypes = data_predictions.dtypes.to_dict()
    for index, row in data_predictions.iterrows():
        data_predictions.at[index, 'T'] = Low_T
    data_predictions = data_predictions.astype(dtypes)

if Temperature_mode == 'range':
    dtypes = data_predictions.dtypes.to_dict()
    Ts = np.arange(Low_T, High_T + 1, T_step)
    for index, row in data_predictions.iterrows():
        data_predictions.at[index, 'T'] = Low_T
        for T in Ts:
            if T == Low_T:
                continue
            row['T'] = T
            data_predictions = pd.concat([data_predictions, row.to_frame().T], ignore_index=True)
    data_predictions = data_predictions.astype(dtypes)
data_predictions = data_predictions.sort_values(['Wavelength', 'T']).reset_index(drop = True)
    
#Getting predictions
epsilon = 1e-3
pred1 = model1.predict(data_predictions.drop(columns=['Element', 'Wavelength', 'Z number', 'w (A)', 'd (A)']))
pred2 = model2.predict(data_predictions.drop(columns=['Element', 'Wavelength', 'Z number', 'w (A)', 'd (A)']))
pred3 = model3.predict(data_predictions.drop(columns=['Element', 'Wavelength', 'Z number', 'w (A)', 'd (A)']))
pred4 = model4.predict(data_predictions.drop(columns=['Element', 'Wavelength', 'Z number', 'w (A)', 'd (A)']))
pred5 = model5.predict(scaler.transform(data_predictions.drop(columns=['Element', 'Wavelength', 'Z number', 'w (A)', 'd (A)'])))
preds = (pred1 + pred2 + pred3 + pred4 + pred5)/5
preds = (np.exp(preds) - 1) * epsilon

#building output file
columns = ['Element', 'Charge', 'Wavelength', 'T', 'w (A)']
#@markdown

#@markdown ###Select additional transition parameters you would like to include in output file
Element_symbol = True  #@param {type: 'boolean'}
Wavelength     = True  #@param {type: 'boolean'}
Temperature    = True  #@param {type: 'boolean'}
Charge         = True #@param {type: 'boolean'}

results = pd.DataFrame(columns = list(compress(columns, [Element_symbol, Charge, Wavelength, Temperature, True])))
results['w (A)'] = preds
for i in results.columns[:-1]:
    results[i] = data_predictions[i]
results.to_csv(f'PREDICTED_{filename[9:-4]}.csv', index = False)

In [15]:
data_predictions = data_predictions.sort_values(['Wavelength', 'T']).reset_index(drop = True)
data_predictions

Unnamed: 0,Element,Wavelength,Z number,Charge,1s,2s,2p,3s,3p,3d,...,11s,Multiplicity.1,Term.1,Parity.1,J.1,E upper,Gap to ion,T,w (A),d (A)
0,Ti,240.3987,22,0,2,2,6,2,6,2,...,0,1,3,0,3.0,41584.908,13488.092,8000,0,0
1,Ti,240.3987,22,0,2,2,6,2,6,2,...,0,1,3,0,3.0,41584.908,13488.092,9000,0,0
2,Ti,240.3987,22,0,2,2,6,2,6,2,...,0,1,3,0,3.0,41584.908,13488.092,10000,0,0
3,Ti,241.1369,22,0,2,2,6,2,6,2,...,0,3,3,0,3.0,41457.639,13615.361,8000,0,0
4,Ti,241.1369,22,0,2,2,6,2,6,2,...,0,3,3,0,3.0,41457.639,13615.361,9000,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133,Ti,259.6584,22,0,2,2,6,2,6,2,...,0,3,3,0,4.0,38670.723,16402.277,9000,0,0
134,Ti,259.6584,22,0,2,2,6,2,6,2,...,0,3,3,0,4.0,38670.723,16402.277,10000,0,0
135,Ti,259.9904,22,0,2,2,6,2,6,2,...,0,3,3,0,2.0,38451.309,16621.691,8000,0,0
136,Ti,259.9904,22,0,2,2,6,2,6,2,...,0,3,3,0,2.0,38451.309,16621.691,9000,0,0


In [11]:
print(data_predictions)

None


In [4]:
results = pd.DataFrame(columns = list(compress(columns, [Element_symbol, Charge, Wavelength, Temperature, True])))
results['w (A)'] = preds
for i in results.columns[:-1]:
    results[i] = data_predictions[i]
results.to_csv(f'PREDICTED_{filename[9:-4]}.csv', index = False)

  gap = pd.Series()


## Congratulations! If the previous cell finished execution without errors, you can now download <filename.csv> file with predicted values of Stark broadening parameter.

### For more details refer to 'paper' or contact us: ale-zakuskin@laser.chem.msu.ru