<a href="https://colab.research.google.com/github/alezakuskin/Stark_ML/blob/Ions/Predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title # Run this cell to get all dependencies and packages ready
!pip install roman

RunInColab = 'google.colab' in str(get_ipython())

import getpass
import os
from itertools import compress
from urllib import request, parse

import pandas as pd
import numpy as np
import xgboost
import catboost
import roman
import joblib

# !git clone -b Ions https://github.com/alezakuskin/Stark_ML
import Stark_ML
from Stark_ML.utils.terms import *

# Pick the output-clearing backend that matches the runtime the notebook is in.
if not RunInColab:
    from IPython import display

    def clear_output():
        '''Clear the output area of the current cell (plain IPython/Jupyter).'''
        display.clear_output()
else:
    from google.colab import output

    def clear_output():
        '''Clear the output area of the current cell (Google Colab).'''
        output.clear()
        
def predict_width(data_for_prediction):
    '''
    Predict Stark broadening parameters by averaging five pretrained regressors.

    Parameters
    ----------
    data_for_prediction : pd.DataFrame
        Dataframe with any number of rows and every input feature filled in;
        it must NOT contain the "Element", "Wavelength", "Z number", "w (A)"
        and "d (A)" columns.

    Returns
    ----------
    numpy.ndarray
        A one-dimensional array with the predicted broadening parameters,
        in angstroms.
    '''
    # Pretrained ensemble members, loaded from the Stark_ML folder on every call.
    xgb_raw = xgboost.XGBRegressor()
    xgb_raw.load_model('Stark_ML/XGB_A+I_Eraw_Raw_No.json')

    xgb_aug = xgboost.XGBRegressor()
    xgb_aug.load_model('Stark_ML/XGB_A+I_Enorm_Aug_No.json')

    cat_model = catboost.CatBoostRegressor()
    cat_model.load_model('Stark_ML/CatBoost_A+I_Enorm_Raw_No.json')

    # The two pickled estimators are LightGBM models judging by their file names.
    lgbm_plain = joblib.load('Stark_ML/LightGBM_A+I_Eraw_Raw_No.pkl')
    lgbm_scaled = joblib.load('Stark_ML/LightGBM_A+I_Enorm_Raw_Scaler.pkl')

    # Scaler applied only to the input of the last ensemble member.
    width_scaler = joblib.load('Stark_ML/scaler_width.pkl')

    # Raw outputs of the members that consume the features unchanged ...
    member_outputs = [
        estimator.predict(data_for_prediction)
        for estimator in (xgb_raw, xgb_aug, cat_model, lgbm_plain)
    ]
    # ... followed by the member that expects scaled features.
    member_outputs.append(lgbm_scaled.predict(width_scaler.transform(data_for_prediction)))

    # Accumulate strictly left to right so that dtype promotion and rounding
    # are the same as in a plain `a + b + c + d + e` expression.
    ensemble_sum = member_outputs[0]
    for member_output in member_outputs[1:]:
        ensemble_sum = ensemble_sum + member_output
    ensemble_mean = ensemble_sum / 5

    # Map the averaged model output back to widths.
    # NOTE(review): presumably the inverse of a log(1 + w / epsilon) target
    # transform used at training time -- confirm against the training code.
    epsilon = 1e-3
    return (np.exp(ensemble_mean) - 1) * epsilon

def predict_shift(data_for_prediction):
    '''
    Get predicted Stark broadening AND shift parameters for input lines.

    The widths are predicted first with `predict_width` and then supplied to
    the shift model as the additional "w (A)" feature.

    Parameters
    ----------
    data_for_prediction : pd.DataFrame, dataframe with any number of rows,
        all values of input features filled in; without "Element", "Wavelength",
        "Z number", "w (A)", "d (A)" columns. The dataframe is not modified.

    Returns
    ----------
    numpy.ndarray
        A two-dimensional array with predicted values of both broadening
        (1st column) and shift (2nd column) parameters in angstroms.
    '''
    #Importing pretrained models
    model = joblib.load('Stark_ML/RF_Both_Eraw_Aug_No.pkl')

    #Get broadening predictions first
    widths = predict_width(data_for_prediction)

    #Adjust input data
    # `assign` returns a new frame, so the caller's dataframe does not silently
    # gain a "w (A)" column as a side effect of asking for predictions.
    shift_features = data_for_prediction.assign(**{'w (A)': widths})
    # Reorder/select columns to what the fitted estimator saw during training.
    # NOTE(review): the unpickled object is expected to expose the fitted
    # estimator as `.model` -- confirm against the class that was pickled.
    shift_features = shift_features[model.model.feature_names_in_]

    #Get shift predictions
    preds = model.predict(shift_features)

    return np.column_stack((widths, preds))

# Wipe everything this cell has printed so far (e.g. the pip install log).
clear_output()

In [4]:
!pip install mariadb

Defaulting to user installation because normal site-packages is not writeable
Collecting mariadb
  Downloading mariadb-1.1.10-cp311-cp311-win_amd64.whl.metadata (3.2 kB)
Downloading mariadb-1.1.10-cp311-cp311-win_amd64.whl (197 kB)
   ---------------------------------------- 0.0/197.6 kB ? eta -:--:--
   -- ------------------------------------- 10.2/197.6 kB ? eta -:--:--
   ------------------ --------------------- 92.2/197.6 kB 1.7 MB/s eta 0:00:01
   ---------------------------------------  194.6/197.6 kB 2.4 MB/s eta 0:00:01
   ---------------------------------------- 197.6/197.6 kB 2.0 MB/s eta 0:00:00
Installing collected packages: mariadb
Successfully installed mariadb-1.1.10


In [9]:
import mariadb

# Credentials must never be stored in the notebook: they are taken from the
# environment, and the password is asked for interactively when it is not set.
# NOTE(review): the password that used to be hardcoded here has been published
# with the notebook and should be rotated on the server.
db_user = os.environ.get("KURUCZ_DB_USER", "alex")
db_password = os.environ.get("KURUCZ_DB_PASSWORD") or getpass.getpass(
    f"Password for {db_user}@laser365-1.chem.msu.ru: "
)

conn = mariadb.connect(
        user=db_user,
        password=db_password,
        host="laser365-1.chem.msu.ru",
        port=3306,
        database="kurucz")

In [2]:
#@title #Request data from NIST
spectra = 'Ti II' #@param {type: "string"}
#@markdown Examples of allowed spectra:
#@markdown **Ar I** or **Mg I-IV** or **All spectra** or **Fe I; Si IX,XI; Ni Co-like**

#@markdown or **H-Ar I-II** or **Mg Li-like; Al Li-like-Be-like** or **Sc-Fe K-like-Ca-like** or **198Hg I**

#@markdown

#@markdown ###Enter wavelength in *nm*:
lower = 240 #@param {type: "number"}
upper = 260 #@param {type: "number"}

# Quantity to predict; read by the prediction cell further down.
target = "both" #@param ["broadening", "shift", "both"] {type:"raw"}

#@markdown

#@markdown ###Would you like to save lines that cannot be encoded automatically to a separate file

save_for_manual_check = True #@param {type: "boolean"}

# Query-string parameters of the NIST ASD "lines" request that is sent right
# after this dictionary is built. Only 'spectra', 'low_w' and 'upp_w' come from
# the form fields above; every other value is fixed.
# NOTE(review): the meaning of the numeric codes ('unit', 'format', ...) is
# defined by the NIST ASD form and cannot be verified from this notebook --
# 'unit': 1 presumably selects nm (matching the prompt above) and 'format': 3
# presumably selects the tab-delimited output that is parsed below; confirm
# against the NIST ASD documentation before changing them.
nist_params = { # error if not commented and equals 0
    'spectra': spectra,
    'limits_type': 0,
    'low_w': lower,
    'upp_w': upper,
    'unit': 1,
    'de': 0,
    'I_scale_type': 1,
    'format': 3,
    'line_out': 0,
    'en_unit': 0,
    'output': 0,
    #'bibrefs': 1,
    'page_size': 15,
    'show_obs_wl': 1,
    'show_calc_wl': 1,
    #'unc_out': 0,
    'order_out': 0,
    'max_low_enrg': '',
    'show_av': 2,
    'max_upp_enrg': '',
    'tsb_value': 0,
    'min_str': '',
    #'A_out': 0,
    #'intens_out': 'off',
    'max_str': '',
    'allowed_out': 1,
    'forbid_out': 1,
    'min_accur': '',
    'min_intens': '',
    'conf_out': 'on',
    'term_out': 'on',
    'enrg_out': 'on',
    'J_out': 'on',
    #'g_out': 'on',
    #'remove_js': 'on',
    #'no_spaces': 'on',
    #'show_diff_obs_calc': 0,
    #'show_wn': 1,
    #'f_out': 'off',
    #'S_out': 'off',
    #'loggf_out': 'off',
    'submit': 'Retrieve Data',
}

# Download the requested line list from the NIST Atomic Spectra Database.
url = 'https://physics.nist.gov/cgi-bin/ASD/lines1.pl?'
data = parse.urlencode(nist_params)
req = request.Request(url + data)
with request.urlopen(req) as resp:
    df = pd.read_csv(resp, sep='\t')

# Drop rows that merely repeat the header: they carry the literal column name
# 'sp_num' in the 'sp_num' column.
if 'sp_num' in df.columns:
    df = df[df['sp_num'] != 'sp_num']

# Locate the installed Stark_ML package through the public `__path__` sequence.
# This works for regular and namespace packages alike, unlike reaching into the
# private `_path` attribute, which only namespace packages have.
stark_ml_dir = list(Stark_ML.__path__)[0]

# Only the first two rows of the 'Ions' sheet are read; they are passed to the
# encoder together with the NIST table.
data_i = pd.read_excel(stark_ml_dir + '/Source_files/Stark_data.xlsx',
                       sheet_name='Ions',
                       usecols='A:BQ',
                       nrows = 2
                   )

# Encode the NIST rows into the Stark_ML input format and separate the lines
# that could not be encoded automatically.
request_df = split_OK_check(NIST_to_StarkML(df, data_i, spectra), save_manual_check = save_for_manual_check)

84it [00:00, 488.37it/s]


0 lines could not be encoded correctly. Please, check them manually in for_manual_check.txt
84 lines were encoded correctly.


In [3]:
#@title #The main part
#@markdown Currently you will get results for the NIST query above.

#@markdown You can upload your own *.txt* file or a manually sanitized *for_manual_check.txt* to the panel on the left and specify the filename:

filename = 'requested_lines.txt' #@param {type:"string"}
filename = 'Stark_ML/' + filename

#@markdown Select whether you would like to get predictions for a single temperature value or for a temperature range
Temperature_mode = 'range' #@param ['single', 'range']

#@markdown If you selected *range* in the previous field, specify all three parameters here:
Low_T = 8000   #@param {type: "number"}
High_T = 10000 #@param {type: "number"}
T_step = 100  #@param {type: "number"}



#Loading linelist
# The file is looked up inside the Stark_ML/ folder first and, only when it is
# not there, in the working directory (same name without the folder prefix).
# Any other failure (e.g. a malformed file) is reported as-is instead of being
# masked by the fallback.
bare_filename = filename[len('Stark_ML/'):]
try:
    data_predictions = pd.read_csv(filename, index_col = 0)
except FileNotFoundError:
    data_predictions = pd.read_csv(bare_filename, index_col = 0)

#Data preprocessing
data_predictions.insert(data_predictions.columns.get_loc('E upper')+1, 'Gap to ion', 0)
data_predictions['Gap to ion'] = gap_to_ion(data_predictions, 'E upper')

# Column dtypes as loaded; re-applied after the temperature column is filled
# so that 'T' keeps the dtype it had in the input file.
dtypes = data_predictions.dtypes.to_dict()

if Temperature_mode == 'single':
    # Every line is evaluated at the single temperature Low_T.
    data_predictions['T'] = Low_T
elif Temperature_mode == 'range':
    # Every line is replicated once per temperature of the grid
    # Low_T, Low_T + T_step, ... (High_T included when the step hits it).
    Ts = np.arange(Low_T, High_T + 1, T_step)
    if Ts.size == 0:
        # Empty grid (High_T below Low_T): fall back to Low_T alone.
        Ts = np.array([Low_T])
    n_lines = len(data_predictions)
    # One vectorised expansion instead of concatenating row by row in a loop.
    data_predictions = (
        data_predictions
        .iloc[np.repeat(np.arange(n_lines), len(Ts))]
        .reset_index(drop = True)
    )
    data_predictions['T'] = np.tile(Ts, n_lines)
data_predictions = data_predictions.astype(dtypes)
data_predictions = data_predictions.sort_values(['Wavelength', 'T']).reset_index(drop = True)

#Get predictions
# The models expect the features only: identifiers and target columns are removed.
features = data_predictions.drop(columns=['Element', 'Wavelength', 'Z number', 'w (A)', 'd (A)'])
if target == 'broadening':
    preds = pd.Series(predict_width(features), name = 'w (A)')
elif target == 'shift':
    # predict_shift returns (width, shift) columns; keep the shift only.
    preds = pd.Series(predict_shift(features)[:, 1], name = 'd (A)')
elif target == 'both':
    preds = pd.DataFrame(predict_shift(features), columns = ['w (A)', 'd (A)'])
else:
    raise ValueError(f"target must be 'broadening', 'shift' or 'both', got {target!r}")


#building output file
columns = ['Element', 'Charge', 'Wavelength', 'T', 'w (A)', 'd (A)']
#@markdown

#@markdown ###Select additional transition parameters you would like to include in output file
Element_symbol = True  #@param {type: 'boolean'}
Wavelength     = True  #@param {type: 'boolean'}
Temperature    = True  #@param {type: 'boolean'}
Charge         = True #@param {type: 'boolean'}

# The four switches line up with the first four entries of `columns`; the
# prediction columns come from `preds` itself. Both frames share the same
# 0..n-1 index, so the column-wise concatenation aligns row by row.
selected_columns = list(compress(columns, [Element_symbol, Charge, Wavelength, Temperature]))
results = pd.concat(
        [
        data_predictions[selected_columns],
        preds,
        ],
    axis = 1
    )

# Output name: input file name without folder prefix and without extension.
output_stem = bare_filename.rsplit('.', 1)[0]
results.to_csv(f'PREDICTED_{output_stem}.csv', index = False)

## Congratulations! If the previous cell finished execution without errors, you can now download the `PREDICTED_<filename>.csv` file with the predicted values of the Stark broadening and/or shift parameters.

### For more details refer to 'paper' or contact us: ale-zakuskin@laser.chem.msu.ru