In [13]:
#@title # Run this cell to get all dependencies and packages ready
from google.colab import output
from itertools import compress
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import xgboost

# Fetch the project repository (branch Main) into ./Stark_ML; the pretrained model,
# the scaler statistics and the helper module used by the main cell are read from that folder.
!git clone -b Main https://github.com/alezakuskin/Stark_ML
# Wildcard import: this is where term_to_number and gap_to_ion, called in the main cell, come from.
from Stark_ML.utils.terms import *
# Clear this cell's output (e.g. the clone log) once setup has finished.
output.clear()

In [21]:
#@title #The main part
#@markdown ### By default you will get results on the test linelist Example.xlsx.

#@markdown ###To get predictions on lines of your interest, please fill in and upload (section 'Files' on the left panel) your own .xlsx file and specify its name in the field below:

# NOTE(review): the form text above names Example.xlsx as the default, but the value stored here is 'Andrey.xlsx' - confirm which one is intended.
filename = 'Andrey.xlsx' #@param {type:"string"}
# Prefix with the cloned repository folder; the loading step further down retries with the bare name if this path cannot be read.
filename = 'Stark_ML/' + filename

#@markdown Select whether you would like to get predictions for a single temperature value or for a temperature range
Temperature_mode = 'range' #@param ['single', 'range']

#@markdown If you selected 'range' in the previous field, specify parameters here:
# These three values feed np.arange(Low_T, High_T + 1, T_step) further down, so High_T itself is included when it falls on a step.
Low_T = 3000   #@param {type: "number"}
High_T = 12000 #@param {type: "number"}
T_step = 250  #@param {type: "number"}

#Importing pretrained model
# An empty regressor is created first and then populated from the JSON dump in the cloned repository.
model = xgboost.XGBRegressor()
model.load_model('Stark_ML/XGBoost_Pretrained.json')

#Constructing Standard Scaler
# The scaler is not fitted here: its statistics are read from text files and attached directly.
scaler = StandardScaler()
for _attribute, _stats_file in (('mean_', 'Stark_ML/scaler_mean.txt'),
                                ('scale_', 'Stark_ML/scaler_scale.txt')):
    setattr(scaler, _attribute, np.loadtxt(_stats_file))


#Loading linelist
# First try the workbook inside the cloned repository folder; if no such file exists there,
# retry with the bare file name in the working directory (presumably where a user upload lands).
# Only a missing file triggers the fallback, so genuine read errors (wrong sheet name,
# corrupt workbook, missing Excel engine) are reported instead of being silently masked.
try:
    data_predictions = pd.read_excel(filename,
                     sheet_name='Sheet1',
                     usecols='A:BM'
                   )
except FileNotFoundError:
    # Strip the 'Stark_ML/' prefix that was prepended to the user-supplied name
    data_predictions = pd.read_excel(filename[len('Stark_ML/'):],
                     sheet_name='Sheet1',
                     usecols='A:BM'
                   )


#Data preprocessing
# Both term columns are passed through the project's term_to_number helper.
for term_column in ('Term', 'Term.1'):
    data_predictions[term_column] = term_to_number(data_predictions[term_column])

# Create a zero-filled 'Gap to ion' column immediately after 'E upper',
# then fill it with whatever gap_to_ion returns for the frame.
gap_position = data_predictions.columns.get_loc('E upper') + 1
data_predictions.insert(gap_position, 'Gap to ion', 0)
data_predictions['Gap to ion'] = gap_to_ion(data_predictions, 'E upper')

if Temperature_mode == 'range':
    # Remember the column dtypes so they can be re-imposed after the concatenation
    dtypes = data_predictions.dtypes.to_dict()
    # Temperature grid; the +1 keeps High_T in the grid when it falls exactly on a step
    Ts = np.arange(Low_T, High_T + 1, T_step)
    # Build all (line, temperature) combinations in one vectorized step instead of
    # concatenating one row at a time (which copies the whole frame on every iteration):
    # each line is repeated len(Ts) times in order, and the grid is tiled across the lines.
    n_lines = len(data_predictions)
    expanded = data_predictions.iloc[np.repeat(np.arange(n_lines), len(Ts))].copy()
    expanded['T'] = np.tile(Ts, n_lines)
    # NOTE(review): as before, the rows as loaded (with their own T values) stay in front
    # of the generated ones - confirm that keeping them in the output is intended.
    data_predictions = pd.concat([data_predictions, expanded], ignore_index=True)
    data_predictions = data_predictions.astype(dtypes)

#Getting predictions
# Factor used when converting the raw model output to the reported value
epsilon = 1e-3
# Columns removed before the frame is handed to the scaler and the model
non_feature_columns = ['Element', 'Wavelength', 'Z number', 'w (A)', 'd (A)']
features = data_predictions.drop(columns=non_feature_columns)
features_scaled = pd.DataFrame(scaler.transform(features))
raw_output = model.predict(features_scaled)
# NOTE(review): presumably this undoes a log(1 + w/epsilon) transform applied to the training target - confirm
preds = (np.exp(raw_output) - 1) * epsilon


#building output file
columns = ['Element', 'Charge', 'Wavelength', 'T', 'w (A)']
#@markdown 

#@markdown ###Select additional transition parameters you would like to include in output file
Element_symbol = True  #@param {type: 'boolean'}
Wavelength     = True  #@param {type: 'boolean'}
Temperature    = True  #@param {type: 'boolean'}
Charge         = False #@param {type: 'boolean'}

# Keep only the ticked columns; the flag list follows the order of `columns`,
# and the trailing True means the predicted 'w (A)' column is always written.
results = pd.DataFrame(columns = list(compress(columns, [Element_symbol, Charge, Wavelength, Temperature, True])))
results['w (A)'] = preds
# Copy every selected descriptive column (all but the last, 'w (A)') from the input frame
for i in results.columns[:-1]:
    results[i] = data_predictions[i]
# Output name = input name without the 'Stark_ML/' prefix and without its extension,
# whatever the extension length (the fixed [9:-5] slice was only right for '.xlsx').
results.to_csv(filename[len('Stark_ML/'):].rsplit('.', 1)[0] + '.csv', index = False)

# These assignments were indented at top level, which made the whole cell fail with
# an IndentationError; they are dedented and the duplicated `momentum` line is dropped.
# NOTE(review): neither name is referenced in the visible notebook - confirm they are still needed.
momentum = pd.Series()
gap = pd.Series()


## Congratulations! If the previous cell finished execution without errors, you can now download the `<filename>.csv` file with the predicted values of the Stark broadening parameter.

### For more details refer to 'paper' or contact us: ale-zakuskin@laser.chem.msu.ru