In [1]:
#Import libraries (please check whether you have installed these libraries)
import numpy as np
import pandas as pd
import pickle

## Multiple Output Model for predicting the apparent quantum yields of PPRIs 
## photochemically generated by DOM

In [2]:
# Define the multiple-output model, which simultaneously predicts lnΦ3DOM*, lnΦ1O2 and lnΦ·OH
class MultipleOutputModel():
    """Bundle of pre-trained models predicting lnΦ3DOM*, lnΦ1O2 and lnΦ·OH together.

    Two prediction modes are offered by ``predict``:

    * independent (default): every target is predicted directly from the features;
    * regressor chain: the predicted lnΦ3DOM* is appended to the features as the
      extra column 'Predicted ln(Φ3DOM*)' before predicting lnΦ1O2 and lnΦ·OH.
    """

    @staticmethod
    def _load_pickle(path):
        """Return the object stored in the pickle file at ``path``."""
        # NOTE(security): unpickling can execute arbitrary code. Only load model
        # files that come from a source you trust.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    #Load the developed models
    def __init__(self):
        """Load the five developed models from pickle files in the working directory.

        Raises:
            FileNotFoundError: if one of the pickle files cannot be opened.
        """
        self.model_tri = self._load_pickle('Triplet.pickle')
        self.model_tri_sin = self._load_pickle('Tri-singlet.pickle')
        self.model_tri_hyd = self._load_pickle('Tri-hydroxyl.pickle')
        self.model_sin = self._load_pickle('Singlet.pickle')
        self.model_hyd = self._load_pickle('Hydroxyl.pickle')

    #Make prediction
    def predict(self, x, regressorchain = False, export=False):
        """Predict lnΦ3DOM*, lnΦ1O2 and lnΦ·OH for the samples in ``x``.

        The predictions are also shown as a table (one row per sample) through
        ``display``, which is available as a built-in inside IPython/Jupyter.

        Args:
            x: feature table passed to the models' ``predict`` methods. It must
                support ``.copy()`` and column assignment when
                ``regressorchain`` is true (e.g. a pandas DataFrame). ``x``
                itself is never modified.
            regressorchain: default False. If true, the chain models are used
                for lnΦ1O2 and lnΦ·OH, with the predicted lnΦ3DOM* added to the
                features as the column 'Predicted ln(Φ3DOM*)'.
            export: default False. If true, the prediction table is also written
                to 'predicted AQYs.xlsx' in the working directory.

        Returns:
            list: ``[pred lnΦ3DOM*, pred lnΦ1O2, pred lnΦ·OH]``, each element
            being whatever the corresponding model's ``predict`` returned.
        """
        # The lnΦ3DOM* prediction is needed in both modes, so compute it once.
        pred_tri = self.model_tri.predict(x)

        if regressorchain:
            # Work on a copy so the caller's feature table keeps its columns.
            chained_x = x.copy()
            chained_x['Predicted ln(Φ3DOM*)'] = pred_tri
            mult_x = [pred_tri,
                      self.model_tri_sin.predict(chained_x),
                      self.model_tri_hyd.predict(chained_x)]
        else:
            mult_x = [pred_tri, self.model_sin.predict(x), self.model_hyd.predict(x)]

        # One row per target here; transposed below so samples become rows.
        df_x = pd.DataFrame(mult_x,index=['Pred lnΦ3DOM*' ,'Pred lnΦ1O2', 'Pred lnΦ·OH'])
        if export:
            df_x.T.to_excel('predicted AQYs.xlsx')#you can design your path to export the Excel file.
            print('predicted AQYs.xlsx is exported')
        display(df_x.T)
        return mult_x

    #Calculate R2 and RMSE for each lnΦPPRIs, if you have already calculated the observed lnΦPPRIs through photochemical experiments
    def mult_reg_score(self, true_y, pred_y, export = False):
        """Compute R2 and RMSE of every target from observed and predicted values.

        Args:
            true_y: observed lnΦPPRIs with one column per target (it is
                transposed internally, so it must provide ``.T``, e.g. a pandas
                DataFrame). Columns are matched to ``pred_y`` by position, i.e.
                they must be ordered lnΦ3DOM*, lnΦ1O2, lnΦ·OH.
            pred_y: sequence of three prediction arrays, as returned by
                ``predict``.
            export: default False. If true, the score table is also written to
                'mult_result.xlsx' in the working directory.

        Returns:
            pandas.DataFrame: columns 'R2' and 'RMSE', indexed by
            'lnΦ3DOM*', 'lnΦ1O2' and 'lnΦ·OH'.
        """
        # After the transpose each row holds the observations of one target.
        true_rows = list(np.array(true_y.T))
        mult_r = []
        mult_rmse = []
        for i, predicted in enumerate(pred_y):
            observed = true_rows[i]
            squared_error = (observed - np.asarray(predicted))**2
            sse = np.sum(squared_error)
            sst = np.sum((observed - np.mean(observed))**2)
            mult_r.append(1 - (sse/sst))
            mult_rmse.append(np.mean(squared_error) **0.5)
        mult_result = pd.DataFrame({'R2':mult_r,'RMSE':mult_rmse},index=['lnΦ3DOM*' ,'lnΦ1O2', 'lnΦ·OH'])
        if export:
            # to_excel is a DataFrame method (there is no pandas.to_excel) and it
            # returns None, so its result must not replace the score table.
            mult_result.to_excel('mult_result.xlsx')#you can design your path to export the Excel file.
            print('mult_result.xlsx is exported')
        return mult_result

In [3]:
# Load the example feature table (data taken from previously published literature).
# Your own data must list its columns in the same order as the example data,
# and every feature must be converted to the same units / calculation as the example.
feature = pd.read_excel('Example data.xlsx',sheet_name=0)  # read the first sheet of 'Example data.xlsx'
feature.describe(include='all')  # summary statistics of the example features

Unnamed: 0,DC_EfOM,DC_Isolate,DC_NOM,WR_290-400,WR_290-600,WR_UVA,pH,DOC,SUVA254,E2/E3,FIX,HIX,BIX
count,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0
mean,0.0,0.0,1.0,1.0,0.0,0.0,7.536818,7.865455,2.073714,6.526827,1.598218,0.654859,0.888286
std,0.0,0.0,0.0,0.0,0.0,0.0,0.209475,2.857009,0.413026,0.564145,0.115081,0.051329,0.080985
min,0.0,0.0,1.0,1.0,0.0,0.0,7.29,4.26,1.2381,5.6923,1.3437,0.5746,0.7379
25%,0.0,0.0,1.0,1.0,0.0,0.0,7.365,5.925,1.96505,6.10985,1.543525,0.613075,0.857025
50%,0.0,0.0,1.0,1.0,0.0,0.0,7.51,7.27,2.1228,6.45105,1.5908,0.6508,0.9171
75%,0.0,0.0,1.0,1.0,0.0,0.0,7.6925,9.0075,2.384575,7.0559,1.676875,0.698375,0.9481
max,0.0,0.0,1.0,1.0,0.0,0.0,8.17,13.02,2.6016,7.4286,1.8175,0.7639,0.9809


In [4]:
# Instantiate the multiple-output model. The constructor loads the five pickled
# models ('Triplet.pickle', 'Tri-singlet.pickle', 'Tri-hydroxyl.pickle',
# 'Singlet.pickle', 'Hydroxyl.pickle') from the current working directory.
model = MultipleOutputModel()

In [5]:
# Predict lnΦPPRIs from the example features with the independent models.
# predict() displays the prediction table and returns the list
# [pred lnΦ3DOM*, pred lnΦ1O2, pred lnΦ·OH].
predicted_y = model.predict(feature,regressorchain=False,export=False)
# To apply the regressor chain, set 'regressorchain' to True.
# To also write the predictions to 'predicted AQYs.xlsx', set 'export' to True.

Unnamed: 0,Pred lnΦ3DOM*,Pred lnΦ1O2,Pred lnΦ·OH
0,-0.079309,1.233191,0.278167
1,0.352929,1.433963,1.080178
2,0.097976,0.866695,0.949994
3,0.593573,1.472178,1.238136
4,0.506289,1.144078,0.501819
5,0.599279,1.412128,0.520806
6,0.32395,1.027537,0.676614
7,0.22627,1.169787,0.387786
8,0.774152,1.72431,0.683584
9,0.352174,1.350249,0.653785


In [6]:
# Optional: if you have already measured ΦPPRIs, you can evaluate the predictive
# performance of the developed models by computing R2 and RMSE.
# The observed ΦPPRIs (second sheet) must first be transformed into lnΦPPRIs.
# NOTE: mult_reg_score matches the columns of the observed table to the predictions
# by position, so the columns must be ordered Φ3DOM*, Φ1O2, Φ·OH.
target = pd.read_excel('Example data.xlsx',sheet_name=1)
lntarget = target.apply(np.log)
results = model.mult_reg_score(lntarget,predicted_y,export=False)
results

Unnamed: 0,R2,RMSE
lnΦ3DOM*,0.593377,0.217355
lnΦ1O2,0.887639,0.179493
lnΦ·OH,-0.013152,0.297867


In [7]:
# Repeat the prediction with the regressor-chain models and recompute R2 and RMSE,
# so the scores can be compared with those of the independent models.
predicted_y1 = model.predict(feature,regressorchain=True,export=False)
results1 = model.mult_reg_score(lntarget,predicted_y1,export=False)
results1

Unnamed: 0,Pred lnΦ3DOM*,Pred lnΦ1O2,Pred lnΦ·OH
0,-0.079309,1.139956,0.022206
1,0.352929,1.423915,1.037571
2,0.097976,0.917232,0.809654
3,0.593573,1.489512,1.241382
4,0.506289,1.189237,0.486238
5,0.599279,1.400696,0.568471
6,0.32395,1.012591,0.658419
7,0.22627,1.108394,0.346116
8,0.774152,1.731263,0.553546
9,0.352174,1.314071,0.734935


Unnamed: 0,R2,RMSE
lnΦ3DOM*,0.593377,0.217355
lnΦ1O2,0.906016,0.164159
lnΦ·OH,-0.176405,0.320969
