In [1]:
#Import libraries (please check whether you have installed these libraries)
import numpy as np
import pandas as pd
import pickle

## Multiple Output Model for predicting the apparent quantum yields of PPRIs 
## photochemically generated by DOM

In [33]:
#Define the multiple-output model, which simultaneously predicts SP, 15N and 18O
class MultipleOutputModel():
    """Three pickled regressors that predict SP, 15N and 18O from one feature table.

    The models are read from 'best_cbr_SP.pickle', 'best_cbr_15N.pickle' and
    'best_cbr_18O.pickle' in the current working directory. Each loaded object
    only needs to expose a ``predict(X)`` method.
    """

    #Load the developed models
    def __init__(self):
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # pickle files that come from a source you trust.
        with open('best_cbr_SP.pickle', 'rb') as e:
            self.model_tri_sp = pickle.load(e)
        with open('best_cbr_15N.pickle', 'rb') as f:
            self.model_tri_15n = pickle.load(f)
        with open('best_cbr_18O.pickle', 'rb') as g:
            self.model_tri_18o = pickle.load(g)

    @staticmethod
    def _show(table):
        """Render `table` richly inside IPython/Jupyter, or print it elsewhere."""
        try:
            display(table)
        except NameError:
            # `display` is only injected into the namespace by IPython.
            print(table)

    #Make prediction
    def predict(self, x, regressorchain = False, export=False):
        """Predict SP, 15N and 18O for every row of `x`.

        Parameters
        ----------
        x : pandas.DataFrame
            Input table. The features are taken by position from columns 5-20
            (zero-based, i.e. ``x.iloc[:, 5:21]``); `x` itself is not modified.
        regressorchain : bool, default False
            If True, the SP prediction is appended as an extra feature column
            before the 15N and 18O models are called. If False, all three
            models receive the same feature columns.
        export : bool, default False
            If True, the prediction table is also written to
            'predicted N2O.xlsx' in the current working directory.

        Returns
        -------
        list
            ``[sp_pred, n15_pred, o18_pred]`` - the raw outputs of the three
            models, in that order.
        """
        # Copy the slice so that adding columns below can never touch the
        # caller's frame or trigger SettingWithCopyWarning.
        features = x.iloc[:, 5:21].copy()

        if regressorchain:
            # Predict SP once and reuse it as a chained feature.
            sp_pred = self.model_tri_sp.predict(features)
            x2 = features.copy()
            x3 = features.copy()
            # NOTE(review): the chained column holds the SP prediction; the
            # column names are kept as in the original notebook in case the
            # pickled models expect them - confirm against the training code.
            x2['Pred 15N'] = sp_pred
            x3['Pred 18O'] = sp_pred
            mult_x = [sp_pred,
                      self.model_tri_15n.predict(x2),
                      self.model_tri_18o.predict(x3)]
        else:
            mult_x = [self.model_tri_sp.predict(features),
                      self.model_tri_15n.predict(features),
                      self.model_tri_18o.predict(features)]

        # One row per sample, one column per predicted target.
        table = pd.DataFrame(mult_x, index=['Pred SP', 'Pred 15N', 'Pred 18O']).T

        #export: default=False. Whether the predictions are exported after the prediction.
        #If True, the predicted data will be exported into an Excel file.
        if export:
            table.to_excel('predicted N2O.xlsx')#you can design your path to export the Excel file.
            print('predicted N2O.xlsx is exported')
        self._show(table)
        return mult_x

    #Calculate R2 and RMSE for each target, if you already have observed values
    def mult_reg_score(self, true_y, pred_y, export = False):
        """Compute R2 and RMSE for each of the three predicted targets.

        Parameters
        ----------
        true_y : pandas.DataFrame or 2-D array-like
            Observed values with one column per target, in the same order as
            `pred_y` (SP, 15N, 18O). Columns are matched by position.
        pred_y : sequence of array-like
            Predictions as returned by ``predict``; the result index assumes
            exactly three entries.
        export : bool, default False
            If True, the score table is also written to 'mult_result.xlsx'.

        Returns
        -------
        pandas.DataFrame
            Columns 'R2' and 'RMSE', indexed by 'SP', '15N' and '18O'.
        """
        # Transpose so that each row holds the observations of one target.
        observed = np.asarray(true_y).T
        mult_r = []
        mult_rmse = []
        for i in range(len(pred_y)):
            residual = observed[i] - np.asarray(pred_y[i])
            sse = np.sum(residual ** 2)
            sst = np.sum((observed[i] - np.mean(observed[i])) ** 2)
            mult_r.append(1 - (sse / sst))
            mult_rmse.append(np.mean(residual ** 2) ** 0.5)
        mult_result = pd.DataFrame({'R2': mult_r, 'RMSE': mult_rmse}, index=['SP', '15N', '18O'])
        #export: default=False. Whether the calculated R2 and RMSE are exported.
        #If True, the calculated R2 and RMSE will be exported into an Excel file.
        if export:
            # to_excel is a DataFrame method and returns None, so the result
            # must not be assigned back to mult_result.
            mult_result.to_excel('mult_result.xlsx')#you can design your path to export the Excel file.
            print('mult_result.xlsx is exported')
        return mult_result

In [34]:
#Load the example data set taken from previous literature.
#Your own data must keep the same column order as this example file, and every
#feature must be expressed in the same units and computed in the same way.
feature = pd.read_excel(
    'out_surface_n2o.xlsx',
    sheet_name=0,  #first sheet of out_surface_n2o.xlsx
)
#Statistical description of every column (numeric and non-numeric alike)
feature.describe(include='all')

Unnamed: 0,cruise,date,year,month,day,Latitude,Longitude,Depth,Temperature,Salinity,...,CT,pH,pCO2,NO3,PO4,SiOH4,N2O,n2o_ppb,dn2o_ppb,atmPressure
count,152549,152549,152549.0,152549.0,152549.0,152549.0,152549.0,152549.0,152549.0,152549.0,...,152549.0,152549.0,152549.0,152549.0,152549.0,152549.0,152549.0,152549.0,152549.0,152537.0
unique,168,,,,,,,,,,...,,,,,,,,,,
top,M98,,,,,,,,,,...,,,,,,,,,,
freq,23272,,,,,,,,,,...,,,,,,,,,,
mean,,2000-02-14 17:01:39.375282560,1999.582914,7.04436,14.394431,-8.58081,169.647408,2.877985,19.074914,35.347594,...,2050.467167,8.080845,362.411787,3.432609,0.397719,3.795814,9.19882,370.177102,54.60355,0.998953
min,,1971-06-09 00:00:00,1971.0,1.0,1.0,-76.955,0.0,0.0,-2.1,0.0,...,1004.637947,7.660125,152.605861,-1.360138,-0.020271,0.530279,3.41,174.975256,-143.980635,0.940552
25%,,1988-02-19 00:00:00,1988.0,6.0,7.0,-25.0919,12.6151,0.0,15.066,34.914425,...,2018.080017,8.053215,332.923179,0.039582,0.118902,1.221046,6.23681,312.6,2.524243,0.996818
50%,,1997-07-21 00:00:00,1997.0,7.0,14.0,-13.3,190.655,5.0,19.674254,35.461,...,2072.357615,8.076885,363.939451,0.902619,0.309642,1.566219,8.264438,325.8752,10.892972,1.001046
75%,,2013-08-03 00:00:00,2013.0,9.0,21.0,6.355,323.7,5.0,25.931,35.958447,...,2092.02359,8.107924,388.116248,3.941964,0.526046,1.9783,10.7186,365.5789,44.58663,1.004324
max,,2018-06-04 00:00:00,2018.0,12.0,31.0,88.394333,359.999996,9.973,31.896,40.889,...,2224.546028,8.295753,842.743016,30.603722,2.109039,77.642328,842.972937,30401.75815,30076.23972,1.025291


In [35]:
#Instantiate the multiple-output model.
#The constructor unpickles best_cbr_SP.pickle, best_cbr_15N.pickle and best_cbr_18O.pickle
#using relative paths, so these files must be present in the current working directory.
model = MultipleOutputModel()

In [39]:
#Predict all three targets for the example data. With regressorchain=False every model
#receives the same feature columns; export=True also writes the table to 'predicted N2O.xlsx'.
predicted_y = model.predict(
    feature,
    regressorchain=False,
    export=True,
)

predicted AQYs.xlsx is exported


Unnamed: 0,Pred SP,Pred 15N,Pred 18O
0,16.780690,7.533541,40.621792
1,19.212141,7.314965,45.337681
2,18.765856,6.929652,44.184303
3,19.897849,6.597153,44.361980
4,21.920104,7.200543,41.527990
...,...,...,...
152544,21.677507,8.388708,46.558981
152545,21.677507,8.388708,46.558981
152546,21.689216,8.388708,46.558981
152547,21.677507,8.388708,46.558981


In [None]:
#If you already have observed values for the three targets and want to explore the
#predictive performance of the developed models, import them here to calculate R2 and RMSE.
#NOTE(review): mult_reg_score pairs the columns of the observed table with the predictions
#by position, so `target` must contain only the observed SP, 15N and 18O columns, in that
#order and on the same scale as the predictions - select them from the sheet before scoring
#(the column names are not visible here; confirm against your data).
target = pd.read_excel('out_surface_n2o.xlsx',sheet_name=0)
#`lntarget` was never defined in this notebook; score against `target` directly.
results = model.mult_reg_score(target,predicted_y,export=False)
results