In [7]:

import pandas as pd
from sklearn.preprocessing import PowerTransformer
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.callbacks import EarlyStopping, ModelCheckpoint
import numpy as np
from keras.models import load_model
import math


# Load the database; its first eight columns are fed to the network as inputs.
df_testing = pd.read_excel('database_new.xlsx')

# Box-Cox is only defined for strictly positive data, hence the tiny offset that is
# added before the transform is fitted and applied.
# NOTE(review): the transformer is fitted on the prediction inputs themselves instead
# of re-using one fitted at training time — confirm the saved network was trained on
# identically transformed data.
pt = PowerTransformer(method='box-cox')
X_test = pt.fit_transform(df_testing.iloc[:, :8] + 1e-11)

# Predict with the saved network and label its three outputs; the statements that
# follow derive further quantities from them.
model = load_model('whole_data_bm/best_model.hdf5')
Y_test = pd.DataFrame(model.predict(X_test), columns=["q_abs", "q_sca", "g"])

# Pull the eight input columns out of the frame as individual Series.
(wavelength, fractal_dimension, fraction_of_coating, primary_particle_size,
 number_of_primary_particles, vol_equi_radius_inner, vol_equi_radius_outer,
 equi_mobility_dia) = (
    df_testing[column] for column in (
        'wavelength', 'fractal_dimension', 'fraction_of_coating', 'primary_particle_size',
        'number_of_primary_particles', 'vol_equi_radius_inner', 'vol_equi_radius_outer',
        'equi_mobility_dia'))

# Constant column holding 2 for every row (same dtype as the wavelength column).
mie_epsilon = np.full_like(wavelength, 2)
# 2*pi divided by the wavelength, row by row.
length_scale_factor = (2 * math.pi) / wavelength

# Wavelength-dependent refractive indices: real ("m_real_*") and imaginary ("m_im_*")
# parts for black carbon ("bc") and for the organics. Values are tabulated for the
# wavelengths 467, 530 and 660 only; every other wavelength yields NaN.
#
# The arrays are built as float64 explicitly. The previous element-wise loops wrote
# into np.empty_like(wavelength), which inherits the dtype of the wavelength column:
# with an integer column 1.92 was silently truncated to 1 and the NaN fallback raised
# ValueError. Matching is done on values rather than through index labels, so it does
# not depend on `wavelength` carrying a default 0..n-1 index.
wavelength_values = np.asarray(wavelength, dtype=float)
is_tabulated_wavelength = [wavelength_values == 467,
                           wavelength_values == 530,
                           wavelength_values == 660]

m_real_bc = np.select(is_tabulated_wavelength, [1.92, 1.96, 2.0], default=np.nan)
m_im_bc = np.select(is_tabulated_wavelength, [0.67, 0.65, 0.63], default=np.nan)
m_real_organics = np.select(is_tabulated_wavelength, [1.59, 1.47, 1.47], default=np.nan)
m_im_organics = np.select(is_tabulated_wavelength, [0.11, 0.04, 0.0], default=np.nan)

# Sphere volumes from the volume-equivalent radii; the organics volume is the outer
# sphere minus the inner (black carbon) sphere.
volume_total = 4 * math.pi * vol_equi_radius_outer ** 3 / 3
volume_bc = 4 * math.pi * vol_equi_radius_inner ** 3 / 3
volume_organics = volume_total - volume_bc

# Constant density columns, one entry per row.
density_bc = np.full_like(wavelength, 1.5, dtype=float)  #Check
density_organics = np.full_like(wavelength, 1.1, dtype=float)  #Check

# Masses and mass ratios. The 1e-21 factor is a unit conversion
# (presumably nm^3 * g/cm^3 -> g; TODO confirm the units of the radii and densities).
mass_bc = volume_bc * density_bc * 1e-21
mass_organics = volume_organics * density_organics * 1e-21
mass_total = mass_bc + mass_organics
mr_total_bc = mass_total / mass_bc
mr_nonbc_bc = mass_organics / mass_bc

# Predicted efficiencies and asymmetry parameter; extinction = absorption + scattering.
q_abs, q_sca, g = (Y_test[name] for name in ('q_abs', 'q_sca', 'g'))
q_ext = q_abs + q_sca

# Cross sections = efficiency * geometric cross section of the outer sphere, scaled
# by 1e-6 (presumably nm^2 -> um^2; TODO confirm).
c_geo = math.pi * vol_equi_radius_outer ** 2
c_ext = q_ext * c_geo / 1e6
c_abs = q_abs * c_geo / 1e6
c_sca = q_sca * c_geo / 1e6
ssa = q_sca / q_ext

# Absorption cross section per unit mass, relative to total / bc / organics mass.
# Rows whose organics mass is 0 produce inf in mac_organics.
mac_total = c_abs / (mass_total * 1e12)
mac_bc = c_abs / (mass_bc * 1e12)
mac_organics = c_abs / (mass_organics * 1e12)

# Output columns in file order. Keeping each name next to its values makes it
# impossible for the header and the stacked data to drift out of step.
output_columns = {
    'wavelength': wavelength,
    'fractal_dimension': fractal_dimension,
    'fraction_of_coating': fraction_of_coating,
    'primary_particle_size': primary_particle_size,
    'number_of_primary_particles': number_of_primary_particles,
    'vol_equi_radius_inner': vol_equi_radius_inner,
    'vol_equi_radius_outer': vol_equi_radius_outer,
    'equi_mobility_dia': equi_mobility_dia,
    'mie_epsilon': mie_epsilon,
    'length_scale_factor': length_scale_factor,
    'm_real_bc': m_real_bc,
    'm_im_bc': m_im_bc,
    'm_real_organics': m_real_organics,
    'm_im_organics': m_im_organics,
    'volume_total': volume_total,
    'volume_bc': volume_bc,
    'volume_organics': volume_organics,
    'density_bc': density_bc,
    'density_organics': density_organics,
    'mass_total': mass_total,
    'mass_organics': mass_organics,
    'mass_bc': mass_bc,
    'mr_total_bc': mr_total_bc,
    'mr_nonbc_bc': mr_nonbc_bc,
    'q_ext': q_ext,
    'q_abs': q_abs,
    'q_sca': q_sca,
    'g': g,
    'c_geo': c_geo,
    'c_ext': c_ext,
    'c_abs': c_abs,
    'c_sca': c_sca,
    'ssa': ssa,
    'mac_total': mac_total,
    'mac_bc': mac_bc,
    'mac_organics': mac_organics,
}

# Stack the 1-D columns side by side into one 2-D array, wrap it in a frame and save.
final = np.stack(tuple(output_columns.values()), axis=1)
final_dataset = pd.DataFrame(data=final, columns=list(output_columns))
final_dataset.to_csv('sample_predicted_forward_dataset_ANN.csv', index=False)




In [6]:

import pandas as pd
from sklearn.preprocessing import PowerTransformer
from sklearn.kernel_ridge import KernelRidge
import pickle

# Train a kernel ridge regressor on the whole database and persist it with pickle.
# Inputs are the first eight columns; targets are columns 25..27 (presumably q_abs,
# q_sca and g, the labels the prediction cell gives to the model output — TODO confirm).
df = pd.read_excel('database_new.xlsx')
X = df.iloc[:, :8]
Y = df.iloc[:, 25:28]

# Box-Cox is only defined for strictly positive data, hence the tiny offset.
pt = PowerTransformer(method='box-cox')
X_transformed = pt.fit_transform(X+0.00000000001)

regressor = KernelRidge(alpha=0.0001, gamma=0.75, kernel='rbf')
model = regressor.fit(X_transformed,Y)

# save the model to disk; the context manager guarantees the file is flushed and
# closed (the bare open() used before was never closed explicitly).
filename = 'finalized_model_KRR.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [5]:

import pandas as pd
from sklearn.preprocessing import PowerTransformer
from sklearn.kernel_ridge import KernelRidge
import pickle
import numpy as np
import math


# Load the database; its first eight columns are the model inputs.
df_testing = pd.read_excel('database_new.xlsx')
pt = PowerTransformer(method='box-cox')

# Box-Cox is only defined for strictly positive data, hence the tiny offset.
# NOTE(review): the transformer is fitted on the prediction inputs themselves instead
# of re-using the one fitted at training time — confirm both see the same data.
X_test = df_testing.iloc[:, :8]
X_test = pt.fit_transform(X_test+0.00000000001)

# Load the pickled regressor. The context manager closes the file handle, which the
# bare open() used before never did.
# NOTE: unpickling executes arbitrary code — only load model files you produced yourself.
filename = 'finalized_model_KRR.sav'
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
Y_test = loaded_model.predict(X_test)


# Computing others
Y_test = pd.DataFrame(data=Y_test, columns=["q_abs", "q_sca", "g"])

# Pull the eight input columns out of the frame as individual Series.
(wavelength, fractal_dimension, fraction_of_coating, primary_particle_size,
 number_of_primary_particles, vol_equi_radius_inner, vol_equi_radius_outer,
 equi_mobility_dia) = (
    df_testing[column] for column in (
        'wavelength', 'fractal_dimension', 'fraction_of_coating', 'primary_particle_size',
        'number_of_primary_particles', 'vol_equi_radius_inner', 'vol_equi_radius_outer',
        'equi_mobility_dia'))

# Constant column holding 2 for every row (same dtype as the wavelength column).
mie_epsilon = np.full_like(wavelength, 2)
# 2*pi divided by the wavelength, row by row.
length_scale_factor = (2 * math.pi) / wavelength

# Wavelength-dependent refractive indices: real ("m_real_*") and imaginary ("m_im_*")
# parts for black carbon ("bc") and for the organics. Values are tabulated for the
# wavelengths 467, 530 and 660 only; every other wavelength yields NaN.
#
# The arrays are built as float64 explicitly. The previous element-wise loops wrote
# into np.empty_like(wavelength), which inherits the dtype of the wavelength column:
# with an integer column 1.92 was silently truncated to 1 and the NaN fallback raised
# ValueError. Matching is done on values rather than through index labels, so it does
# not depend on `wavelength` carrying a default 0..n-1 index.
wavelength_values = np.asarray(wavelength, dtype=float)
is_tabulated_wavelength = [wavelength_values == 467,
                           wavelength_values == 530,
                           wavelength_values == 660]

m_real_bc = np.select(is_tabulated_wavelength, [1.92, 1.96, 2.0], default=np.nan)
m_im_bc = np.select(is_tabulated_wavelength, [0.67, 0.65, 0.63], default=np.nan)
m_real_organics = np.select(is_tabulated_wavelength, [1.59, 1.47, 1.47], default=np.nan)
m_im_organics = np.select(is_tabulated_wavelength, [0.11, 0.04, 0.0], default=np.nan)

# Sphere volumes from the volume-equivalent radii; the organics volume is the outer
# sphere minus the inner (black carbon) sphere.
volume_total = 4 * math.pi * vol_equi_radius_outer ** 3 / 3
volume_bc = 4 * math.pi * vol_equi_radius_inner ** 3 / 3
volume_organics = volume_total - volume_bc

# Constant density columns, one entry per row.
density_bc = np.full_like(wavelength, 1.5, dtype=float)  #Check
density_organics = np.full_like(wavelength, 1.1, dtype=float)  #Check

# Masses and mass ratios. The 1e-21 factor is a unit conversion
# (presumably nm^3 * g/cm^3 -> g; TODO confirm the units of the radii and densities).
mass_bc = volume_bc * density_bc * 1e-21
mass_organics = volume_organics * density_organics * 1e-21
mass_total = mass_bc + mass_organics
mr_total_bc = mass_total / mass_bc
mr_nonbc_bc = mass_organics / mass_bc

# Predicted efficiencies and asymmetry parameter; extinction = absorption + scattering.
q_abs, q_sca, g = (Y_test[name] for name in ('q_abs', 'q_sca', 'g'))
q_ext = q_abs + q_sca

# Cross sections = efficiency * geometric cross section of the outer sphere, scaled
# by 1e-6 (presumably nm^2 -> um^2; TODO confirm).
c_geo = math.pi * vol_equi_radius_outer ** 2
c_ext = q_ext * c_geo / 1e6
c_abs = q_abs * c_geo / 1e6
c_sca = q_sca * c_geo / 1e6
ssa = q_sca / q_ext

# Absorption cross section per unit mass, relative to total / bc / organics mass.
# Rows whose organics mass is 0 produce inf in mac_organics.
mac_total = c_abs / (mass_total * 1e12)
mac_bc = c_abs / (mass_bc * 1e12)
mac_organics = c_abs / (mass_organics * 1e12)

# Output columns in file order. Keeping each name next to its values makes it
# impossible for the header and the stacked data to drift out of step.
output_columns = {
    'wavelength': wavelength,
    'fractal_dimension': fractal_dimension,
    'fraction_of_coating': fraction_of_coating,
    'primary_particle_size': primary_particle_size,
    'number_of_primary_particles': number_of_primary_particles,
    'vol_equi_radius_inner': vol_equi_radius_inner,
    'vol_equi_radius_outer': vol_equi_radius_outer,
    'equi_mobility_dia': equi_mobility_dia,
    'mie_epsilon': mie_epsilon,
    'length_scale_factor': length_scale_factor,
    'm_real_bc': m_real_bc,
    'm_im_bc': m_im_bc,
    'm_real_organics': m_real_organics,
    'm_im_organics': m_im_organics,
    'volume_total': volume_total,
    'volume_bc': volume_bc,
    'volume_organics': volume_organics,
    'density_bc': density_bc,
    'density_organics': density_organics,
    'mass_total': mass_total,
    'mass_organics': mass_organics,
    'mass_bc': mass_bc,
    'mr_total_bc': mr_total_bc,
    'mr_nonbc_bc': mr_nonbc_bc,
    'q_ext': q_ext,
    'q_abs': q_abs,
    'q_sca': q_sca,
    'g': g,
    'c_geo': c_geo,
    'c_ext': c_ext,
    'c_abs': c_abs,
    'c_sca': c_sca,
    'ssa': ssa,
    'mac_total': mac_total,
    'mac_bc': mac_bc,
    'mac_organics': mac_organics,
}

# Stack the 1-D columns side by side into one 2-D array, wrap it in a frame and save.
final = np.stack(tuple(output_columns.values()), axis=1)
final_dataset = pd.DataFrame(data=final, columns=list(output_columns))
final_dataset.to_csv('sample_predicted_KRR_forward_dataset.csv', index=False)
# Last expression of the cell: display the assembled frame.
final_dataset

Unnamed: 0,wavelength,fractal_dimension,fraction_of_coating,primary_particle_size,number_of_primary_particles,vol_equi_radius_inner,vol_equi_radius_outer,equi_mobility_dia,mie_epsilon,length_scale_factor,...,q_sca,g,c_geo,c_ext,c_abs,c_sca,ssa,mac_total,mac_bc,mac_organics
0,660.0,1.5,0.0,15.0,1.0,15.000000,15.000000,23.829600,2.0,0.009520,...,0.000387,0.003811,706.858347,0.000083,0.000083,2.736469e-07,0.003303,3.894379,3.894379,inf
1,660.0,1.5,0.0,15.0,2.0,18.898816,18.898816,33.934547,2.0,0.009520,...,0.001017,0.008846,1122.067684,0.000173,0.000172,1.141349e-06,0.006585,4.059660,4.059660,inf
2,660.0,1.5,0.0,15.0,3.0,21.633744,21.633744,41.730020,2.0,0.009520,...,0.001744,0.016269,1470.324613,0.000269,0.000266,2.564578e-06,0.009541,4.184818,4.184818,inf
3,660.0,1.5,0.0,15.0,4.0,23.811016,23.811016,48.324498,2.0,0.009520,...,0.002496,0.023497,1781.171422,0.000361,0.000357,4.444955e-06,0.012297,4.209175,4.209175,inf
4,660.0,1.5,0.0,15.0,5.0,25.649639,25.649639,54.149126,2.0,0.009520,...,0.003327,0.029664,2066.866345,0.000455,0.000448,6.876997e-06,0.015109,4.227881,4.227881,inf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18521,467.0,2.9,90.0,29.0,650.0,129.935866,251.209341,1253.168019,2.0,0.013454,...,2.602333,0.878553,198253.763135,0.765795,0.249872,5.159224e-01,0.673708,3.256925,18.128058,3.970223
18522,467.0,2.9,90.0,29.0,700.0,133.185600,257.492161,1301.438025,2.0,0.013454,...,2.666021,0.882292,208294.544403,0.822056,0.266738,5.553177e-01,0.675523,3.228418,17.969389,3.935473
18523,467.0,2.9,90.0,29.0,800.0,139.247665,269.212152,1393.154829,2.0,0.013454,...,2.778040,0.888214,227687.502395,0.932551,0.300026,6.325251e-01,0.678274,3.177400,17.685422,3.873281
18524,467.0,2.9,90.0,29.0,900.0,144.823408,279.991922,1479.405303,2.0,0.013454,...,2.873968,0.892607,246286.651870,1.040360,0.332540,7.078200e-01,0.680360,3.130434,17.424012,3.816030


In [22]:
# Imports live outside the timed loop so the first measurement is not inflated by
# one-off module loading. `math` is imported here explicitly: the loop body uses
# math.pi, and previously only worked if another cell had already imported it.
import math
import pickle
import time

import numpy as np
import pandas as pd
from keras.models import load_model
from sklearn.preprocessing import StandardScaler,MinMaxScaler  # not referenced directly; kept from the original cell

# Benchmark: run the complete forward-prediction pipeline 1000 times (read inputs,
# load scalers and model, predict, derive quantities, write the CSV) and record the
# wall-clock duration of every run in `array_time`.
# NOTE(review): a commented-out line in the original cell mentioned a "batch of 32
# samples" — confirm how many rows '../data/database.csv' holds before normalising
# the timings per sample.
array_time=[]
for run_index in range(0,1000):
    # perf_counter is a monotonic, high-resolution clock intended for measuring intervals.
    initial_time = time.perf_counter()

    # Forward slashes work on every platform; the previous '..\data\...' literals relied
    # on invalid escape sequences and only resolved on Windows.
    df_testing = pd.read_csv('../data/database.csv')
    X_test = df_testing.loc[:, ['wavelength', 'fractal_dimension', 'fraction_of_coating', 'primary_particle_size',
                                'number_of_primary_particles',
                                'vol_equi_radius_outer', 'vol_equi_radius_inner', 'equi_mobility_dia']]

    # Load the fitted input/output scalers; the context managers close the file handles.
    # NOTE: unpickling executes arbitrary code — only load files you produced yourself.
    with open('../data/scaler_x.sav', 'rb') as scaler_file:
        scaling_x = pickle.load(scaler_file)
    with open('../data/scaler_y.sav', 'rb') as scaler_file:
        scaling_y = pickle.load(scaler_file)

    X_test = scaling_x.transform(X_test)

    model = load_model('../data/best_model_forward.hdf5')
    Y_test = model.predict(X_test)
    Y_test = scaling_y.inverse_transform(Y_test)

    # Computing others
    Y_test = pd.DataFrame(data=Y_test, columns=["q_abs", "q_sca", "g"])

    wavelength = df_testing['wavelength']
    fractal_dimension = df_testing['fractal_dimension']
    fraction_of_coating = df_testing['fraction_of_coating']
    primary_particle_size = df_testing['primary_particle_size']
    number_of_primary_particles = df_testing['number_of_primary_particles']
    vol_equi_radius_inner = df_testing['vol_equi_radius_inner']
    vol_equi_radius_outer = df_testing['vol_equi_radius_outer']
    equi_mobility_dia = df_testing['equi_mobility_dia']

    mie_epsilon = np.zeros_like(wavelength) + 2
    length_scale_factor = 2 * math.pi / wavelength

    # Refractive indices tabulated for the wavelengths 467, 530 and 660; NaN elsewhere.
    # Built as float64 explicitly: the previous loops filled np.empty_like(wavelength),
    # which truncates 1.92 to 1 (and rejects NaN) when the wavelength column is
    # integer-typed. They also reused `i`, the name of the outer loop variable.
    wavelength_values = np.asarray(wavelength, dtype=float)
    is_tabulated_wavelength = [wavelength_values == 467,
                               wavelength_values == 530,
                               wavelength_values == 660]
    m_real_bc = np.select(is_tabulated_wavelength, [1.92, 1.96, 2.0], default=np.nan)
    m_im_bc = np.select(is_tabulated_wavelength, [0.67, 0.65, 0.63], default=np.nan)
    m_real_organics = np.select(is_tabulated_wavelength, [1.59, 1.47, 1.47], default=np.nan)
    m_im_organics = np.select(is_tabulated_wavelength, [0.11, 0.04, 0.0], default=np.nan)

    # Sphere volumes from the volume-equivalent radii; organics = outer minus inner.
    volume_total = (4 * math.pi * (vol_equi_radius_outer ** 3)) / 3
    volume_bc = (4 * math.pi * (vol_equi_radius_inner ** 3)) / 3
    volume_organics = volume_total - volume_bc

    density_bc = np.zeros_like(wavelength) + 1.5 #Check
    density_organics = np.zeros_like(wavelength) + 1.1 #Check

    # 1e-21 is a unit conversion (presumably nm^3 * g/cm^3 -> g; TODO confirm units).
    mass_bc = volume_bc * density_bc * 1e-21
    mass_organics = volume_organics * density_organics * 1e-21
    mass_total = mass_bc + mass_organics
    mr_total_bc = mass_total / mass_bc
    mr_nonbc_bc = mass_organics / mass_bc

    q_abs = Y_test['q_abs']
    q_sca = Y_test['q_sca']
    q_ext = q_abs + q_sca
    g = Y_test['g']
    # Cross sections = efficiency * geometric cross section, scaled by 1e-6
    # (presumably nm^2 -> um^2; TODO confirm).
    c_geo = math.pi * vol_equi_radius_outer ** 2
    c_ext = (q_ext * c_geo) / 1e6
    c_abs = q_abs * c_geo / 1e6
    c_sca = q_sca * c_geo / 1e6
    ssa = q_sca / q_ext
    # Rows whose organics mass is 0 produce inf in mac_organics.
    mac_total = c_abs / (mass_total * 1e12)
    mac_bc = c_abs / (mass_bc * 1e12)
    mac_organics = c_abs / (mass_organics * 1e12)

    # Output columns in file order, each name paired with its values.
    output_columns = {
        'wavelength': wavelength,
        'fractal_dimension': fractal_dimension,
        'fraction_of_coating': fraction_of_coating,
        'primary_particle_size': primary_particle_size,
        'number_of_primary_particles': number_of_primary_particles,
        'vol_equi_radius_inner': vol_equi_radius_inner,
        'vol_equi_radius_outer': vol_equi_radius_outer,
        'equi_mobility_dia': equi_mobility_dia,
        'mie_epsilon': mie_epsilon,
        'length_scale_factor': length_scale_factor,
        'm_real_bc': m_real_bc,
        'm_im_bc': m_im_bc,
        'm_real_organics': m_real_organics,
        'm_im_organics': m_im_organics,
        'volume_total': volume_total,
        'volume_bc': volume_bc,
        'volume_organics': volume_organics,
        'density_bc': density_bc,
        'density_organics': density_organics,
        'mass_total': mass_total,
        'mass_organics': mass_organics,
        'mass_bc': mass_bc,
        'mr_total_bc': mr_total_bc,
        'mr_nonbc_bc': mr_nonbc_bc,
        'q_ext': q_ext,
        'q_abs': q_abs,
        'q_sca': q_sca,
        'g': g,
        'c_geo': c_geo,
        'c_ext': c_ext,
        'c_abs': c_abs,
        'c_sca': c_sca,
        'ssa': ssa,
        'mac_total': mac_total,
        'mac_bc': mac_bc,
        'mac_organics': mac_organics,
    }
    final = np.stack(tuple(output_columns.values()), axis=1)
    final_dataset = pd.DataFrame(data=final, columns=list(output_columns))
    final_dataset.to_csv('../data/sample_predicted_forward_dataset.csv', index=False)

    finish_time = time.perf_counter()
    time_diff = finish_time - initial_time
    print(time_diff)
    array_time.append(time_diff)
    





1.0146443843841553
0.928720235824585
0.9248001575469971
0.9587106704711914
0.9495491981506348
0.9367351531982422
0.9540741443634033
1.2820072174072266
0.9161128997802734
0.9035370349884033
0.905494213104248
0.8981459140777588
0.9231584072113037
0.9261491298675537
0.9479751586914062
0.9343321323394775
0.9164814949035645
0.9433472156524658
0.9536776542663574
0.929002046585083
0.9035904407501221
1.0058069229125977
0.9625115394592285
0.9010310173034668
0.9578096866607666
0.9364142417907715
0.9260506629943848
0.9481987953186035
0.9149487018585205
0.9105498790740967
0.9397587776184082
0.9024064540863037
0.9075155258178711
0.927513599395752
0.9293460845947266
0.9531331062316895
0.9366345405578613
0.959937572479248
0.9550328254699707
0.9445896148681641
0.9358148574829102
0.9364502429962158
0.9160013198852539
0.9534382820129395
0.9219460487365723
0.9590024948120117
0.9490318298339844
0.920661449432373
0.9909658432006836
0.9559965133666992
0.9450058937072754
0.9919958114624023
0.9568889141082764

0.9672901630401611
0.948732852935791
0.9772956371307373
0.9543824195861816
0.9323091506958008
0.9823458194732666
1.4854528903961182
0.9663946628570557
0.9167802333831787
0.9855797290802002
0.9868471622467041
0.954592227935791
0.9004676342010498
1.0125315189361572
0.9135878086090088
0.9573929309844971
0.9486696720123291
0.9473361968994141
0.9432086944580078
0.9006044864654541
0.9500124454498291
1.0216670036315918
0.9918522834777832
0.9782381057739258
0.9660770893096924
0.9648356437683105
0.9891674518585205
0.9660489559173584
0.9643702507019043
0.9776406288146973
0.9166984558105469
0.951737642288208
0.9001469612121582
0.9748649597167969
0.9134194850921631
0.9389958381652832
0.948915958404541
0.923229455947876
0.9520838260650635
0.8956098556518555
0.9295108318328857
0.9324760437011719
0.9656894207000732
0.9311180114746094
0.9838573932647705
0.9849228858947754
0.9468023777008057
0.943152666091919
0.9355881214141846
0.9705135822296143
0.9401228427886963
0.9376323223114014
0.9362583160400391

0.9621477127075195
0.9230248928070068
0.9588818550109863
0.9888644218444824
0.922116756439209
0.9381308555603027
1.0066149234771729
0.9938452243804932
0.9713835716247559
0.9141736030578613
1.0173888206481934
0.955080509185791
0.9544610977172852
1.0095465183258057
0.9975717067718506
0.9451696872711182
1.0157227516174316
0.9257311820983887
0.9203059673309326
0.9250214099884033
0.9835493564605713
1.0125296115875244
1.0122737884521484
0.9312272071838379
0.9862852096557617
0.9423081874847412
1.0199856758117676
0.9821171760559082
1.4981443881988525
0.9440934658050537
1.0131192207336426
0.9708774089813232
0.9587974548339844
0.9569613933563232
0.9542121887207031
1.0320508480072021
0.968059778213501
0.9829690456390381
1.029651403427124
1.0513825416564941
1.0372371673583984
0.9561367034912109
0.9677698612213135
1.0151302814483643
0.9403259754180908
0.930060863494873
0.9649407863616943
0.9411256313323975
0.9614813327789307
1.0679872035980225
0.9752590656280518
0.9545533657073975
0.949323892593383

In [6]:
# Display the per-run durations collected by the timing loop. `array_time` is created
# by that loop, so it must have been run in the current kernel session first;
# otherwise this raises NameError.
array_time

NameError: name 'array_time' is not defined

In [4]:
from statistics import mean
# Arithmetic mean of the recorded per-run durations (requires `array_time` to exist
# in the current kernel session).
mean_time=mean(array_time)

NameError: name 'array_time' is not defined

In [3]:
# Mean run time divided by 32 — presumably the number of samples processed per run
# (TODO confirm against the size of the input file).
mean_time/32

NameError: name 'mean_time' is not defined