In [42]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.utils import shuffle
from itertools import product

# Load the source dataset from the Excel workbook.
df = pd.read_excel("Mg all data.xlsx")

# Model inputs: the three process-parameter columns.
input_columns = ['Time(min)', 'Scanspeed(mm/s)', 'Fluence (J/cm2)']
X = df[input_columns]

# Target variables, one Series per response column.
y_dls, y_vis, y_peak = (
    df[target_name] for target_name in ('DLS (nm)', 'UV VIS', 'UV peak(nm)')
)

# Surrogate models: one random forest per target, each built with the same
# hyperparameters (100 trees, seed 42) and fitted on the same inputs X.
# `fit` returns the estimator itself, so the three fitted models can be
# unpacked directly, in the order DLS -> UV VIS -> UV peak.
dls, vis, peak = (
    RandomForestRegressor(n_estimators=100, random_state=42).fit(X, target)
    for target in (y_dls, y_vis, y_peak)
)

# Build evenly spaced levels for each input, spanning that column's observed
# minimum and maximum in `df`.
grid_columns = ['Time(min)', 'Scanspeed(mm/s)', 'Fluence (J/cm2)']
points_per_axis = 7
time_new, scanspeed_new, fluence_new = (
    np.linspace(df[column].min(), df[column].max(), points_per_axis)
    for column in grid_columns
)

# Full factorial grid: every combination of the three level sets
# (7 * 7 * 7 = 343 rows), stored with the same column names as the inputs.
grid = list(product(time_new, scanspeed_new, fluence_new))
df_synthetic = pd.DataFrame(grid, columns=grid_columns)

# Fill in the response columns of the synthetic grid with the surrogate
# predictions. The input slice is taken once, before any response column is
# added, and only ever contains the three input columns.
synthetic_inputs = df_synthetic[['Time(min)', 'Scanspeed(mm/s)', 'Fluence (J/cm2)']]
for response_column, surrogate in (
    ('DLS (nm)', dls),
    ('UV VIS', vis),
    ('UV peak(nm)', peak),
):
    df_synthetic[response_column] = surrogate.predict(synthetic_inputs)

In [43]:
# Display the synthetic grid together with the surrogate-predicted response columns.
df_synthetic

Unnamed: 0,Time(min),Scanspeed(mm/s),Fluence (J/cm2),DLS (nm),UV VIS,UV peak(nm)
0,2.0,3000.0,1.830000,201.505945,0.556255,203.768750
1,2.0,3000.0,1.843333,201.505945,0.556255,203.768750
2,2.0,3000.0,1.856667,119.313010,0.415149,202.991488
3,2.0,3000.0,1.870000,119.313010,0.415149,202.991488
4,2.0,3000.0,1.883333,119.313010,0.412415,202.991488
...,...,...,...,...,...,...
338,25.0,3500.0,1.856667,109.977408,2.144186,204.965413
339,25.0,3500.0,1.870000,109.977408,2.144186,204.965413
340,25.0,3500.0,1.883333,110.023230,2.145598,204.965413
341,25.0,3500.0,1.896667,105.395149,2.251881,206.732667


In [44]:
# Summary statistics (count, mean, std, quartiles, min/max) for the synthetic dataset.
df_synthetic.describe()

Unnamed: 0,Time(min),Scanspeed(mm/s),Fluence (J/cm2),DLS (nm),UV VIS,UV peak(nm)
count,343.0,343.0,343.0,343.0,343.0,343.0
mean,13.5,3250.0,1.87,129.52914,1.181795,203.799087
std,7.677867,166.910154,0.026706,25.508647,0.659983,1.203814
min,2.0,3000.0,1.83,83.030464,0.371229,201.901113
25%,5.833333,3083.333333,1.843333,109.977408,0.649395,202.855583
50%,13.5,3250.0,1.87,121.029174,0.740281,203.219244
75%,21.166667,3416.666667,1.896667,148.859583,1.818346,204.965413
max,25.0,3500.0,1.91,201.505945,2.251881,206.732667


# ML models

In [55]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
import warnings
warnings.filterwarnings("ignore")

In [56]:
# Model inputs for the cells below: the three input columns of the synthetic
# grid. This rebinds `X`, which previously held the same columns of `df`.
synthetic_feature_columns = ['Time(min)', 'Scanspeed(mm/s)', 'Fluence (J/cm2)']
X = df_synthetic[synthetic_feature_columns]

In [59]:
# Target: the DLS column of the synthetic dataset. These values are the
# surrogate forest's predictions, so the scores below measure agreement with
# that surrogate on held-out grid points.
y = df_synthetic['DLS (nm)']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a default-configured random forest and predict the held-out rows.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Hold-out metrics: R^2 and RMSE (square root of the mean squared error).
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"{model} R2_value: {r2:.4f} rmse: {rmse}")

RandomForestRegressor(random_state=42) R2_value: 0.9902 rmse: 2.5093230050743696


In [62]:
import joblib

# Persist the fitted estimator to disk.
# `model` is whatever the most recently executed training cell assigned, so
# run this cell directly after the DLS training cell.
dls_model_path = 'random_forest_dls_model.pkl'
joblib.dump(model, dls_model_path)
print(f"Model saved as {dls_model_path}")

Model saved as random_forest_dls_model.pkl


In [65]:
# Target: the UV peak column of the synthetic dataset. These values are the
# surrogate forest's predictions, so the scores below measure agreement with
# that surrogate on held-out grid points.
y = df_synthetic['UV peak(nm)']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a default-configured random forest and predict the held-out rows.
# This rebinds `model`, replacing any estimator a previous cell stored there.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Hold-out metrics: R^2 and RMSE (square root of the mean squared error).
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"{model} R2_value: {r2:.4f} rmse: {rmse}")

RandomForestRegressor(random_state=42) R2_value: 0.9972 rmse: 0.06618444791695595


In [66]:
import joblib

# Persist the fitted estimator to disk.
# `model` is whatever the most recently executed training cell assigned, so
# run this cell directly after the UV peak training cell.
uvpeak_model_path = 'random_forest_uvpeak_model.pkl'
joblib.dump(model, uvpeak_model_path)
print(f"Model saved as {uvpeak_model_path}")

Model saved as random_forest_uvpeak_model.pkl


In [70]:
# Target: the UV VIS column of the synthetic dataset. These values are the
# surrogate forest's predictions, so the scores below measure agreement with
# that surrogate on held-out grid points.
y = df_synthetic['UV VIS']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a default-configured random forest and predict the held-out rows.
# This rebinds `model`, replacing any estimator a previous cell stored there.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Hold-out metrics: R^2 and RMSE (square root of the mean squared error).
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"{model} R2_value: {r2:.4f} rmse: {rmse}")

RandomForestRegressor(random_state=42) R2_value: 1.0000 rmse: 0.003925928381270257


In [71]:
import joblib

# Persist the fitted estimator to disk.
# `model` is whatever the most recently executed training cell assigned, so
# run this cell directly after the UV VIS training cell.
# NOTE(review): "uvivs" looks like a typo for "uvvis"; the filename is kept
# unchanged so anything that already loads this file keeps working - confirm
# before renaming.
uvvis_model_path = 'random_forest_uvivs_model.pkl'
joblib.dump(model, uvvis_model_path)
print(f"Model saved as {uvvis_model_path}")

Model saved as random_forest_uvivs_model.pkl
