In [116]:
import pandas as pd
import warnings
# NOTE(review): this silences every warning for the rest of the session, which
# may also hide library deprecation notices — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Load the dataset from an Excel workbook; the path is relative to the
# notebook's working directory.
df = pd.read_excel("Mg all data.xlsx")

In [117]:
# Summarise the loaded frame: column names, non-null counts, dtypes and memory use.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 81 entries, 0 to 80
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Time(min)             81 non-null     int64  
 1   Scanspeed(mm/s)       81 non-null     int64  
 2   Fluence (J/cm2)       81 non-null     float64
 3   Repetition rate(kHz)  81 non-null     int64  
 4   Pulsewidth            81 non-null     float64
 5   Power %               81 non-null     int64  
 6   DLS (nm)              81 non-null     float64
 7   UV peak(nm)           81 non-null     float64
 8   UV VIS                81 non-null     float64
dtypes: float64(5), int64(4)
memory usage: 5.8 KB


In [118]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

In [119]:
# Features: the three process settings; target: the 'UV VIS' column.
X = df[['Time(min)', 'Scanspeed(mm/s)', 'Fluence (J/cm2)']]
y = df['UV VIS']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random Forest model with default hyperparameters (seeded for repeatable results)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
# RMSE is the square root of the MSE. It is taken explicitly because the
# `squared=False` keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
# so passing it fails on current releases.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print("R² Score:", r2)
print("RMSE:", rmse)

R² Score: 0.8375586243548435
RMSE: 0.17591991685419445


In [120]:
# Features: the three process settings; target: the 'UV peak(nm)' column.
X = df[['Time(min)', 'Scanspeed(mm/s)', 'Fluence (J/cm2)']]
y = df['UV peak(nm)']

# Same 80/20 split and seed as the other targets so the scores are comparable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random Forest model with default hyperparameters (seeded for repeatable results)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
# RMSE is the square root of the MSE. It is taken explicitly because the
# `squared=False` keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
# so passing it fails on current releases.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print("R² Score:", r2)
print("RMSE:", rmse)

R² Score: 0.010651664280667439
RMSE: 0.9324864949302868


In [121]:
# Features: the three process settings; target: the 'DLS (nm)' column.
X = df[['Time(min)', 'Scanspeed(mm/s)', 'Fluence (J/cm2)']]
y = df['DLS (nm)']

# Same 80/20 split and seed as the other targets so the scores are comparable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random Forest model with default hyperparameters (seeded for repeatable results)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# A negative R² means the model does worse on the held-out rows than simply
# predicting their mean.
r2 = r2_score(y_test, y_pred)
# RMSE is the square root of the MSE. It is taken explicitly because the
# `squared=False` keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
# so passing it fails on current releases.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print("R² Score:", r2)
print("RMSE:", rmse)

R² Score: -0.46401887251200935
RMSE: 43.65743837254852


# Hyperparameter tuning

In [142]:
from sklearn.model_selection import GridSearchCV

# Features: the three process settings; target: the 'UV VIS' column.
X = df[['Time(min)', 'Scanspeed(mm/s)', 'Fluence (J/cm2)']]
y = df['UV VIS']


# Same 80/20 split and seed as the untuned model so the test scores are comparable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate hyperparameters; every combination is evaluated by the grid search.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 3, 5, 10],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'bootstrap': [True, False]
}

# 5-fold cross-validated search scored by R², using all available cores.
grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1
)

# Cross-validation runs on the training split only; the held-out test split is
# scored once, with the refitted best estimator.
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test)
r2_best = r2_score(y_test, y_pred_best)
# RMSE is the square root of the MSE. It is taken explicitly because the
# `squared=False` keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
# so passing it fails on current releases.
rmse_best = mean_squared_error(y_test, y_pred_best) ** 0.5

print("Best Parameters:", grid_search.best_params_)
print("Best R² Score on Test Data:", r2_best)
print("Best RMSE on Test Data:", rmse_best)

Best Parameters: {'bootstrap': True, 'max_depth': 3, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
Best R² Score on Test Data: 0.8656641167241799
Best RMSE on Test Data: 0.15997889744290422
