In [None]:
import numpy as np

import pandas as pd
import matplotlib.pyplot as plt

import seaborn as sns
sns.set()

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

import xgboost as xgb

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler

import optuna
import tensorflow as tf

#import shap

import warnings
warnings.filterwarnings("ignore")

import optuna.visualization as vis

from bayes_opt import BayesianOptimization

from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

In [None]:
# Read one worksheet of the results workbook into a DataFrame.
# sheet_name=1 is a zero-based position, i.e. the second sheet in the file.
workbook_path = "Masters Project Ansys Data (10).xlsx"
df = pd.read_excel(workbook_path, sheet_name=1)
df

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Summarise column dtypes and non-null counts.
df.info()

In [None]:
# Replace each categorical column with LabelEncoder output and keep the fitted
# encoder per column so optimisation results can be decoded later.
# NOTE(review): this overwrites `df` in place, so re-running the cell refits
# the encoders on the already-encoded values and the original labels are lost
# from `encoders` — restart from the load cell before re-running.
cat_cols = ['Solder Ball shapes', 'Dispensing methods', 'Underfill Material']
encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

In [None]:
# Show the mapping of categorical column name -> fitted LabelEncoder.
encoders

In [None]:
# Preview the frame again now that the categorical columns hold encoder output.
df.head()

In [None]:
# Separate the predictors from the regression target; 'S/N' is dropped from
# the predictors together with the target column.
target_col = 'Simulation Time (s)'
X = df.drop(columns=['S/N', target_col])
y = df[target_col]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Display the predictor matrix (every column except 'S/N' and the target).
X

## Random Forest Regressor

In [None]:
# Baseline model: a random forest with default hyper-parameters and a fixed seed.
model = RandomForestRegressor(random_state=21)
model.fit(X_train, y_train)
preds = model.predict(X_test)

# Take the square root of the MSE explicitly. The `squared=False` shortcut was
# deprecated in scikit-learn 1.4 and removed in 1.6, so relying on it makes the
# cell fail with a TypeError on current releases.
rf_rmse = np.sqrt(mean_squared_error(y_test, preds))
print("Random Forest RMSE:", rf_rmse)

In [None]:
# Rank the fitted forest's feature importances from largest to smallest.
importances = model.feature_importances_
features = X.columns
indices = np.argsort(importances)[::-1]

# Horizontal bar chart of the ranked importances, saved to disk and displayed.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x=importances[indices],
    y=features[indices],
    palette="coolwarm",
    ax=ax,
)
ax.set_title("Feature Importance from Random Forest")
ax.set_xlabel("Importance")
ax.set_ylabel("Features")
fig.savefig("forest_importance.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Predict the held-out rows with the random forest.
y_pred = model.predict(X_test)

# Scatter of actual vs. predicted values; points on the dashed red diagonal
# are perfect predictions.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=y_test, y=y_pred, ax=ax)
lo, hi = y_test.min(), y_test.max()
ax.plot([lo, hi], [lo, hi], 'r--')  # ideal line
ax.set_xlabel("Actual Simulation Time")
ax.set_ylabel("Predicted Simulation Time")
ax.set_title("Actual vs Predicted Values")
fig.savefig("forest_accuracy.png", dpi=300, bbox_inches='tight')
plt.show()

print("R² Score:", r2_score(y_test, y_pred))

In [None]:
# SHAP explanation of the random forest, currently disabled: the `shap` import
# in the imports cell is commented out too, so re-enable both together.
#explainer = shap.TreeExplainer(model)
#shap_values = explainer.shap_values(X_test)

#shap.summary_plot(shap_values, X_test)

## XGBoost Regressor

In [None]:
# Gradient-boosted comparison model trained on the same split as the forest.
xgb_params = dict(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
)
xgb_model = xgb.XGBRegressor(**xgb_params)
xgb_model.fit(X_train, y_train)

In [None]:
# Score the XGBoost model on the held-out rows.
xgb_preds = xgb_model.predict(X_test)

# Take the square root of the MSE explicitly. The `squared=False` shortcut was
# deprecated in scikit-learn 1.4 and removed in 1.6.
xgb_rmse = np.sqrt(mean_squared_error(y_test, xgb_preds))
print("XGBoost RMSE:", xgb_rmse)

## Optimization

In [None]:
#optuna
def objective(trial):
    """Return the random forest's predicted simulation time for one sampled design.

    Categorical inputs are sampled over the integer codes of the fitted
    encoders. Continuous inputs are sampled uniformly between the minimum and
    maximum observed in ``df``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to draw the candidate design point.

    Returns
    -------
    float
        Prediction of ``model`` for the sampled point (the value to minimise).

    Raises
    ------
    KeyError
        If the model was trained on a column that is not sampled here.
    """
    params = {}

    # LabelEncoder codes are 0..n_classes-1, so the valid choices are exactly
    # the positions of `classes_`.
    for col in ('Solder Ball shapes', 'Dispensing methods', 'Underfill Material'):
        codes = list(range(len(encoders[col].classes_)))
        params[col] = trial.suggest_categorical(col, codes)

    for col in ('Viscosity', 'Surface Tension', 'Density'):
        params[col] = trial.suggest_float(col, df[col].min(), df[col].max())

    # Build a named, single-row frame and align it to the training columns.
    # A bare NumPy array depends on a hand-maintained column order, and a
    # mismatch would silently feed values into the wrong features.
    sample = pd.DataFrame([params])[list(X_train.columns)]

    # Predict and return simulation time
    return float(model.predict(sample)[0])

# Run the optimization.
# TPE is optuna's default sampler; seeding it makes the search (and therefore
# the reported optimum) repeatable across Restart & Run All.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

# Show results
best_params = study.best_params
print("Best Parameters (Encoded):", best_params)

# Map categorical codes back to their original labels; continuous values pass
# through unchanged. Key order follows the order the parameters were suggested.
decoded = {
    name: (encoders[name].inverse_transform([value])[0] if name in encoders else value)
    for name, value in best_params.items()
}
print("Best Parameters (Decoded):", decoded)
print("Minimum Predicted Simulation Time:", study.best_value)

In [None]:
# Best-value-so-far trace over the trials, then the estimated importance of
# each search parameter.
vis.plot_optimization_history(study).show()
vis.plot_param_importances(study).show()


In [None]:
# Interactive diagnostics for the optuna study.
vis.plot_optimization_history(study).show()
vis.plot_param_importances(study).show()
vis.plot_parallel_coordinate(study).show()
vis.plot_contour(study).show()
vis.plot_slice(study).show()

# The figures above come from optuna's Plotly backend, so calling plt.savefig
# straight after them writes an empty Matplotlib canvas. Redraw the slice plot
# with optuna's Matplotlib backend so the saved PNG actually holds the plot.
from optuna.visualization.matplotlib import plot_slice as plot_slice_mpl

plot_slice_mpl(study)
plt.savefig("slice_plot.png", dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Objective value against each individual parameter, one panel per parameter.
vis.plot_slice(study).show()

In [None]:
from skopt import gp_minimize
from skopt.space import Categorical, Real
from skopt.utils import use_named_args
import numpy as np

# Define the search space: integer codes for the encoded categoricals, and the
# observed [min, max] interval for each continuous property.
categorical_dims = []
for col in ('Solder Ball shapes', 'Dispensing methods', 'Underfill Material'):
    encoder = encoders[col]
    codes = encoder.transform(encoder.classes_).tolist()
    categorical_dims.append(Categorical(codes, name=col))

continuous_dims = []
for col in ('Viscosity', 'Surface Tension', 'Density'):
    continuous_dims.append(Real(df[col].min(), df[col].max(), name=col))

space = categorical_dims + continuous_dims

# Define the objective function
@use_named_args(space)
def objective(**params):
    """Return the random forest's predicted simulation time for one design point.

    ``use_named_args`` passes the values of the point under evaluation as
    keyword arguments named after the dimensions of ``space``.

    Returns
    -------
    float
        Prediction of ``model`` for the point (the value to minimise).

    Raises
    ------
    KeyError
        If the model was trained on a column that is not part of ``space``.
    """
    # Build a named, single-row frame and align it to the training columns.
    # A bare NumPy array depends on a hand-maintained column order, and a
    # mismatch would silently feed values into the wrong features.
    sample = pd.DataFrame([params])[list(X_train.columns)]
    return float(model.predict(sample)[0])

# Run Bayesian Optimization (seeded, so the search is repeatable)
result = gp_minimize(objective, dimensions=space, n_calls=100, random_state=42)

# Best encoded parameters: pair each dimension name with the best value found
param_names = (
    'Solder Ball shapes',
    'Dispensing methods',
    'Underfill Material',
    'Viscosity',
    'Surface Tension',
    'Density',
)
best_encoded = {name: value for name, value in zip(param_names, result.x)}
print("Best Parameters (Encoded):", best_encoded)

# Decode categorical features back to their labels; continuous values pass
# through unchanged.
categorical_names = ('Solder Ball shapes', 'Dispensing methods', 'Underfill Material')
decoded = {
    name: (
        encoders[name].inverse_transform([value])[0]
        if name in categorical_names
        else value
    )
    for name, value in best_encoded.items()
}
print("Best Parameters (Decoded):", decoded)
print("Minimum Predicted Simulation Time:", result.fun)


In [None]:
from skopt.plots import plot_convergence
import matplotlib.pyplot as plt

# Best objective value found so far against the number of calls.
convergence_ax = plot_convergence(result)
convergence_ax.set_title("Convergence Plot - skopt")
plt.show()

In [None]:
from skopt.plots import plot_convergence
# Same convergence trace as the previous cell, drawn without the custom title.
plot_convergence(result)

In [None]:
from skopt.plots import plot_objective
# Matrix of plots showing how the objective varies with each search dimension
# and each pair of dimensions, as estimated from the skopt result.
plot_objective(result)
