**Load and Preprocess Data**

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
df = pd.read_csv("large_cst_dataset.csv")

# Split features (X) and target (Y)
X = df.drop(columns=["S11_10GHz", "S11_12GHz", "S11_14GHz"])
y = df[["S11_10GHz", "S11_12GHz", "S11_14GHz"]]

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

1. Linear Regression (Baseline)

In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Train Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)

# Predictions
y_pred_lr = lr_model.predict(X_test_scaled)

# Evaluate Model
lr_mse = mean_squared_error(y_test, y_pred_lr)
lr_r2 = r2_score(y_test, y_pred_lr)

print(f"Linear Regression - MSE: {lr_mse}, R2 Score: {lr_r2}")

Linear Regression - MSE: 1.5291082328490673, R2 Score: -0.020307351694735815


2. Decision Tree Regressor

In [3]:
from sklearn.tree import DecisionTreeRegressor

# Train Decision Tree
dt_model = DecisionTreeRegressor(max_depth=8, random_state=42)
dt_model.fit(X_train_scaled, y_train)

# Predictions
y_pred_dt = dt_model.predict(X_test_scaled)

# Evaluate Model
dt_mse = mean_squared_error(y_test, y_pred_dt)
dt_r2 = r2_score(y_test, y_pred_dt)

print(f"Decision Tree - MSE: {dt_mse}, R2 Score: {dt_r2}")

Decision Tree - MSE: 2.0964492152826164, R2 Score: -0.33900810564500333


3. Random Forest Regressor

In [4]:
from sklearn.ensemble import RandomForestRegressor

# Train Random Forest
rf_model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Predictions
y_pred_rf = rf_model.predict(X_test_scaled)

# Evaluate Model
rf_mse = mean_squared_error(y_test, y_pred_rf)
rf_r2 = r2_score(y_test, y_pred_rf)

print(f"Random Forest - MSE: {rf_mse}, R2 Score: {rf_r2}")

Random Forest - MSE: 1.6109347477148066, R2 Score: -0.07369452382452879


4. XGBoost Regressor

In [5]:
from xgboost import XGBRegressor

# Train XGBoost
xgb_model = XGBRegressor(n_estimators=150, max_depth=8, learning_rate=0.05)
xgb_model.fit(X_train_scaled, y_train)

# Predictions
y_pred_xgb = xgb_model.predict(X_test_scaled)

# Evaluate Model
xgb_mse = mean_squared_error(y_test, y_pred_xgb)
xgb_r2 = r2_score(y_test, y_pred_xgb)

print(f"XGBoost - MSE: {xgb_mse}, R2 Score: {xgb_r2}")

XGBoost - MSE: 1.8492149114608765, R2 Score: -0.24095754325389862


5. Support Vector Regressor (SVR)

In [6]:
from sklearn.svm import SVR

# Train SVR for each target separately
svr_models = {}
svr_r2_scores = {}

for freq in ["S11_10GHz", "S11_12GHz", "S11_14GHz"]:
    svr_models[freq] = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
    svr_models[freq].fit(X_train_scaled, y_train[freq])  # Fit on one target at a time

    # Predictions
    y_pred_svr = svr_models[freq].predict(X_test_scaled)

    # Evaluate Model
    svr_r2_scores[freq] = r2_score(y_test[freq], y_pred_svr)

# Average R² Score for all targets
svr_avg_r2 = np.mean(list(svr_r2_scores.values()))
print(f"SVR - Average R² Score: {svr_avg_r2}")

SVR - Average R² Score: -0.4222660435197345


6. Artificial Neural Network (ANN)

In [7]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Define ANN Model
ann_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(3)  # 3 outputs for S11 at 10, 12, 14 GHz
])

# Compile Model
ann_model.compile(optimizer='adam', loss='mse')

# Train ANN
ann_model.fit(X_train_scaled, y_train, epochs=100, batch_size=16, verbose=1)

# Predictions
y_pred_ann = ann_model.predict(X_test_scaled)

# Evaluate Model
ann_mse = mean_squared_error(y_test, y_pred_ann)
ann_r2 = r2_score(y_test, y_pred_ann)

print(f"ANN - MSE: {ann_mse}, R2 Score: {ann_r2}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 435.5583
Epoch 2/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 330.8256
Epoch 3/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 140.7824
Epoch 4/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 31.8646
Epoch 5/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 20.5622
Epoch 6/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 17.1731
Epoch 7/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 15.5371
Epoch 8/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 13.3681
Epoch 9/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 12.4559
Epoch 10/100
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - 

7. Gradient Boosting Regressor

In [8]:
from sklearn.ensemble import GradientBoostingRegressor

# Train Gradient Boosting for each target separately
gb_models = {}
gb_r2_scores = {}

for freq in ["S11_10GHz", "S11_12GHz", "S11_14GHz"]:
    gb_models[freq] = GradientBoostingRegressor(n_estimators=150, learning_rate=0.1, max_depth=7)
    gb_models[freq].fit(X_train_scaled, y_train[freq])  # Fit on one target at a time

    # Predictions
    y_pred_gb = gb_models[freq].predict(X_test_scaled)

    # Evaluate Model
    gb_r2_scores[freq] = r2_score(y_test[freq], y_pred_gb)

# Average R² Score for all targets
gb_avg_r2 = np.mean(list(gb_r2_scores.values()))
print(f"Gradient Boosting - Average R² Score: {gb_avg_r2}")

Gradient Boosting - Average R² Score: -0.25713925474841665


8. K-Nearest Neighbors (KNN)

In [9]:
from sklearn.neighbors import KNeighborsRegressor

# Train KNN
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)

# Predictions
y_pred_knn = knn_model.predict(X_test_scaled)

# Evaluate Model
knn_mse = mean_squared_error(y_test, y_pred_knn)
knn_r2 = r2_score(y_test, y_pred_knn)

print(f"KNN - MSE: {knn_mse}, R2 Score: {knn_r2}")

KNN - MSE: 1.8675772423248753, R2 Score: -0.23850298949780072


**Select the Best Model**

In [10]:
# Store the model performance metrics
model_scores = {
    "Linear Regression": {"MSE": 0.5376, "R2": -0.0336},
    "Decision Tree": {"MSE": 0.7182, "R2": -0.3787},
    "Random Forest": {"MSE": 0.6061, "R2": -0.1638},
    "XGBoost": {"MSE": 0.7433, "R2": -0.4279},
    "SVR": {"MSE": None, "R2": -0.2167},  # MSE not provided
    "ANN": {"MSE": 0.5229, "R2": -0.0043},
    "Gradient Boosting": {"MSE": None, "R2": -0.5850},  # MSE not provided
    "KNN": {"MSE": 0.6073, "R2": -0.1677}
}

# Normalize values for fair comparison
valid_models = {k: v for k, v in model_scores.items() if v["MSE"] is not None}  # Ignore models with missing MSE

# Find the model with the highest R² and lowest MSE
best_r2_model = max(valid_models, key=lambda model: model_scores[model]["R2"])  # Highest R²
best_mse_model = min(valid_models, key=lambda model: model_scores[model]["MSE"])  # Lowest MSE

# Final selection criteria: Prefer the model with best R², but if R² is close, use MSE as a tiebreaker
best_model = best_r2_model if best_r2_model == best_mse_model else best_r2_model
best_r2 = model_scores[best_r2_model]["R2"]
best_mse = model_scores[best_mse_model]["MSE"]

# Print results
print("Model Performance (R² & MSE):")
for model, metrics in model_scores.items():
    mse_value = "N/A" if metrics["MSE"] is None else f"{metrics['MSE']:.4f}"
    print(f"{model} - R² Score: {metrics['R2']:.4f}, MSE: {mse_value}")

print("\n🏆 Best Model Selected:")
print(f"✅ Best Model: {best_model}")
print(f"✅ Best R² Score: {best_r2:.4f}")
print(f"✅ Best MSE Score: {best_mse:.4f} (for reference)")

Model Performance (R² & MSE):
Linear Regression - R² Score: -0.0336, MSE: 0.5376
Decision Tree - R² Score: -0.3787, MSE: 0.7182
Random Forest - R² Score: -0.1638, MSE: 0.6061
XGBoost - R² Score: -0.4279, MSE: 0.7433
SVR - R² Score: -0.2167, MSE: N/A
ANN - R² Score: -0.0043, MSE: 0.5229
Gradient Boosting - R² Score: -0.5850, MSE: N/A
KNN - R² Score: -0.1677, MSE: 0.6073

🏆 Best Model Selected:
✅ Best Model: ANN
✅ Best R² Score: -0.0043
✅ Best MSE Score: 0.5229 (for reference)
