<a href="https://colab.research.google.com/github/coding-cosmos/Sluice-Gate-Cavitation-Index/blob/main/notebooks/Sluice_Gate_OFAT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# OFAT

## Utils

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

def cc_score(y_true, y_pred):
    """Return the correlation coefficient between targets and predictions."""
    # For two 1-D inputs np.corrcoef gives the full correlation matrix; the
    # off-diagonal entry [0, 1] is the correlation between the two inputs.
    correlation_matrix = np.corrcoef(y_true, y_pred)
    return correlation_matrix[0, 1]

def _lovo_metrics_row(label, y_train, pred_train, y_test, pred_test):
    """Build one result row (CC and RMSE on train and test) tagged with `label`."""
    return {
        "Parameter Removed": label,
        "Train CC": cc_score(y_train, pred_train),
        # mean_squared_error returns the MSE; take the square root so the
        # value matches the "RMSE" column name.
        "Train RMSE": np.sqrt(mean_squared_error(y_train, pred_train)),
        "Test CC": cc_score(y_test, pred_test),
        "Test RMSE": np.sqrt(mean_squared_error(y_test, pred_test)),
    }


def lovo_sensitivity(df, trained_model, target='sigma', test_size=0.2, random_state=42):
    """
    Perform Leave-One-Variable-Out (LOVO) sensitivity analysis.

    The first row scores `trained_model` as given, on all features. Each
    following row scores a fresh, unfitted copy of the model (same
    hyper-parameters) that is refit with one feature column removed.

    Parameters:
        df: DataFrame with features and target.
        trained_model: Already fitted estimator, used as-is for the
            full-feature row. Its train/test scores are only a fair
            comparison if it was fitted on the training part of the split
            produced by the same `test_size` and `random_state`.
        target: Target column name.
        test_size: Fraction for test data.
        random_state: Seed for the train/test split.

    Returns:
        DataFrame with one row per configuration and the columns
        "Parameter Removed", "Train CC", "Train RMSE", "Test CC", "Test RMSE".
        The full-feature row is labelled "—".
    """
    # Local import keeps the file's top-level import cell untouched.
    from sklearn.base import clone

    X = df.drop(columns=[target])
    y = df[target]

    # Split once and drop columns from the resulting parts afterwards, so
    # every configuration is scored on exactly the same rows.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Full model evaluation (with the passed, already trained model)
    results = [
        _lovo_metrics_row(
            "—",
            y_train,
            trained_model.predict(X_train),
            y_test,
            trained_model.predict(X_test),
        )
    ]

    # Leave-one-feature-out retraining
    for col in X.columns:
        print(f"🚧 Training without feature: {col}")
        X_train_dropped = X_train.drop(columns=[col])
        X_test_dropped = X_test.drop(columns=[col])

        # Unfitted copy with the same hyper-parameters; clone() also copes
        # with composite estimators whose get_params() contains nested keys.
        model_clone = clone(trained_model)
        model_clone.fit(X_train_dropped, y_train)

        results.append(
            _lovo_metrics_row(
                col,
                y_train,
                model_clone.predict(X_train_dropped),
                y_test,
                model_clone.predict(X_test_dropped),
            )
        )

    return pd.DataFrame(results)


## Dimensional

In [None]:
# Mount Google Drive inside the Colab runtime so the CSV files read below
# from /content/drive/MyDrive/... are reachable. Colab-only: google.colab
# is not importable outside that environment.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the dimensional dataset and replace its header with short names.
# NOTE(review): assumes the CSV has exactly these nine columns in this order,
# with the target last - confirm against the file.
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/dim.csv")
df.columns = ['AR', 'phi', 'L', 'QW', 'Qa', 'Va', 'h', 'R', 'sigma']
X = df.drop(columns=['sigma'])
y = df['sigma']

# Same test_size/random_state as the lovo_sensitivity defaults, so the
# baseline row there is scored on this exact split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
params_gbm_20 = {'n_estimators': 310, 'max_depth': 7, 'learning_rate': 0.10664589740085356, 'subsample': 0.6623650248841277, 'min_samples_split': 17, 'min_samples_leaf': 2, 'max_features': None}
# subsample < 1 makes the boosting stochastic; seed the estimator so reruns
# (and the refits cloned from it) reproduce the same numbers.
gbm = GradientBoostingRegressor(**params_gbm_20, random_state=42)
gbm.fit(X_train, y_train)

In [None]:
# Leave-one-variable-out analysis on the dimensional data: `gbm` supplies the
# full-feature baseline row and the hyper-parameters for every refit.
lovo_results = lovo_sensitivity(df, gbm, target='sigma')

# Show the resulting metrics table
print(lovo_results)


🚧 Training without feature: AR
🚧 Training without feature: phi
🚧 Training without feature: L
🚧 Training without feature: QW
🚧 Training without feature: Qa
🚧 Training without feature: Va
🚧 Training without feature: h
🚧 Training without feature: R
  Parameter Removed  Train CC  Train RMSE   Test CC   Test RMSE
0                 —  0.999990    0.012752  0.998924    1.824752
1                AR  0.999994    0.007439  0.998968    1.715453
2               phi  0.999977    0.030320  0.998601    2.369494
3                 L  0.999990    0.013369  0.998964    1.739323
4                QW  0.938227   79.758257  0.875700  200.617672
5                Qa  0.999993    0.008764  0.999004    1.680662
6                Va  0.999991    0.011828  0.998908    1.830652
7                 h  0.999987    0.016676  0.999069    1.553297
8                 R  0.999988    0.016251  0.998970    1.720705


## Non Dimensional

In [None]:
# Load the non-dimensional dataset and replace its header with short names.
# NOTE(review): assumes the CSV has exactly these six columns in this order,
# with the target last - confirm against the file.
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/non_dim.csv")
df.columns = ['AR', 'phi', 'R/L', 'Fr', 'Qa/Qw', 'sigma']
X = df.drop(columns=['sigma'])
y = df['sigma']

# Same test_size/random_state as the lovo_sensitivity defaults, so the
# baseline row there is scored on this exact split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
params_gbm_20 = {'n_estimators': 252, 'max_depth': 8, 'learning_rate': 0.06285652370947764, 'subsample': 0.8059737929229986, 'min_samples_split': 9, 'min_samples_leaf': 5, 'max_features': None}
# subsample < 1 makes the boosting stochastic; seed the estimator so reruns
# (and the refits cloned from it) reproduce the same numbers.
gbm = GradientBoostingRegressor(**params_gbm_20, random_state=42)
gbm.fit(X_train, y_train)

In [None]:
# Leave-one-variable-out analysis on the non-dimensional data: `gbm` supplies
# the full-feature baseline row and the hyper-parameters for every refit.
lovo_results = lovo_sensitivity(df, gbm, target='sigma')

# Show the resulting metrics table
print(lovo_results)


🚧 Training without feature: AR
🚧 Training without feature: phi
🚧 Training without feature: R/L
🚧 Training without feature: Fr
🚧 Training without feature: Qa/Qw
  Parameter Removed  Train CC  Train RMSE   Test CC   Test RMSE
0                 —  0.999992    0.010169  0.999713    0.482366
1                AR  0.999977    0.030346  0.999242    1.289176
2               phi  0.999699    0.405016  0.983894   26.662073
3               R/L  0.999988    0.016014  0.999735    0.446794
4                Fr  0.939664   77.972018  0.829590  277.919089
5             Qa/Qw  0.999970    0.039816  0.999252    1.331867
