# SVR for AM-I

In [1]:
import os
import joblib
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import KFold, learning_curve
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ========== Configuration ==========
# Input data location and the output folder for this dataset's artifacts.
DATA_FOLDER = './train_test_split'
OUTPUT_FOLDER = './svr-models'
MODEL_SAVE_FOLDER = os.path.join(OUTPUT_FOLDER, 'AM-I-svr-model')
os.makedirs(MODEL_SAVE_FOLDER, exist_ok=True)

# Seed shared by the CV splitter and the Optuna sampler.
SEED = 42

# Column layout of the input CSV files.
TARGET_COL = 'UV_RT-s'
# Descriptor columns: the only ones that get standardized.
FEATURE_COLS = ['MolWt', 'logP', 'TPSA', 'H_bond_donors', 'H_bond_acceptors']
# col0 .. col822 (presumably fingerprint bits -- TODO confirm)
FP_COLS = ['col' + str(idx) for idx in range(823)]
# fp_0 .. fp_1023 (presumably a second fingerprint -- TODO confirm)
MG_COLS = ['fp_' + str(idx) for idx in range(1024)]
# Descriptors must stay first: the scaling code slices the leading columns.
ALL_FEATURES = [*FEATURE_COLS, *FP_COLS, *MG_COLS]

# Global evaluation results (one dict per evaluated dataset)
all_eval_results = []

# iPhone color scheme (fresh style)
IPHONE_COLORS = dict(
    scatter="#007AFF",
    line="#FF3B30",
    residual="#34C759",
    text="#1C1C1E",
)

def load_and_prepare_data(train_file, test_file):
    """Load the train/test CSV files and standardize the descriptor columns.

    Rows with a missing value in any ``ALL_FEATURES`` column or in
    ``TARGET_COL`` are dropped. Only the first ``len(FEATURE_COLS)`` columns
    of each feature matrix are standardized; the remaining columns are left
    as read.

    Args:
        train_file: Path of the training CSV.
        test_file: Path of the test CSV.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, scaler)``. The X arrays are
        float matrices with columns ordered as ``ALL_FEATURES``; ``scaler`` is
        the ``StandardScaler`` fitted on the training descriptor columns.
    """
    required_cols = ALL_FEATURES + [TARGET_COL]
    n_scaled = len(FEATURE_COLS)

    def _read_xy(csv_path):
        # Read one split and return (feature matrix, target vector).
        frame = pd.read_csv(csv_path).dropna(subset=required_cols)
        # Force a writable float copy. ``.values`` may return an integer
        # array (scaled values would be truncated on assignment) or a
        # read-only view of the frame (the assignment would raise).
        features = frame[ALL_FEATURES].to_numpy(dtype=float, copy=True)
        return features, frame[TARGET_COL].to_numpy()

    X_train, y_train = _read_xy(train_file)
    X_test, y_test = _read_xy(test_file)

    # Fit on the training rows only, then reuse those statistics for the test rows.
    scaler = StandardScaler()
    X_train[:, :n_scaled] = scaler.fit_transform(X_train[:, :n_scaled])
    X_test[:, :n_scaled] = scaler.transform(X_test[:, :n_scaled])

    return X_train, y_train, X_test, y_test, scaler

def objective(trial, X, y):
    """Score one Optuna trial: mean 5-fold R2 of an RBF SVR.

    ``C``, ``gamma`` and ``epsilon`` are sampled log-uniformly. For every
    fold the leading ``len(FEATURE_COLS)`` columns are re-standardized with
    statistics from that fold's training part only.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Feature matrix (NumPy array; the descriptor columns come first).
        y: Target vector aligned with the rows of ``X``.

    Returns:
        Mean R2 over the five validation folds.
    """
    # Dict entries are evaluated top to bottom, so the sampling order is C, gamma, epsilon.
    sampled = {
        'C': trial.suggest_float('C', 1e-2, 1e3, log=True),
        'gamma': trial.suggest_float('gamma', 1e-4, 1e1, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-3, 1.0, log=True),
    }
    regressor = SVR(**sampled)
    n_desc = len(FEATURE_COLS)
    splitter = KFold(n_splits=5, shuffle=True, random_state=SEED)

    fold_r2 = []
    for fit_rows, held_rows in splitter.split(X):
        X_fit = X[fit_rows].copy()
        X_held = X[held_rows].copy()

        # Scale features for each fold, using the fold's training rows only
        fold_scaler = StandardScaler()
        X_fit[:, :n_desc] = fold_scaler.fit_transform(X_fit[:, :n_desc])
        X_held[:, :n_desc] = fold_scaler.transform(X_held[:, :n_desc])

        regressor.fit(X_fit, y[fit_rows])
        fold_r2.append(r2_score(y[held_rows], regressor.predict(X_held)))

    return np.mean(fold_r2)

def plot_learning_curve(estimator, X, y, title, save_path):
    """Save a learning-curve figure (mean R² versus training-set size).

    Scores come from scikit-learn's ``learning_curve`` with 5-fold CV at five
    training fractions evenly spaced from 10% to 100%; the scores at each
    size are averaged over the folds.

    Args:
        estimator: Unfitted estimator that ``learning_curve`` fits repeatedly.
        X: Feature matrix.
        y: Target vector.
        title: Figure title.
        save_path: Destination path of the image file.
    """
    # NOTE(review): scikit-learn documents ``random_state`` as used only when
    # ``shuffle=True``; shuffle is left at its default here -- confirm intent.
    train_sizes, train_scores, valid_scores = learning_curve(
        estimator, X, y, cv=5, scoring='r2',
        train_sizes=np.linspace(0.1, 1.0, 5), random_state=SEED)

    fig, ax = plt.subplots()
    try:
        ax.plot(train_sizes, train_scores.mean(axis=1), label='Training score')
        ax.plot(train_sizes, valid_scores.mean(axis=1), label='Validation score')
        ax.set_xlabel("Training Set Size")
        # The label was mis-encoded ("RÂ²"); restore the superscript two.
        ax.set_ylabel("R² Score")
        ax.set_title(title)
        ax.legend(loc='best')
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)

def plot_scatter_and_residuals(y_true, y_pred, base_name):
    """Save a predicted-vs-true scatter plot and a residual plot as PNG files.

    The files are written to ``MODEL_SAVE_FOLDER`` as
    ``<base_name>_scatter.png`` and ``<base_name>_residuals.png``. Both
    figures are annotated with the R² and MAE of ``y_pred`` against ``y_true``.

    Args:
        y_true: Observed target values (array-like).
        y_pred: Predicted values, same length as ``y_true``.
        base_name: Prefix of the two output file names.
    """
    # Accept any array-like; the residual subtraction below needs arrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Both figures show the same metrics, so compute them once.
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    # The annotation was mis-encoded ("RÂ²"); restore the superscript two.
    metrics_label = f"R² = {r2:.3f}\nMAE = {mae:.3f}"
    line_style = dict(linestyle='--', color=IPHONE_COLORS['line'], linewidth=1)

    def _render(x, y, x_label, y_label, caption, suffix, draw_reference):
        # Draw one 6x6 figure: points, reference line, labels, annotations; then save it.
        fig = plt.figure(figsize=(6, 6))
        try:
            ax = fig.gca()
            ax.tick_params(axis='both', direction='out', length=6, width=1.2)
            for side in ('top', 'right', 'bottom', 'left'):
                ax.spines[side].set_visible(True)
            ax.grid(False)

            ax.scatter(x, y, alpha=0.6, color=IPHONE_COLORS['scatter'])
            draw_reference(ax)

            ax.set_xlabel(x_label)
            ax.set_ylabel(y_label)
            # The caption sits below the axes instead of in the title slot.
            ax.text(0.5, -0.15, caption, ha='center', va='center',
                    transform=ax.transAxes, fontsize=12, color=IPHONE_COLORS['text'])
            ax.text(0.05, 0.95, metrics_label,
                    transform=ax.transAxes, verticalalignment='top', fontsize=10,
                    color=IPHONE_COLORS['text'])
            fig.tight_layout()
            fig.savefig(os.path.join(MODEL_SAVE_FOLDER, f"{base_name}_{suffix}.png"))
        finally:
            # Release the figure even when drawing or saving fails.
            plt.close(fig)

    # Scatter plot with the y = x reference diagonal
    low, high = y_true.min(), y_true.max()
    _render(y_true, y_pred,
            "True Retention Time (s)", "Predicted Retention Time (s)",
            "Predicted vs True Values", "scatter",
            lambda ax: ax.plot([low, high], [low, high], **line_style))

    # Residual plot with the zero-error reference line
    _render(y_pred, y_pred - y_true,
            "Predicted Retention Time (s)", "Residuals (Predicted - True)",
            "Residual Plot", "residuals",
            lambda ax: ax.axhline(y=0, **line_style))

def train_and_evaluate(train_csv, test_csv, n_trials=30):
    """Tune, fit, evaluate and persist an SVR model for one train/test pair.

    Steps: load and scale the data, run an Optuna TPE search that maximizes
    the value returned by ``objective``, refit an SVR with the best
    parameters on the whole training set, score it on the test set, then
    write the model, scaler, predictions, plots and a summary CSV to
    ``MODEL_SAVE_FOLDER``. The metrics are also appended to the module-level
    ``all_eval_results`` list.

    Args:
        train_csv: Path of the training CSV. Its file name without the
            extension and with "_train" removed names the outputs.
        test_csv: Path of the test CSV.
        n_trials: Number of Optuna trials (default 30, the value that was
            previously hard-coded).
    """
    base_name = os.path.splitext(os.path.basename(train_csv))[0].replace("_train", "")
    # The printed messages were mis-encoded; the emoji and "²" are restored.
    print(f"\n🚀 Training on dataset: {base_name}")

    def _artifact(suffix):
        # Output path inside MODEL_SAVE_FOLDER for this dataset.
        return os.path.join(MODEL_SAVE_FOLDER, f"{base_name}_{suffix}")

    X_train, y_train, X_test, y_test, scaler = load_and_prepare_data(train_csv, test_csv)

    # Hyperparameter optimization (seeded sampler for repeatable searches)
    study = optuna.create_study(direction='maximize',
                                sampler=optuna.samplers.TPESampler(seed=SEED))
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=n_trials)

    best_params = study.best_params
    model = SVR(**best_params)
    model.fit(X_train, y_train)

    # Model evaluation
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)

    print(f"📊 R²: {r2:.4f} | RMSE: {rmse:.4f} | MAE: {mae:.4f}")

    # Save model and scaler (the scaler covers the descriptor columns only)
    joblib.dump(model, _artifact("svr_model.joblib"))
    joblib.dump(scaler, _artifact("scaler.joblib"))

    # Save predictions
    pd.DataFrame({'y_true': y_test, 'y_pred': y_pred}).to_csv(
        _artifact("predictions.csv"), index=False
    )

    # Generate plots; the learning curve gets a fresh, unfitted estimator
    plot_learning_curve(SVR(**best_params), X_train, y_train,
                        f"Learning Curve - {base_name}",
                        _artifact("learning_curve.png"))

    plot_scatter_and_residuals(y_test, y_pred, base_name)

    # Record results
    all_eval_results.append({
        "Dataset": base_name,
        "R2": r2,
        "RMSE": rmse,
        "MAE": mae,
        "C": best_params['C'],
        "gamma": best_params['gamma'],
        "epsilon": best_params['epsilon']
    })

    # Save summary (rewritten in full from the accumulated results)
    summary_name = "AM-I-svr_model_evaluation_summary.csv"
    pd.DataFrame(all_eval_results).to_csv(
        os.path.join(MODEL_SAVE_FOLDER, summary_name), index=False)
    print(f"\n✅ All model evaluation results saved to: {summary_name}")

if __name__ == "__main__":
    # Run the pipeline on the AM-I train/test split stored in DATA_FOLDER.
    train_csv, test_csv = (
        os.path.join(DATA_FOLDER, f"AM-I-filtered_with_labels_k4_{split}.csv")
        for split in ("train", "test")
    )
    train_and_evaluate(train_csv, test_csv)

  from .autonotebook import tqdm as notebook_tqdm



🚀 Training on dataset: AM-I-filtered_with_labels_k4


[I 2026-01-18 10:53:20,860] A new study created in memory with name: no-name-b4592c46-6605-40ff-8303-aa502104f31d
[I 2026-01-18 10:55:38,100] Trial 0 finished with value: -0.003930982826524598 and parameters: {'C': 0.7459343285726545, 'gamma': 5.669849511478847, 'epsilon': 0.15702970884055384}. Best is trial 0 with value: -0.003930982826524598.
[I 2026-01-18 10:57:54,376] Trial 1 finished with value: 0.6184433368873307 and parameters: {'C': 9.846738873614559, 'gamma': 0.0006026889128682511, 'epsilon': 0.0029375384576328283}. Best is trial 1 with value: 0.6184433368873307.
[I 2026-01-18 11:00:11,877] Trial 2 finished with value: -0.009444576488156508 and parameters: {'C': 0.0195172246414495, 'gamma': 2.1423021757741068, 'epsilon': 0.06358358856676251}. Best is trial 1 with value: 0.6184433368873307.
[I 2026-01-18 11:02:16,831] Trial 3 finished with value: 0.5856733757479251 and parameters: {'C': 34.70266988650411, 'gamma': 0.00012674255898937226, 'epsilon': 0.8123245085588685}. Best is 

📊 R²: 0.9145 | RMSE: 3.7973 | MAE: 2.8457

✅ All model evaluation results saved to: AM-I-svr_model_evaluation_summary.csv


# SVR for AM-II

In [3]:
import os
import joblib
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import KFold, learning_curve
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ========== Configuration ==========
# Input data location and the output folder for this dataset's artifacts.
DATA_FOLDER = './train_test_split'
OUTPUT_FOLDER = './svr-models'
MODEL_SAVE_FOLDER = os.path.join(OUTPUT_FOLDER, 'AM-II-svr-model')
os.makedirs(MODEL_SAVE_FOLDER, exist_ok=True)

# Seed shared by the CV splitter and the Optuna sampler.
SEED = 42

# Column layout of the input CSV files.
TARGET_COL = 'UV_RT-s'
# Descriptor columns: the only ones that get standardized.
FEATURE_COLS = ['MolWt', 'logP', 'TPSA', 'H_bond_donors', 'H_bond_acceptors']
# col0 .. col822 (presumably fingerprint bits -- TODO confirm)
FP_COLS = ['col' + str(idx) for idx in range(823)]
# fp_0 .. fp_1023 (presumably a second fingerprint -- TODO confirm)
MG_COLS = ['fp_' + str(idx) for idx in range(1024)]
# Descriptors must stay first: the scaling code slices the leading columns.
ALL_FEATURES = [*FEATURE_COLS, *FP_COLS, *MG_COLS]

# Global evaluation results (one dict per evaluated dataset)
all_eval_results = []

# iPhone color palette (clean style)
IPHONE_COLORS = dict(
    scatter="#007AFF",
    line="#FF3B30",
    residual="#34C759",
    text="#1C1C1E",
)

def load_and_prepare_data(train_file, test_file):
    """Load the train/test CSV files and standardize the descriptor columns.

    Rows with a missing value in any ``ALL_FEATURES`` column or in
    ``TARGET_COL`` are dropped. Only the first ``len(FEATURE_COLS)`` columns
    of each feature matrix are standardized; the remaining columns are left
    as read.

    Args:
        train_file: Path of the training CSV.
        test_file: Path of the test CSV.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, scaler)``. The X arrays are
        float matrices with columns ordered as ``ALL_FEATURES``; ``scaler`` is
        the ``StandardScaler`` fitted on the training descriptor columns.
    """
    required_cols = ALL_FEATURES + [TARGET_COL]
    n_scaled = len(FEATURE_COLS)

    def _read_xy(csv_path):
        # Read one split and return (feature matrix, target vector).
        frame = pd.read_csv(csv_path).dropna(subset=required_cols)
        # Force a writable float copy. ``.values`` may return an integer
        # array (scaled values would be truncated on assignment) or a
        # read-only view of the frame (the assignment would raise).
        features = frame[ALL_FEATURES].to_numpy(dtype=float, copy=True)
        return features, frame[TARGET_COL].to_numpy()

    X_train, y_train = _read_xy(train_file)
    X_test, y_test = _read_xy(test_file)

    # Fit on the training rows only, then reuse those statistics for the test rows.
    scaler = StandardScaler()
    X_train[:, :n_scaled] = scaler.fit_transform(X_train[:, :n_scaled])
    X_test[:, :n_scaled] = scaler.transform(X_test[:, :n_scaled])

    return X_train, y_train, X_test, y_test, scaler

def objective(trial, X, y):
    """Score one Optuna trial: mean 5-fold R2 of an RBF SVR.

    ``C``, ``gamma`` and ``epsilon`` are sampled log-uniformly. For every
    fold the leading ``len(FEATURE_COLS)`` columns are re-standardized with
    statistics from that fold's training part only.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Feature matrix (NumPy array; the descriptor columns come first).
        y: Target vector aligned with the rows of ``X``.

    Returns:
        Mean R2 over the five validation folds.
    """
    # Dict entries are evaluated top to bottom, so the sampling order is C, gamma, epsilon.
    sampled = {
        'C': trial.suggest_float('C', 1e-2, 1e3, log=True),
        'gamma': trial.suggest_float('gamma', 1e-4, 1e1, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-3, 1.0, log=True),
    }
    regressor = SVR(**sampled)
    n_desc = len(FEATURE_COLS)
    splitter = KFold(n_splits=5, shuffle=True, random_state=SEED)

    fold_r2 = []
    for fit_rows, held_rows in splitter.split(X):
        X_fit = X[fit_rows].copy()
        X_held = X[held_rows].copy()

        # Standardize with statistics from this fold's training rows only
        fold_scaler = StandardScaler()
        X_fit[:, :n_desc] = fold_scaler.fit_transform(X_fit[:, :n_desc])
        X_held[:, :n_desc] = fold_scaler.transform(X_held[:, :n_desc])

        regressor.fit(X_fit, y[fit_rows])
        fold_r2.append(r2_score(y[held_rows], regressor.predict(X_held)))

    return np.mean(fold_r2)

def plot_learning_curve(estimator, X, y, title, save_path):
    """Save a learning-curve figure (mean R2 versus training-set size).

    Scores come from scikit-learn's ``learning_curve`` with 5-fold CV at five
    training fractions evenly spaced from 10% to 100%; the scores at each
    size are averaged over the folds.

    Args:
        estimator: Unfitted estimator that ``learning_curve`` fits repeatedly.
        X: Feature matrix.
        y: Target vector.
        title: Figure title.
        save_path: Destination path of the image file.
    """
    fractions = np.linspace(0.1, 1.0, 5)
    # NOTE(review): scikit-learn documents ``random_state`` as used only when
    # ``shuffle=True``; shuffle is left at its default here -- confirm intent.
    sizes, fit_scores, cv_scores = learning_curve(
        estimator, X, y,
        cv=5,
        scoring='r2',
        train_sizes=fractions,
        random_state=SEED,
    )

    fig, ax = plt.subplots()
    curves = ((fit_scores, 'Training score'), (cv_scores, 'Validation score'))
    for scores, legend_text in curves:
        # Average over the CV folds (axis 1) for each training size.
        ax.plot(sizes, scores.mean(axis=1), label=legend_text)
    ax.set_xlabel("Training Set Size")
    ax.set_ylabel("R2 Score")
    ax.set_title(title)
    ax.legend(loc='best')
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)

def plot_scatter_and_residuals(y_true, y_pred, base_name):
    """Save a predicted-vs-true scatter plot and a residual plot as PNG files.

    The files are written to ``MODEL_SAVE_FOLDER`` as
    ``<base_name>_scatter.png`` and ``<base_name>_residuals.png``. Both
    figures are annotated with the R² and MAE of ``y_pred`` against ``y_true``.

    Args:
        y_true: Observed target values (array-like).
        y_pred: Predicted values, same length as ``y_true``.
        base_name: Prefix of the two output file names.
    """
    # Accept any array-like; the residual subtraction below needs arrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Both figures show the same metrics, so compute them once.
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    # The annotation was mis-encoded ("RÂ²"); restore the superscript two.
    metrics_label = f"R² = {r2:.3f}\nMAE = {mae:.3f}"
    line_style = dict(linestyle='--', color=IPHONE_COLORS['line'], linewidth=1)

    def _render(x, y, x_label, y_label, caption, suffix, draw_reference):
        # Draw one 6x6 figure: points, reference line, labels, annotations; then save it.
        fig = plt.figure(figsize=(6, 6))
        try:
            ax = fig.gca()
            ax.tick_params(axis='both', direction='out', length=6, width=1.2)
            for side in ('top', 'right', 'bottom', 'left'):
                ax.spines[side].set_visible(True)
            ax.grid(False)

            ax.scatter(x, y, alpha=0.6, color=IPHONE_COLORS['scatter'])
            draw_reference(ax)

            ax.set_xlabel(x_label)
            ax.set_ylabel(y_label)
            # The caption sits below the axes instead of in the title slot.
            ax.text(0.5, -0.15, caption, ha='center', va='center',
                    transform=ax.transAxes, fontsize=12, color=IPHONE_COLORS['text'])
            ax.text(0.05, 0.95, metrics_label,
                    transform=ax.transAxes, verticalalignment='top', fontsize=10,
                    color=IPHONE_COLORS['text'])
            fig.tight_layout()
            fig.savefig(os.path.join(MODEL_SAVE_FOLDER, f"{base_name}_{suffix}.png"))
        finally:
            # Release the figure even when drawing or saving fails.
            plt.close(fig)

    # Scatter plot with the y = x reference diagonal
    low, high = y_true.min(), y_true.max()
    _render(y_true, y_pred,
            "True Retention Time (s)", "Predicted Retention Time (s)",
            "Predicted vs True Values", "scatter",
            lambda ax: ax.plot([low, high], [low, high], **line_style))

    # Residual plot with the zero-error reference line
    _render(y_pred, y_pred - y_true,
            "Predicted Retention Time (s)", "Residuals (Predicted - True)",
            "Residual Plot", "residuals",
            lambda ax: ax.axhline(y=0, **line_style))

def train_and_evaluate(train_csv, test_csv, n_trials=30):
    """Tune, fit, evaluate and persist an SVR model for one train/test pair.

    Steps: load and scale the data, run an Optuna TPE search that maximizes
    the value returned by ``objective``, refit an SVR with the best
    parameters on the whole training set, score it on the test set, then
    write the model, scaler, predictions, plots and a summary CSV to
    ``MODEL_SAVE_FOLDER``. The metrics are also appended to the module-level
    ``all_eval_results`` list.

    Args:
        train_csv: Path of the training CSV. Its file name without the
            extension and with "_train" removed names the outputs.
        test_csv: Path of the test CSV.
        n_trials: Number of Optuna trials (default 30, the value that was
            previously hard-coded).
    """
    base_name = os.path.splitext(os.path.basename(train_csv))[0].replace("_train", "")
    # The printed messages were mis-encoded; the emoji are restored.
    print(f"\n🚀 Training on dataset: {base_name}")

    def _artifact(suffix):
        # Output path inside MODEL_SAVE_FOLDER for this dataset.
        return os.path.join(MODEL_SAVE_FOLDER, f"{base_name}_{suffix}")

    X_train, y_train, X_test, y_test, scaler = load_and_prepare_data(train_csv, test_csv)

    # Hyperparameter optimization (seeded sampler for repeatable searches)
    study = optuna.create_study(direction='maximize',
                                sampler=optuna.samplers.TPESampler(seed=SEED))
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=n_trials)

    best_params = study.best_params
    model = SVR(**best_params)
    model.fit(X_train, y_train)

    # Model evaluation on the held-out test set
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)

    print(f"📊 R2: {r2:.4f} | RMSE: {rmse:.4f} | MAE: {mae:.4f}")

    # Persist model and scaler (the scaler covers the descriptor columns only)
    joblib.dump(model, _artifact("svr_model.joblib"))
    joblib.dump(scaler, _artifact("scaler.joblib"))

    pd.DataFrame({'y_true': y_test, 'y_pred': y_pred}).to_csv(
        _artifact("predictions.csv"), index=False
    )

    # The learning curve gets a fresh, unfitted estimator
    plot_learning_curve(SVR(**best_params), X_train, y_train,
                        f"Learning Curve - {base_name}",
                        _artifact("learning_curve.png"))

    plot_scatter_and_residuals(y_test, y_pred, base_name)

    all_eval_results.append({
        "Dataset": base_name,
        "R2": r2,
        "RMSE": rmse,
        "MAE": mae,
        "C": best_params['C'],
        "gamma": best_params['gamma'],
        "epsilon": best_params['epsilon']
    })

    # The summary is rewritten in full from the accumulated results
    summary_name = "AM-II-Neutral-svr_model_evaluation_summary.csv"
    pd.DataFrame(all_eval_results).to_csv(
        os.path.join(MODEL_SAVE_FOLDER, summary_name), index=False)
    print(f"\n✅ All model evaluation results saved to: {summary_name}")

if __name__ == "__main__":
    # Run the pipeline on the AM-II train/test split stored in DATA_FOLDER.
    train_csv, test_csv = (
        os.path.join(DATA_FOLDER, f"AM-II-filtered_with_labels_k3_{split}.csv")
        for split in ("train", "test")
    )
    train_and_evaluate(train_csv, test_csv)


🚀 Training on dataset: AM-II-filtered_with_labels_k3


[I 2026-01-18 12:12:58,315] A new study created in memory with name: no-name-e984788d-f078-4689-9eb7-7f60abc8ffc6
[I 2026-01-18 12:13:04,288] Trial 0 finished with value: -0.027286032614959987 and parameters: {'C': 0.7459343285726545, 'gamma': 5.669849511478847, 'epsilon': 0.15702970884055384}. Best is trial 0 with value: -0.027286032614959987.
[I 2026-01-18 12:13:10,383] Trial 1 finished with value: 0.5247742466641797 and parameters: {'C': 9.846738873614559, 'gamma': 0.0006026889128682511, 'epsilon': 0.0029375384576328283}. Best is trial 1 with value: 0.5247742466641797.
[I 2026-01-18 12:13:16,420] Trial 2 finished with value: -0.025673500376314794 and parameters: {'C': 0.0195172246414495, 'gamma': 2.1423021757741068, 'epsilon': 0.06358358856676251}. Best is trial 1 with value: 0.5247742466641797.
[I 2026-01-18 12:13:21,613] Trial 3 finished with value: 0.4792961381370079 and parameters: {'C': 34.70266988650411, 'gamma': 0.00012674255898937226, 'epsilon': 0.8123245085588685}. Best is 

📊 R2: 0.8917 | RMSE: 3.0097 | MAE: 2.0730

✅ All model evaluation results saved to: AM-II-Neutral-svr_model_evaluation_summary.csv


# SVR for AM-III

In [2]:
import os
import joblib
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import KFold, learning_curve
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ========== Configuration ==========
# Input data location and the output folder for this dataset's artifacts.
DATA_FOLDER = './train_test_split'
OUTPUT_FOLDER = './svr-models'
MODEL_SAVE_FOLDER = os.path.join(OUTPUT_FOLDER, 'AM-III-svr-model')
os.makedirs(MODEL_SAVE_FOLDER, exist_ok=True)

# Seed shared by the CV splitter and the Optuna sampler.
SEED = 42

# Column layout of the input CSV files.
TARGET_COL = 'UV_RT-s'
# Descriptor columns: the only ones that get standardized.
FEATURE_COLS = ['MolWt', 'logP', 'TPSA', 'H_bond_donors', 'H_bond_acceptors']
# col0 .. col822 (presumably fingerprint bits -- TODO confirm)
FP_COLS = ['col' + str(idx) for idx in range(823)]
# fp_0 .. fp_1023 (presumably a second fingerprint -- TODO confirm)
MG_COLS = ['fp_' + str(idx) for idx in range(1024)]
# Descriptors must stay first: the scaling code slices the leading columns.
ALL_FEATURES = [*FEATURE_COLS, *FP_COLS, *MG_COLS]

# Global evaluation results (one dict per evaluated dataset)
all_eval_results = []

# iPhone color palette (clean style)
IPHONE_COLORS = dict(
    scatter="#007AFF",
    line="#FF3B30",
    residual="#34C759",
    text="#1C1C1E",
)

def load_and_prepare_data(train_file, test_file):
    """Load the train/test CSV files and standardize the descriptor columns.

    Rows with a missing value in any ``ALL_FEATURES`` column or in
    ``TARGET_COL`` are dropped. Only the first ``len(FEATURE_COLS)`` columns
    of each feature matrix are standardized; the remaining columns are left
    as read.

    Args:
        train_file: Path of the training CSV.
        test_file: Path of the test CSV.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, scaler)``. The X arrays are
        float matrices with columns ordered as ``ALL_FEATURES``; ``scaler`` is
        the ``StandardScaler`` fitted on the training descriptor columns.
    """
    required_cols = ALL_FEATURES + [TARGET_COL]
    n_scaled = len(FEATURE_COLS)

    def _read_xy(csv_path):
        # Read one split and return (feature matrix, target vector).
        frame = pd.read_csv(csv_path).dropna(subset=required_cols)
        # Force a writable float copy. ``.values`` may return an integer
        # array (scaled values would be truncated on assignment) or a
        # read-only view of the frame (the assignment would raise).
        features = frame[ALL_FEATURES].to_numpy(dtype=float, copy=True)
        return features, frame[TARGET_COL].to_numpy()

    X_train, y_train = _read_xy(train_file)
    X_test, y_test = _read_xy(test_file)

    # Fit on the training rows only, then reuse those statistics for the test rows.
    scaler = StandardScaler()
    X_train[:, :n_scaled] = scaler.fit_transform(X_train[:, :n_scaled])
    X_test[:, :n_scaled] = scaler.transform(X_test[:, :n_scaled])

    return X_train, y_train, X_test, y_test, scaler

def objective(trial, X, y):
    """Score one Optuna trial: mean 5-fold R2 of an RBF SVR.

    ``C``, ``gamma`` and ``epsilon`` are sampled log-uniformly. For every
    fold the leading ``len(FEATURE_COLS)`` columns are re-standardized with
    statistics from that fold's training part only.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Feature matrix (NumPy array; the descriptor columns come first).
        y: Target vector aligned with the rows of ``X``.

    Returns:
        Mean R2 over the five validation folds.
    """
    # Dict entries are evaluated top to bottom, so the sampling order is C, gamma, epsilon.
    sampled = {
        'C': trial.suggest_float('C', 1e-2, 1e3, log=True),
        'gamma': trial.suggest_float('gamma', 1e-4, 1e1, log=True),
        'epsilon': trial.suggest_float('epsilon', 1e-3, 1.0, log=True),
    }
    regressor = SVR(**sampled)
    n_desc = len(FEATURE_COLS)
    splitter = KFold(n_splits=5, shuffle=True, random_state=SEED)

    fold_r2 = []
    for fit_rows, held_rows in splitter.split(X):
        X_fit = X[fit_rows].copy()
        X_held = X[held_rows].copy()

        # Standardize with statistics from this fold's training rows only
        fold_scaler = StandardScaler()
        X_fit[:, :n_desc] = fold_scaler.fit_transform(X_fit[:, :n_desc])
        X_held[:, :n_desc] = fold_scaler.transform(X_held[:, :n_desc])

        regressor.fit(X_fit, y[fit_rows])
        fold_r2.append(r2_score(y[held_rows], regressor.predict(X_held)))

    return np.mean(fold_r2)

def plot_learning_curve(estimator, X, y, title, save_path):
    """Save a learning-curve figure (mean R2 versus training-set size).

    Scores come from scikit-learn's ``learning_curve`` with 5-fold CV at five
    training fractions evenly spaced from 10% to 100%; the scores at each
    size are averaged over the folds.

    Args:
        estimator: Unfitted estimator that ``learning_curve`` fits repeatedly.
        X: Feature matrix.
        y: Target vector.
        title: Figure title.
        save_path: Destination path of the image file.
    """
    fractions = np.linspace(0.1, 1.0, 5)
    # NOTE(review): scikit-learn documents ``random_state`` as used only when
    # ``shuffle=True``; shuffle is left at its default here -- confirm intent.
    sizes, fit_scores, cv_scores = learning_curve(
        estimator, X, y,
        cv=5,
        scoring='r2',
        train_sizes=fractions,
        random_state=SEED,
    )

    fig, ax = plt.subplots()
    curves = ((fit_scores, 'Training score'), (cv_scores, 'Validation score'))
    for scores, legend_text in curves:
        # Average over the CV folds (axis 1) for each training size.
        ax.plot(sizes, scores.mean(axis=1), label=legend_text)
    ax.set_xlabel("Training Set Size")
    ax.set_ylabel("R2 Score")
    ax.set_title(title)
    ax.legend(loc='best')
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)

def plot_scatter_and_residuals(y_true, y_pred, base_name):
    """Save a predicted-vs-true scatter plot and a residual plot as PNG files.

    The files are written to ``MODEL_SAVE_FOLDER`` as
    ``<base_name>_scatter.png`` and ``<base_name>_residuals.png``. Both
    figures are annotated with the R² and MAE of ``y_pred`` against ``y_true``.

    Args:
        y_true: Observed target values (array-like).
        y_pred: Predicted values, same length as ``y_true``.
        base_name: Prefix of the two output file names.
    """
    # Accept any array-like; the residual subtraction below needs arrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Both figures show the same metrics, so compute them once.
    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    # The annotation was mis-encoded ("RÂ²"); restore the superscript two.
    metrics_label = f"R² = {r2:.3f}\nMAE = {mae:.3f}"
    line_style = dict(linestyle='--', color=IPHONE_COLORS['line'], linewidth=1)

    def _render(x, y, x_label, y_label, caption, suffix, draw_reference):
        # Draw one 6x6 figure: points, reference line, labels, annotations; then save it.
        fig = plt.figure(figsize=(6, 6))
        try:
            ax = fig.gca()
            ax.tick_params(axis='both', direction='out', length=6, width=1.2)
            for side in ('top', 'right', 'bottom', 'left'):
                ax.spines[side].set_visible(True)
            ax.grid(False)

            ax.scatter(x, y, alpha=0.6, color=IPHONE_COLORS['scatter'])
            draw_reference(ax)

            ax.set_xlabel(x_label)
            ax.set_ylabel(y_label)
            # The caption sits below the axes instead of in the title slot.
            ax.text(0.5, -0.15, caption, ha='center', va='center',
                    transform=ax.transAxes, fontsize=12, color=IPHONE_COLORS['text'])
            ax.text(0.05, 0.95, metrics_label,
                    transform=ax.transAxes, verticalalignment='top', fontsize=10,
                    color=IPHONE_COLORS['text'])
            fig.tight_layout()
            fig.savefig(os.path.join(MODEL_SAVE_FOLDER, f"{base_name}_{suffix}.png"))
        finally:
            # Release the figure even when drawing or saving fails.
            plt.close(fig)

    # Scatter plot with the y = x reference diagonal
    low, high = y_true.min(), y_true.max()
    _render(y_true, y_pred,
            "True Retention Time (s)", "Predicted Retention Time (s)",
            "Predicted vs True Values", "scatter",
            lambda ax: ax.plot([low, high], [low, high], **line_style))

    # Residual plot with the zero-error reference line
    _render(y_pred, y_pred - y_true,
            "Predicted Retention Time (s)", "Residuals (Predicted - True)",
            "Residual Plot", "residuals",
            lambda ax: ax.axhline(y=0, **line_style))

def train_and_evaluate(train_csv, test_csv, n_trials=30):
    """Tune, fit, evaluate and persist an SVR model for one train/test pair.

    Steps: load and scale the data, run an Optuna TPE search that maximizes
    the value returned by ``objective``, refit an SVR with the best
    parameters on the whole training set, score it on the test set, then
    write the model, scaler, predictions, plots and a summary CSV to
    ``MODEL_SAVE_FOLDER``. The metrics are also appended to the module-level
    ``all_eval_results`` list.

    Args:
        train_csv: Path of the training CSV. Its file name without the
            extension and with "_train" removed names the outputs.
        test_csv: Path of the test CSV.
        n_trials: Number of Optuna trials (default 30, the value that was
            previously hard-coded).
    """
    base_name = os.path.splitext(os.path.basename(train_csv))[0].replace("_train", "")
    # The printed messages were mis-encoded; the emoji are restored.
    print(f"\n🚀 Training on dataset: {base_name}")

    def _artifact(suffix):
        # Output path inside MODEL_SAVE_FOLDER for this dataset.
        return os.path.join(MODEL_SAVE_FOLDER, f"{base_name}_{suffix}")

    X_train, y_train, X_test, y_test, scaler = load_and_prepare_data(train_csv, test_csv)

    # Hyperparameter optimization (seeded sampler for repeatable searches)
    study = optuna.create_study(direction='maximize',
                                sampler=optuna.samplers.TPESampler(seed=SEED))
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=n_trials)

    best_params = study.best_params
    model = SVR(**best_params)
    model.fit(X_train, y_train)

    # Model evaluation on the held-out test set
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)

    print(f"📊 R2: {r2:.4f} | RMSE: {rmse:.4f} | MAE: {mae:.4f}")

    # Persist model and scaler (the scaler covers the descriptor columns only)
    joblib.dump(model, _artifact("svr_model.joblib"))
    joblib.dump(scaler, _artifact("scaler.joblib"))

    pd.DataFrame({'y_true': y_test, 'y_pred': y_pred}).to_csv(
        _artifact("predictions.csv"), index=False
    )

    # The learning curve gets a fresh, unfitted estimator
    plot_learning_curve(SVR(**best_params), X_train, y_train,
                        f"Learning Curve - {base_name}",
                        _artifact("learning_curve.png"))

    plot_scatter_and_residuals(y_test, y_pred, base_name)

    all_eval_results.append({
        "Dataset": base_name,
        "R2": r2,
        "RMSE": rmse,
        "MAE": mae,
        "C": best_params['C'],
        "gamma": best_params['gamma'],
        "epsilon": best_params['epsilon']
    })

    # The summary is rewritten in full from the accumulated results
    summary_name = "AM-III-svr_model_evaluation_summary.csv"
    pd.DataFrame(all_eval_results).to_csv(
        os.path.join(MODEL_SAVE_FOLDER, summary_name), index=False)
    print(f"\n✅ All model evaluation results saved to: {summary_name}")

if __name__ == "__main__":
    # Run the pipeline on the AM-III train/test split stored in DATA_FOLDER.
    train_csv, test_csv = (
        os.path.join(DATA_FOLDER, f"AM-III-filtered_with_labels_k4_{split}.csv")
        for split in ("train", "test")
    )
    train_and_evaluate(train_csv, test_csv)


🚀 Training on dataset: AM-III-filtered_with_labels_k4


[I 2026-01-18 12:11:19,025] A new study created in memory with name: no-name-e3b730e0-6e32-48f5-a118-b37d4e51d43a
[I 2026-01-18 12:11:21,777] Trial 0 finished with value: -0.016026618949243555 and parameters: {'C': 0.7459343285726545, 'gamma': 5.669849511478847, 'epsilon': 0.15702970884055384}. Best is trial 0 with value: -0.016026618949243555.
[I 2026-01-18 12:11:24,523] Trial 1 finished with value: 0.5435040417541912 and parameters: {'C': 9.846738873614559, 'gamma': 0.0006026889128682511, 'epsilon': 0.0029375384576328283}. Best is trial 1 with value: 0.5435040417541912.
[I 2026-01-18 12:11:27,301] Trial 2 finished with value: -0.01716512868399751 and parameters: {'C': 0.0195172246414495, 'gamma': 2.1423021757741068, 'epsilon': 0.06358358856676251}. Best is trial 1 with value: 0.5435040417541912.
[I 2026-01-18 12:11:29,849] Trial 3 finished with value: 0.5004563865397835 and parameters: {'C': 34.70266988650411, 'gamma': 0.00012674255898937226, 'epsilon': 0.8123245085588685}. Best is t

📊 R2: 0.8775 | RMSE: 5.1064 | MAE: 3.2658

✅ All model evaluation results saved to: AM-III-svr_model_evaluation_summary.csv


# SVR for AM-IV, AM-V, AM-VI, AM-VII with nested CV

In [4]:
import os
import glob
import joblib
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.svm import SVR
from sklearn.model_selection import KFold, learning_curve
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings

# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# ----------------- Configuration -----------------
DATA_FOLDER = './processed_results'                 # Data folder (CSV)
MODEL_SAVE_FOLDER = './svr-model-other4'
os.makedirs(MODEL_SAVE_FOLDER, exist_ok=True)

# One seed for NumPy's global RNG and the seeded components below.
SEED = 42
np.random.seed(SEED)

# Keep consistent feature columns
TARGET_COL = 'UV_RT-s'
FEATURE_COLS = ['MolWt', 'logP', 'TPSA', 'H_bond_donors', 'H_bond_acceptors']
# col0 .. col822 (presumably fingerprint bits -- TODO confirm)
FP_COLS = ['col' + str(idx) for idx in range(823)]
# fp_0 .. fp_1023 (presumably a second fingerprint -- TODO confirm)
MG_COLS = ['fp_' + str(idx) for idx in range(1024)]
ALL_FEATURES = [*FEATURE_COLS, *FP_COLS, *MG_COLS]

# Allowed file name prefixes (user specified)
ALLOWED_PREFIXES = {
    f"AM-{numeral}-filtered" for numeral in ("IV", "V", "VI", "VII")
}

# Use 26 cores
N_JOBS = 26

IPHONE_COLORS = dict(
    scatter="#007AFF",
    line="#FF3B30",
    residual="#34C759",
    text="#1C1C1E",
)

# ----------------- Data Loading -----------------
def load_data():
    """Load and concatenate the allowed CSV files found in ``DATA_FOLDER``.

    A file is used only when all of the following hold:

    * its name without the extension is a member of ``ALLOWED_PREFIXES``
      (an exact match, not a prefix test, despite the constant's name);
    * it contains every column in ``ALL_FEATURES`` plus ``TARGET_COL``;
    * at least one row remains after dropping rows with missing values in
      those columns.

    Each kept frame gets a ``file_prefix`` column holding that file name.

    Returns:
        pandas.DataFrame: All kept rows, re-indexed from 0.

    Raises:
        RuntimeError: If no file passes the checks.
    """
    required_cols = ALL_FEATURES + [TARGET_COL]
    frames = []
    # glob returns paths in arbitrary, filesystem-dependent order. Sorting
    # keeps the concatenated row order -- and anything downstream that
    # depends on it -- reproducible.
    for csv_path in sorted(glob.glob(os.path.join(DATA_FOLDER, '*.csv'))):
        file_prefix = os.path.splitext(os.path.basename(csv_path))[0]
        if file_prefix not in ALLOWED_PREFIXES:
            continue

        frame = pd.read_csv(csv_path)
        if not set(required_cols).issubset(frame.columns):
            print(f"Warning: file {csv_path} missing required columns, skipping.")
            continue

        frame = frame.dropna(subset=required_cols).copy()
        if frame.empty:
            print(f"Warning: file {csv_path} has no valid rows after dropna, skipping.")
            continue

        frame['file_prefix'] = file_prefix
        frames.append(frame)

    if not frames:
        raise RuntimeError("No valid data files found for the allowed prefixes.")
    return pd.concat(frames, ignore_index=True)

# ----------------- Plotting Helpers -----------------
def plot_learning_curve(estimator, X, y, title, save_path):
    """Save a learning-curve figure (mean R2 versus training-set size).

    Scores come from scikit-learn's ``learning_curve`` with 5-fold CV at five
    training fractions evenly spaced from 10% to 100%, run with ``N_JOBS``
    parallel jobs; the scores at each size are averaged over the folds.

    Args:
        estimator: Unfitted estimator that ``learning_curve`` fits repeatedly.
        X: Feature matrix.
        y: Target vector.
        title: Figure title.
        save_path: Destination path of the image file.
    """
    fractions = np.linspace(0.1, 1.0, 5)
    # NOTE(review): scikit-learn documents ``random_state`` as used only when
    # ``shuffle=True``; shuffle is left at its default here -- confirm intent.
    sizes, fit_scores, cv_scores = learning_curve(
        estimator, X, y,
        cv=5,
        scoring='r2',
        train_sizes=fractions,
        random_state=SEED,
        n_jobs=N_JOBS,
    )

    fig, ax = plt.subplots()
    curves = ((fit_scores, 'Training score'), (cv_scores, 'Validation score'))
    for scores, legend_text in curves:
        # Average over the CV folds (axis 1) for each training size.
        ax.plot(sizes, scores.mean(axis=1), label=legend_text)
    ax.set_xlabel("Training Set Size")
    ax.set_ylabel("R2 Score")
    ax.set_title(title)
    ax.legend(loc='best')
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)

def _new_square_axes():
    """Open a 6x6 inch figure with outward ticks, all spines shown and no grid."""
    plt.figure(figsize=(6, 6))
    ax = plt.gca()
    ax.tick_params(axis='both', direction='out', length=6, width=1.2)
    for spine in ['top', 'right', 'bottom', 'left']:
        ax.spines[spine].set_visible(True)
    plt.grid(False)
    return ax


def _annotate_and_save(ax, caption, metrics_text, path):
    """Add the caption below the axes and the metrics box, then save and close."""
    plt.text(0.5, -0.15, caption, ha='center', va='center',
             transform=ax.transAxes, fontsize=12, color=IPHONE_COLORS['text'])
    plt.text(0.05, 0.95, metrics_text, transform=ax.transAxes,
             verticalalignment='top', fontsize=10, color=IPHONE_COLORS['text'])
    plt.tight_layout()
    plt.savefig(path)
    plt.close()


def plot_scatter_and_residuals(y_true, y_pred, save_prefix):
    """Save a predicted-vs-true scatter plot and a residual plot.

    Args:
        y_true: observed target values (any array-like).
        y_pred: predicted target values, same length as ``y_true``.
        save_prefix: path prefix; the figures are written to
            ``<save_prefix>_scatter.png`` and ``<save_prefix>_residuals.png``.
    """
    # Accept lists as well as arrays so the subtraction below is element-wise.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    has_data = y_true.size > 0

    # Metrics are undefined for empty input; show NaN instead of raising.
    r2 = r2_score(y_true, y_pred) if has_data else np.nan
    mae = mean_absolute_error(y_true, y_pred) if has_data else np.nan
    # Escaped superscript two keeps the label correct regardless of the
    # encoding the source file is saved in.
    metrics_text = f"R\u00b2 = {r2:.3f}\nMAE = {mae:.3f}"

    # Scatter plot
    ax = _new_square_axes()
    plt.scatter(y_true, y_pred, alpha=0.6, color=IPHONE_COLORS['scatter'])
    if has_data:
        # Identity line spanning the observed range.
        lo, hi = y_true.min(), y_true.max()
        plt.plot([lo, hi], [lo, hi],
                 linestyle='--', color=IPHONE_COLORS['line'], linewidth=1)
    plt.xlabel("True Retention Time (s)")
    plt.ylabel("Predicted Retention Time (s)")
    _annotate_and_save(ax, "Predicted vs True Values", metrics_text,
                       f"{save_prefix}_scatter.png")

    # Residual plot
    residuals = y_pred - y_true
    ax = _new_square_axes()
    plt.scatter(y_pred, residuals, alpha=0.6, color=IPHONE_COLORS['scatter'])
    plt.axhline(y=0, linestyle='--', color=IPHONE_COLORS['line'], linewidth=1)
    plt.xlabel("Predicted Retention Time (s)")
    plt.ylabel("Residuals (Predicted - True)")
    _annotate_and_save(ax, "Residual Plot", metrics_text,
                       f"{save_prefix}_residuals.png")

# ----------------- Optuna Objective -----------------
def make_inner_objective(X_train, y_train, n_inner_splits=3):
    """Build an Optuna objective that scores SVR settings by inner K-fold R2.

    Args:
        X_train: feature matrix used for the inner cross-validation.
        y_train: targets aligned with ``X_train``.
        n_inner_splits: number of inner K-fold splits.

    Returns:
        A function taking an Optuna trial and returning the mean R2 over the
        inner folds for the hyperparameters sampled from that trial.
    """
    def objective(trial):
        # Sample in a fixed order: C, gamma, epsilon.
        params = {
            'C': trial.suggest_float('C', 1e-2, 1e3, log=True),
            'gamma': trial.suggest_float('gamma', 1e-4, 1e1, log=True),
            'epsilon': trial.suggest_float('epsilon', 1e-3, 1.0, log=True),
        }
        svr = SVR(**params)
        splitter = KFold(n_splits=n_inner_splits, shuffle=True, random_state=SEED)
        n_desc = len(FEATURE_COLS)

        fold_r2 = []
        for fit_rows, held_rows in splitter.split(X_train):
            X_fit = X_train[fit_rows].copy()
            X_held = X_train[held_rows].copy()

            # Standardise only the leading descriptor columns, with statistics
            # taken from the fitting part of the fold.
            desc_scaler = StandardScaler()
            X_fit[:, :n_desc] = desc_scaler.fit_transform(X_fit[:, :n_desc])
            X_held[:, :n_desc] = desc_scaler.transform(X_held[:, :n_desc])

            svr.fit(X_fit, y_train[fit_rows])
            held_pred = svr.predict(X_held)
            fold_r2.append(r2_score(y_train[held_rows], held_pred))

        return np.mean(fold_r2)
    return objective

# ----------------- Nested CV -----------------
def nested_cv_evaluate(X, y, outer_splits=5, inner_splits=3, n_trials=100):
    """Estimate SVR performance with nested cross-validation.

    For every outer fold an Optuna study tunes C, gamma and epsilon on the
    outer-training part (via ``make_inner_objective``); an SVR with the best
    parameters is then fitted on that part and scored on the held-out part.

    Args:
        X: feature matrix.
        y: target vector aligned with ``X``.
        outer_splits: number of outer K-fold splits.
        inner_splits: number of inner K-fold splits used during tuning.
        n_trials: Optuna trials per outer fold.

    Returns:
        ``(fold_results, summary)``: a list with one dict per outer fold
        (fold number, best params, inner best value, R2/RMSE/MAE, held-out
        targets and predictions) and a dict with the mean and sample standard
        deviation of R2, RMSE and MAE across folds.
    """
    outer_cv = KFold(n_splits=outer_splits, shuffle=True, random_state=SEED)
    n_desc = len(FEATURE_COLS)
    per_fold = []

    for fold_no, (fit_rows, held_rows) in enumerate(outer_cv.split(X), start=1):
        print(f"\n--- Outer Fold {fold_no}/{outer_splits} ---")
        X_fit = X[fit_rows].copy()
        X_held = X[held_rows].copy()
        y_fit = y[fit_rows]
        y_held = y[held_rows]

        # Inner loop: hyperparameter search restricted to the outer-training rows.
        tuner = optuna.create_study(
            direction='maximize',
            sampler=optuna.samplers.TPESampler(seed=SEED),
        )
        tuner.optimize(
            make_inner_objective(X_fit, y_fit, n_inner_splits=inner_splits),
            n_trials=n_trials,
            n_jobs=N_JOBS,
        )
        chosen = tuner.best_params
        inner_score = tuner.best_value
        print(f"  Inner best params: {chosen}, inner CV mean R2 = {inner_score:.4f}")

        # Scale the descriptor columns only after tuning, using outer-training
        # statistics, so the inner search saw the unscaled rows.
        desc_scaler = StandardScaler()
        X_fit[:, :n_desc] = desc_scaler.fit_transform(X_fit[:, :n_desc])
        X_held[:, :n_desc] = desc_scaler.transform(X_held[:, :n_desc])

        svr = SVR(**chosen)
        svr.fit(X_fit, y_fit)
        held_pred = svr.predict(X_held)

        fold_r2 = r2_score(y_held, held_pred)
        fold_rmse = np.sqrt(mean_squared_error(y_held, held_pred))
        fold_mae = mean_absolute_error(y_held, held_pred)

        print(f"  Outer fold {fold_no} metrics - R2: {fold_r2:.4f}, RMSE: {fold_rmse:.4f}, MAE: {fold_mae:.4f}")

        per_fold.append({
            'fold': fold_no,
            'best_params': chosen,
            'inner_best_value': inner_score,
            'r2': fold_r2,
            'rmse': fold_rmse,
            'mae': fold_mae,
            'y_true': y_held,
            'y_pred': held_pred,
        })

    # Aggregate each metric across outer folds (sample std, ddof=1).
    summary = {}
    for metric in ('r2', 'rmse', 'mae'):
        values = [record[metric] for record in per_fold]
        summary[metric + '_mean'] = np.mean(values)
        summary[metric + '_std'] = np.std(values, ddof=1)

    return per_fold, summary

# ----------------- Single File Processing -----------------
def process_single_file(df, file_prefix,
                        outer_splits=5, inner_splits=3, n_trials=100):
    """Run nested CV for one data set, then fit, save and plot a final SVR.

    Steps: nested cross-validation on the rows of ``df``; CSV export of the
    outer-fold predictions, the metric summary and the per-fold best
    parameters; a final hyperparameter search and fit on all rows; joblib
    export of the final model and scaler; learning-curve, scatter and
    residual plots. All outputs go to MODEL_SAVE_FOLDER, prefixed with
    ``file_prefix``.

    Args:
        df: DataFrame containing the ALL_FEATURES and TARGET_COL columns.
        file_prefix: label used in log messages and output file names.
        outer_splits: number of outer CV folds.
        inner_splits: number of inner CV folds used during tuning.
        n_trials: Optuna trials per search.

    Returns:
        The nested-CV summary dict (mean/std of R2, RMSE and MAE).
    """
    def out_path(suffix):
        # All artefacts share the "<folder>/<file_prefix>_<suffix>" layout.
        return os.path.join(MODEL_SAVE_FOLDER, f"{file_prefix}_{suffix}")

    print(f"\n========== Processing {file_prefix} ==========")
    X = df[ALL_FEATURES].values
    y = df[TARGET_COL].values

    fold_results, summary = nested_cv_evaluate(X, y,
                                               outer_splits=outer_splits,
                                               inner_splits=inner_splits,
                                               n_trials=n_trials)

    # "\u00b1" is the plus-minus sign; written as an escape so the output does
    # not depend on the encoding the source file is saved in.
    print(f"\n{file_prefix} - Nested CV summary:")
    print(f"  R2: {summary['r2_mean']:.4f} \u00b1 {summary['r2_std']:.4f}")
    print(f"  RMSE: {summary['rmse_mean']:.4f} \u00b1 {summary['rmse_std']:.4f}")
    print(f"  MAE: {summary['mae_mean']:.4f} \u00b1 {summary['mae_std']:.4f}")

    # Save results: held-out predictions of every outer fold, tagged by fold.
    fold_preds = []
    for fr in fold_results:
        fold_df = pd.DataFrame({
            'y_true': fr['y_true'],
            'y_pred': fr['y_pred']
        })
        fold_df['fold'] = fr['fold']
        fold_preds.append(fold_df)
    all_fold_preds_df = pd.concat(fold_preds, ignore_index=True)
    all_fold_preds_df.to_csv(out_path("nestedcv_outer_preds.csv"), index=False)

    summary_df = pd.DataFrame([{
        'file_prefix': file_prefix,
        'r2_mean': summary['r2_mean'],
        'r2_std': summary['r2_std'],
        'rmse_mean': summary['rmse_mean'],
        'rmse_std': summary['rmse_std'],
        'mae_mean': summary['mae_mean'],
        'mae_std': summary['mae_std']
    }])
    summary_df.to_csv(out_path("nestedcv_summary.csv"), index=False)

    params_df = pd.DataFrame([fr['best_params'] for fr in fold_results])
    params_df.to_csv(out_path("inner_best_params_per_fold.csv"), index=False)

    # The nested CV above only estimates performance; the model that is
    # shipped is tuned and fitted on every available row.
    print("\nRunning final inner hyperparameter search on FULL data...")
    study_final = optuna.create_study(direction='maximize',
                                      sampler=optuna.samplers.TPESampler(seed=SEED))
    final_objective = make_inner_objective(X, y, n_inner_splits=inner_splits)
    study_final.optimize(final_objective, n_trials=n_trials, n_jobs=N_JOBS)
    final_best_params = study_final.best_params
    print(f"Final best params on FULL data: {final_best_params}")

    # Standardise only the leading descriptor columns; work on a copy so X
    # itself stays unscaled.
    n_desc = len(FEATURE_COLS)
    final_scaler = StandardScaler()
    X_scaled = X.copy()
    X_scaled[:, :n_desc] = final_scaler.fit_transform(X_scaled[:, :n_desc])
    final_model = SVR(**final_best_params)
    final_model.fit(X_scaled, y)

    model_path = out_path("final_svr_model.joblib")
    scaler_path = out_path("final_scaler.joblib")
    joblib.dump(final_model, model_path)
    joblib.dump(final_scaler, scaler_path)

    pd.DataFrame([final_best_params]).to_csv(out_path("final_best_params.csv"), index=False)

    # NOTE(review): X_scaled was standardised with statistics from all rows,
    # so the CV folds inside the learning curve see scaler statistics that
    # include their own validation rows. Acceptable for a diagnostic plot,
    # but the curve is not a leakage-free estimate.
    plot_learning_curve(final_model, X_scaled, y,
                        f"Learning Curve - {file_prefix}",
                        out_path("learning_curve.png"))

    # Scatter/residual plots use the pooled out-of-fold predictions.
    y_true_all = all_fold_preds_df['y_true'].values
    y_pred_all = all_fold_preds_df['y_pred'].values
    plot_prefix = os.path.join(MODEL_SAVE_FOLDER, file_prefix)
    plot_scatter_and_residuals(y_true_all, y_pred_all, plot_prefix)

    print(f"Saved final model to: {model_path}")
    print(f"Saved final scaler to: {scaler_path}")
    return summary

# ----------------- Main -----------------
def main():
    """Process every loaded data set and write one combined summary CSV.

    Data returned by ``load_data`` is grouped by its ``file_prefix`` column;
    each group goes through ``process_single_file`` and the returned summary
    dicts are collected into ``all_files_nestedcv_summary.csv`` inside
    MODEL_SAVE_FOLDER.
    """
    combined = load_data()
    rows = [
        {
            'file_prefix': prefix,
            **process_single_file(group, prefix,
                                  outer_splits=5, inner_splits=3, n_trials=100),
        }
        for prefix, group in combined.groupby('file_prefix')
    ]
    out_csv = os.path.join(MODEL_SAVE_FOLDER, "all_files_nestedcv_summary.csv")
    pd.DataFrame(rows).to_csv(out_csv, index=False)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

[I 2026-01-18 12:16:30,160] A new study created in memory with name: no-name-eb5c75b9-9c60-4c3e-8286-53f94fda50e7




--- Outer Fold 1/5 ---


[I 2026-01-18 12:16:30,602] Trial 1 finished with value: 0.7398295281035788 and parameters: {'C': 62.5238432689369, 'gamma': 0.0004884747714249566, 'epsilon': 0.7115145816863668}. Best is trial 1 with value: 0.7398295281035788.
[I 2026-01-18 12:16:30,704] Trial 2 finished with value: -0.032074570108723575 and parameters: {'C': 577.0624003828505, 'gamma': 5.643884317379479, 'epsilon': 0.0014667017513184187}. Best is trial 1 with value: 0.7398295281035788.
[I 2026-01-18 12:16:30,722] Trial 0 finished with value: 0.0629172824386045 and parameters: {'C': 7.0657455235022875, 'gamma': 0.0001303855851951483, 'epsilon': 0.0012882926986205175}. Best is trial 1 with value: 0.7398295281035788.
[I 2026-01-18 12:16:30,736] Trial 3 finished with value: -0.0195375008476798 and parameters: {'C': 0.2099666712412517, 'gamma': 0.000904375324258865, 'epsilon': 0.01600641083929421}. Best is trial 1 with value: 0.7398295281035788.
[I 2026-01-18 12:16:30,749] Trial 4 finished with value: 0.15587120023697398 

  Inner best params: {'C': 438.2092464763412, 'gamma': 0.0014790082230251454, 'epsilon': 0.001948256560601007}, inner CV mean R2 = 0.9133


[I 2026-01-18 12:16:35,474] A new study created in memory with name: no-name-8227bb54-a1f8-42a4-b726-6fc93274ea26


  Outer fold 1 metrics - R2: 0.9627, RMSE: 2.6096, MAE: 1.6071

--- Outer Fold 2/5 ---


[I 2026-01-18 12:16:35,963] Trial 0 finished with value: -0.016579677225012784 and parameters: {'C': 0.1948780232265588, 'gamma': 0.00031000902143174443, 'epsilon': 0.18345645111556017}. Best is trial 0 with value: -0.016579677225012784.
[I 2026-01-18 12:16:35,977] Trial 2 finished with value: 0.06246797188746874 and parameters: {'C': 1.6954946714795005, 'gamma': 0.0005072251966865418, 'epsilon': 0.8531803800253757}. Best is trial 2 with value: 0.06246797188746874.
[I 2026-01-18 12:16:35,997] Trial 5 finished with value: -0.014326465547502498 and parameters: {'C': 0.3847230705501936, 'gamma': 0.00024534308212292004, 'epsilon': 0.08150180738777743}. Best is trial 2 with value: 0.06246797188746874.
[I 2026-01-18 12:16:36,021] Trial 3 finished with value: -0.006903542117216148 and parameters: {'C': 0.11403590913181046, 'gamma': 0.0706405219894312, 'epsilon': 0.010454089676196683}. Best is trial 2 with value: 0.06246797188746874.
[I 2026-01-18 12:16:36,031] Trial 1 finished with value: -0.

  Inner best params: {'C': 385.92480717767614, 'gamma': 0.0014756460339889557, 'epsilon': 0.05600229835509964}, inner CV mean R2 = 0.9344


[I 2026-01-18 12:16:40,749] A new study created in memory with name: no-name-ef38a0b6-54ae-4d69-add2-85ad86ab7229


  Outer fold 2 metrics - R2: 0.9169, RMSE: 3.6524, MAE: 1.3944

--- Outer Fold 3/5 ---


[I 2026-01-18 12:16:41,302] Trial 18 finished with value: 0.7646214282024325 and parameters: {'C': 6.52352686292273, 'gamma': 0.03707718960217238, 'epsilon': 0.7575915352054474}. Best is trial 18 with value: 0.7646214282024325.
[I 2026-01-18 12:16:41,318] Trial 2 finished with value: -0.030215077063607376 and parameters: {'C': 0.17614893620688002, 'gamma': 0.00015894753107640155, 'epsilon': 0.004163549512116525}. Best is trial 18 with value: 0.7646214282024325.
[I 2026-01-18 12:16:41,327] Trial 12 finished with value: 0.9117122780459207 and parameters: {'C': 436.09169336698614, 'gamma': 0.0005113862891104066, 'epsilon': 0.6978124903143345}. Best is trial 12 with value: 0.9117122780459207.
[I 2026-01-18 12:16:41,337] Trial 3 finished with value: 0.5332122378192344 and parameters: {'C': 38.810450182048264, 'gamma': 0.0002559680009780349, 'epsilon': 0.012778555181256649}. Best is trial 12 with value: 0.9117122780459207.
[I 2026-01-18 12:16:41,339] Trial 4 finished with value: 0.0177553810

  Inner best params: {'C': 274.37723837361295, 'gamma': 0.0020929292801209255, 'epsilon': 0.012688205882236141}, inner CV mean R2 = 0.9323


[I 2026-01-18 12:16:45,797] A new study created in memory with name: no-name-825b2d19-1b25-4295-83b4-d15a1664cccb


  Outer fold 3 metrics - R2: 0.9075, RMSE: 4.2332, MAE: 2.0739

--- Outer Fold 4/5 ---


[I 2026-01-18 12:16:46,290] Trial 10 finished with value: 0.9110479553495754 and parameters: {'C': 38.021721884155, 'gamma': 0.003743504558897651, 'epsilon': 0.42139390193744497}. Best is trial 10 with value: 0.9110479553495754.
[I 2026-01-18 12:16:46,293] Trial 7 finished with value: -0.047379326677259005 and parameters: {'C': 0.025374844099782544, 'gamma': 0.0880715705579491, 'epsilon': 0.2053300515096948}. Best is trial 10 with value: 0.9110479553495754.
[I 2026-01-18 12:16:46,303] Trial 0 finished with value: 0.35485841501261356 and parameters: {'C': 3.3295063064244457, 'gamma': 0.0016467551085085417, 'epsilon': 0.0010048564247278674}. Best is trial 10 with value: 0.9110479553495754.
[I 2026-01-18 12:16:46,326] Trial 1 finished with value: 0.2521903856957189 and parameters: {'C': 0.5716794862826906, 'gamma': 0.011370552810490804, 'epsilon': 0.005382814693981494}. Best is trial 10 with value: 0.9110479553495754.
[I 2026-01-18 12:16:46,327] Trial 2 finished with value: -0.05348402586

  Inner best params: {'C': 772.5046217145684, 'gamma': 0.0009776298635327358, 'epsilon': 0.0010118814980477466}, inner CV mean R2 = 0.9313


[I 2026-01-18 12:16:50,603] A new study created in memory with name: no-name-abe6e547-8c44-4658-a120-db5d63364153


  Outer fold 4 metrics - R2: 0.9570, RMSE: 2.4272, MAE: 1.6093

--- Outer Fold 5/5 ---


[I 2026-01-18 12:16:51,108] Trial 3 finished with value: 0.05231671924977025 and parameters: {'C': 2.970616805605305, 'gamma': 0.0002547536567681412, 'epsilon': 0.059481952142080206}. Best is trial 3 with value: 0.05231671924977025.
[I 2026-01-18 12:16:51,123] Trial 1 finished with value: 0.6533871712235485 and parameters: {'C': 83.54717310017325, 'gamma': 0.00021903332992332422, 'epsilon': 0.001329419179749559}. Best is trial 1 with value: 0.6533871712235485.
[I 2026-01-18 12:16:51,132] Trial 4 finished with value: -0.013137332643765265 and parameters: {'C': 0.04426487350939081, 'gamma': 0.0065166411809264675, 'epsilon': 0.11365061077875789}. Best is trial 1 with value: 0.6533871712235485.
[I 2026-01-18 12:16:51,143] Trial 5 finished with value: 0.19470649243613927 and parameters: {'C': 1.9525550221102197, 'gamma': 0.0012670333453108408, 'epsilon': 0.06208961156104356}. Best is trial 1 with value: 0.6533871712235485.
[I 2026-01-18 12:16:51,147] Trial 7 finished with value: -0.03691575

  Inner best params: {'C': 739.4159165187389, 'gamma': 0.0017446528551640149, 'epsilon': 0.0056676449795993915}, inner CV mean R2 = 0.9169


[I 2026-01-18 12:16:55,507] A new study created in memory with name: no-name-44fd32a6-0b9e-473e-9f15-4c7c584d257d


  Outer fold 5 metrics - R2: 0.9706, RMSE: 2.1116, MAE: 1.3734

AM-IV-filtered - Nested CV summary:
  R2: 0.9429 ± 0.0287
  RMSE: 3.0068 ± 0.8967
  MAE: 1.6116 ± 0.2818

Running final inner hyperparameter search on FULL data...


[I 2026-01-18 12:16:56,295] Trial 1 finished with value: 0.2539335336000525 and parameters: {'C': 9.0511460838697, 'gamma': 0.0002751220014625143, 'epsilon': 0.27770693637129806}. Best is trial 1 with value: 0.2539335336000525.
[I 2026-01-18 12:16:56,305] Trial 5 finished with value: -0.034840318331903784 and parameters: {'C': 0.02264637797748714, 'gamma': 0.5688705796954512, 'epsilon': 0.004736987495615258}. Best is trial 1 with value: 0.2539335336000525.
[I 2026-01-18 12:16:56,323] Trial 0 finished with value: 0.18884268504071866 and parameters: {'C': 10.159154419927155, 'gamma': 0.13674592500588623, 'epsilon': 0.7923649200743826}. Best is trial 1 with value: 0.2539335336000525.
[I 2026-01-18 12:16:56,331] Trial 4 finished with value: 0.8350133247726882 and parameters: {'C': 242.97556018138982, 'gamma': 0.00015823052608913464, 'epsilon': 0.01727864816103946}. Best is trial 4 with value: 0.8350133247726882.
[I 2026-01-18 12:16:56,337] Trial 2 finished with value: 0.10311049260414902 a

Final best params on FULL data: {'C': 846.7566079554211, 'gamma': 0.0012112350293009813, 'epsilon': 0.40143265041021137}


[I 2026-01-18 12:17:03,183] A new study created in memory with name: no-name-8cc1c1ce-7ace-4e5d-87b3-7df8171915f5


Saved final model to: ./svr-model-other4/AM-IV-filtered_final_svr_model.joblib
Saved final scaler to: ./svr-model-other4/AM-IV-filtered_final_scaler.joblib


--- Outer Fold 1/5 ---


[I 2026-01-18 12:17:03,601] Trial 3 finished with value: 0.7010760442107076 and parameters: {'C': 159.1014138738618, 'gamma': 0.00042477902805333095, 'epsilon': 0.07488529251072566}. Best is trial 3 with value: 0.7010760442107076.
[I 2026-01-18 12:17:03,603] Trial 1 finished with value: -0.042768249694031146 and parameters: {'C': 0.9612191790710222, 'gamma': 1.8850387855095057, 'epsilon': 0.0227528995831249}. Best is trial 3 with value: 0.7010760442107076.
[I 2026-01-18 12:17:03,622] Trial 0 finished with value: 0.39305582218393814 and parameters: {'C': 34.52066636778896, 'gamma': 0.08395707740480594, 'epsilon': 0.5960378146651584}. Best is trial 3 with value: 0.7010760442107076.
[I 2026-01-18 12:17:03,690] Trial 7 finished with value: -0.004841582734226524 and parameters: {'C': 10.21951116536685, 'gamma': 7.568326932435469, 'epsilon': 0.6013304210583361}. Best is trial 3 with value: 0.7010760442107076.
[I 2026-01-18 12:17:03,717] Trial 5 finished with value: -0.003964211062310226 and 

  Inner best params: {'C': 75.26621727903245, 'gamma': 0.009234969264471716, 'epsilon': 0.048551951521267886}, inner CV mean R2 = 0.8054
  Outer fold 1 metrics - R2: 0.8876, RMSE: 3.8075, MAE: 2.7450

--- Outer Fold 2/5 ---


[I 2026-01-18 12:17:08,188] Trial 4 finished with value: -0.05206400999385267 and parameters: {'C': 0.3583621507045855, 'gamma': 0.6493029956873593, 'epsilon': 0.015653020251628933}. Best is trial 4 with value: -0.05206400999385267.
[I 2026-01-18 12:17:08,195] Trial 8 finished with value: -0.04785995422150946 and parameters: {'C': 0.016705365099836257, 'gamma': 0.0018568317561270854, 'epsilon': 0.2570394616200986}. Best is trial 8 with value: -0.04785995422150946.
[I 2026-01-18 12:17:08,216] Trial 1 finished with value: -0.0030461366892013952 and parameters: {'C': 24.824775687778605, 'gamma': 1.4058207831707865, 'epsilon': 0.0052512250801844546}. Best is trial 1 with value: -0.0030461366892013952.
[I 2026-01-18 12:17:08,233] Trial 6 finished with value: 0.7627522195201979 and parameters: {'C': 182.74391189253348, 'gamma': 0.00033871741042726324, 'epsilon': 0.04067578050909895}. Best is trial 6 with value: 0.7627522195201979.
[I 2026-01-18 12:17:08,236] Trial 7 finished with value: -0.0

  Inner best params: {'C': 244.39325228892656, 'gamma': 0.004745529074730703, 'epsilon': 0.001524914920464943}, inner CV mean R2 = 0.8472
  Outer fold 2 metrics - R2: 0.7819, RMSE: 4.1656, MAE: 2.6805

--- Outer Fold 3/5 ---


[I 2026-01-18 12:17:12,520] Trial 0 finished with value: -0.05184435486326059 and parameters: {'C': 0.013450285419556044, 'gamma': 0.057346548034354, 'epsilon': 0.11556158737518755}. Best is trial 0 with value: -0.05184435486326059.
[I 2026-01-18 12:17:12,559] Trial 2 finished with value: 0.8106779172854323 and parameters: {'C': 94.50909856166393, 'gamma': 0.004185999915678652, 'epsilon': 0.0501793768241668}. Best is trial 2 with value: 0.8106779172854323.
[I 2026-01-18 12:17:12,577] Trial 1 finished with value: -0.05819558179584446 and parameters: {'C': 0.7292265686923153, 'gamma': 1.2956591258139403, 'epsilon': 0.014021164904461479}. Best is trial 2 with value: 0.8106779172854323.
[I 2026-01-18 12:17:12,598] Trial 11 finished with value: 0.028256663202311533 and parameters: {'C': 240.92644773923033, 'gamma': 0.1887290765584213, 'epsilon': 0.008820530678695369}. Best is trial 2 with value: 0.8106779172854323.
[I 2026-01-18 12:17:12,603] Trial 5 finished with value: 0.00075983868759414

  Inner best params: {'C': 45.568509499344614, 'gamma': 0.010498429931877653, 'epsilon': 0.011961831992372994}, inner CV mean R2 = 0.8198
  Outer fold 3 metrics - R2: 0.8170, RMSE: 4.2215, MAE: 3.0440

--- Outer Fold 4/5 ---


[I 2026-01-18 12:17:16,842] Trial 2 finished with value: -0.03804643606547905 and parameters: {'C': 0.0687632418068662, 'gamma': 0.02088906073909471, 'epsilon': 0.24939561574585517}. Best is trial 2 with value: -0.03804643606547905.
[I 2026-01-18 12:17:16,873] Trial 0 finished with value: -0.05171089074681116 and parameters: {'C': 0.05177256716167889, 'gamma': 0.005237054240596634, 'epsilon': 0.014505191864571653}. Best is trial 2 with value: -0.03804643606547905.
[I 2026-01-18 12:17:16,893] Trial 3 finished with value: -0.06788355262131696 and parameters: {'C': 0.011305172049050792, 'gamma': 0.00018703179683471317, 'epsilon': 0.05205664829762028}. Best is trial 2 with value: -0.03804643606547905.
[I 2026-01-18 12:17:16,897] Trial 7 finished with value: 0.01734989032282212 and parameters: {'C': 1.6778581017756722, 'gamma': 0.09308979728239747, 'epsilon': 0.01830511402953972}. Best is trial 7 with value: 0.01734989032282212.
[I 2026-01-18 12:17:16,906] Trial 6 finished with value: -0.02

  Inner best params: {'C': 295.1947942969513, 'gamma': 0.0027812669965302443, 'epsilon': 0.0010710003633197}, inner CV mean R2 = 0.7658
  Outer fold 4 metrics - R2: 0.8903, RMSE: 3.7695, MAE: 2.6785

--- Outer Fold 5/5 ---


[I 2026-01-18 12:17:21,139] Trial 0 finished with value: -0.04677014293088555 and parameters: {'C': 3.429579762777627, 'gamma': 0.00022392166454179133, 'epsilon': 0.013549701764038911}. Best is trial 0 with value: -0.04677014293088555.
[I 2026-01-18 12:17:21,186] Trial 1 finished with value: -0.01868674872377844 and parameters: {'C': 0.21836469520991023, 'gamma': 0.009394653315255172, 'epsilon': 0.0142173737576367}. Best is trial 1 with value: -0.01868674872377844.
[I 2026-01-18 12:17:21,193] Trial 4 finished with value: -0.1087435607035016 and parameters: {'C': 0.1202271178762446, 'gamma': 0.00034544824594445815, 'epsilon': 0.00604423591578155}. Best is trial 1 with value: -0.01868674872377844.
[I 2026-01-18 12:17:21,199] Trial 2 finished with value: -0.11063233742162693 and parameters: {'C': 0.08442779243206402, 'gamma': 0.5511186441059637, 'epsilon': 0.004105795861447669}. Best is trial 1 with value: -0.01868674872377844.
[I 2026-01-18 12:17:21,207] Trial 5 finished with value: 0.83

  Inner best params: {'C': 148.53625829803016, 'gamma': 0.0042925095693460555, 'epsilon': 0.004324949413953236}, inner CV mean R2 = 0.8370
  Outer fold 5 metrics - R2: 0.8362, RMSE: 4.0530, MAE: 2.6723

AM-V-filtered - Nested CV summary:
  R2: 0.8426 ± 0.0466
  RMSE: 4.0034 ± 0.2058
  MAE: 2.7641 ± 0.1593

Running final inner hyperparameter search on FULL data...


[I 2026-01-18 12:17:25,481] Trial 0 finished with value: 0.824620162578492 and parameters: {'C': 689.4114953120076, 'gamma': 0.00026618306484016294, 'epsilon': 0.14282543971736011}. Best is trial 0 with value: 0.824620162578492.
[I 2026-01-18 12:17:25,494] Trial 3 finished with value: -0.05627299830594964 and parameters: {'C': 0.08992772385520416, 'gamma': 0.21633838735143318, 'epsilon': 0.02098215204312841}. Best is trial 0 with value: 0.824620162578492.
[I 2026-01-18 12:17:25,496] Trial 2 finished with value: -0.0015332246441173887 and parameters: {'C': 5.921006223717392, 'gamma': 0.286806009418565, 'epsilon': 0.0061270089276978905}. Best is trial 0 with value: 0.824620162578492.
[I 2026-01-18 12:17:25,501] Trial 1 finished with value: -0.04979923246401041 and parameters: {'C': 0.012134672810300909, 'gamma': 0.011482719444668188, 'epsilon': 0.13122543276051807}. Best is trial 0 with value: 0.824620162578492.
[I 2026-01-18 12:17:25,509] Trial 5 finished with value: -0.0398582550566669

Final best params on FULL data: {'C': 180.00551467162956, 'gamma': 0.005924421672289322, 'epsilon': 0.008307936039066653}


[I 2026-01-18 12:17:31,208] A new study created in memory with name: no-name-2a8c9d88-296e-467a-af18-9a05ad99a696


Saved final model to: ./svr-model-other4/AM-V-filtered_final_svr_model.joblib
Saved final scaler to: ./svr-model-other4/AM-V-filtered_final_scaler.joblib


--- Outer Fold 1/5 ---


[I 2026-01-18 12:17:31,482] Trial 2 finished with value: -0.04814423341080073 and parameters: {'C': 2.322343051347587, 'gamma': 0.00012332165891299846, 'epsilon': 0.002233656200449406}. Best is trial 2 with value: -0.04814423341080073.
[I 2026-01-18 12:17:31,527] Trial 1 finished with value: -0.03230544413964487 and parameters: {'C': 489.2018822310703, 'gamma': 7.056459597150487, 'epsilon': 0.017294858308778107}. Best is trial 1 with value: -0.03230544413964487.
[I 2026-01-18 12:17:31,632] Trial 5 finished with value: 0.42581302354833933 and parameters: {'C': 172.90855181715733, 'gamma': 0.00014895619350947472, 'epsilon': 0.004160373649272654}. Best is trial 5 with value: 0.42581302354833933.
[I 2026-01-18 12:17:31,634] Trial 6 finished with value: -0.052078239364938717 and parameters: {'C': 0.29790825836226587, 'gamma': 0.2492798714435829, 'epsilon': 0.002305932895209493}. Best is trial 5 with value: 0.42581302354833933.
[I 2026-01-18 12:17:31,658] Trial 0 finished with value: -0.0551

  Inner best params: {'C': 287.84619317958436, 'gamma': 0.0049428203757493575, 'epsilon': 0.004291153251542828}, inner CV mean R2 = 0.9085
  Outer fold 1 metrics - R2: 0.9470, RMSE: 4.3389, MAE: 3.1673

--- Outer Fold 2/5 ---


[I 2026-01-18 12:17:36,810] Trial 3 finished with value: -0.04339735975067893 and parameters: {'C': 0.05463528896223615, 'gamma': 0.5465761598682327, 'epsilon': 0.056128859328869164}. Best is trial 3 with value: -0.04339735975067893.
[I 2026-01-18 12:17:36,882] Trial 1 finished with value: -0.04489551333662608 and parameters: {'C': 0.2789406911338283, 'gamma': 1.1995944799448364, 'epsilon': 0.10759884363498069}. Best is trial 3 with value: -0.04339735975067893.
[I 2026-01-18 12:17:36,953] Trial 6 finished with value: -0.03750263250148748 and parameters: {'C': 0.813850007429167, 'gamma': 0.00030530741145285084, 'epsilon': 0.013565768483384175}. Best is trial 6 with value: -0.03750263250148748.
[I 2026-01-18 12:17:36,972] Trial 8 finished with value: -0.023943579730441034 and parameters: {'C': 0.7644277387295567, 'gamma': 0.13141974574090115, 'epsilon': 0.020389859699804128}. Best is trial 8 with value: -0.023943579730441034.
[I 2026-01-18 12:17:37,015] Trial 5 finished with value: -0.04

  Inner best params: {'C': 796.3603622108894, 'gamma': 0.003049721384314918, 'epsilon': 0.15876667276780965}, inner CV mean R2 = 0.9200
  Outer fold 2 metrics - R2: 0.9305, RMSE: 5.3239, MAE: 2.8280

--- Outer Fold 3/5 ---


[I 2026-01-18 12:17:42,126] Trial 0 finished with value: -0.07524871515444335 and parameters: {'C': 0.3619564558432384, 'gamma': 0.00032346704787977976, 'epsilon': 0.0017220710214252703}. Best is trial 0 with value: -0.07524871515444335.
[I 2026-01-18 12:17:42,199] Trial 1 finished with value: -0.07825703109356434 and parameters: {'C': 0.21697869397450176, 'gamma': 4.017847070184136, 'epsilon': 0.0017471885690582636}. Best is trial 0 with value: -0.07524871515444335.
[I 2026-01-18 12:17:42,234] Trial 3 finished with value: -0.01254708288653054 and parameters: {'C': 415.5229564880304, 'gamma': 7.809744157292772, 'epsilon': 0.010909755697408022}. Best is trial 3 with value: -0.01254708288653054.
[I 2026-01-18 12:17:42,236] Trial 4 finished with value: -0.07952961373823168 and parameters: {'C': 3.1818526138734877, 'gamma': 5.926902089674841, 'epsilon': 0.035422144502063616}. Best is trial 3 with value: -0.01254708288653054.
[I 2026-01-18 12:17:42,267] Trial 6 finished with value: 0.502892

  Inner best params: {'C': 674.3863355145652, 'gamma': 0.0064876972079843205, 'epsilon': 0.6995487237394149}, inner CV mean R2 = 0.8933
  Outer fold 3 metrics - R2: 0.9561, RMSE: 4.0777, MAE: 3.0760

--- Outer Fold 4/5 ---


[I 2026-01-18 12:17:47,534] Trial 5 finished with value: 0.05424605912807989 and parameters: {'C': 1.4387971891301792, 'gamma': 0.004833768176616314, 'epsilon': 0.0034500247809791445}. Best is trial 5 with value: 0.05424605912807989.
[I 2026-01-18 12:17:47,544] Trial 1 finished with value: 0.9238480822233549 and parameters: {'C': 848.4486440536182, 'gamma': 0.0035807872378382515, 'epsilon': 0.40281488476183774}. Best is trial 1 with value: 0.9238480822233549.
[I 2026-01-18 12:17:47,566] Trial 0 finished with value: -0.07747711769586796 and parameters: {'C': 0.08584262311782473, 'gamma': 0.0011751413097929344, 'epsilon': 0.18343572789947046}. Best is trial 1 with value: 0.9238480822233549.
[I 2026-01-18 12:17:47,583] Trial 4 finished with value: 0.17060018740002084 and parameters: {'C': 42.93421647642536, 'gamma': 0.00024221632983368186, 'epsilon': 0.004928202818778628}. Best is trial 1 with value: 0.9238480822233549.
[I 2026-01-18 12:17:47,593] Trial 2 finished with value: 0.1400008230

  Inner best params: {'C': 530.4860200136636, 'gamma': 0.0037318972647396274, 'epsilon': 0.0024280305460197675}, inner CV mean R2 = 0.9298
  Outer fold 4 metrics - R2: 0.8208, RMSE: 6.9971, MAE: 3.0036

--- Outer Fold 5/5 ---


[I 2026-01-18 12:17:52,238] Trial 0 finished with value: 0.6449214876019296 and parameters: {'C': 8.639500659176978, 'gamma': 0.033763237074600265, 'epsilon': 0.740207978001345}. Best is trial 0 with value: 0.6449214876019296.
[I 2026-01-18 12:17:52,251] Trial 5 finished with value: -0.05366903897206676 and parameters: {'C': 5.179765255891514, 'gamma': 0.0003824501421554718, 'epsilon': 0.7374627824741394}. Best is trial 0 with value: 0.6449214876019296.
[I 2026-01-18 12:17:52,273] Trial 3 finished with value: 0.8786578668972211 and parameters: {'C': 287.6083967406624, 'gamma': 0.010738860935518315, 'epsilon': 0.11165475627375501}. Best is trial 3 with value: 0.8786578668972211.
[I 2026-01-18 12:17:52,305] Trial 1 finished with value: -0.07280868518242596 and parameters: {'C': 0.21484824236954253, 'gamma': 0.01119078133257217, 'epsilon': 0.11215551535841181}. Best is trial 3 with value: 0.8786578668972211.
[I 2026-01-18 12:17:52,314] Trial 4 finished with value: -0.060420484421905396 an

  Inner best params: {'C': 84.72382123046243, 'gamma': 0.007131683170378881, 'epsilon': 0.013468532270952667}, inner CV mean R2 = 0.8924
  Outer fold 5 metrics - R2: 0.9142, RMSE: 4.7365, MAE: 2.6302

AM-VI-filtered - Nested CV summary:
  R2: 0.9137 ± 0.0544
  RMSE: 5.0948 ± 1.1625
  MAE: 2.9410 ± 0.2137

Running final inner hyperparameter search on FULL data...


[I 2026-01-18 12:17:57,078] Trial 5 finished with value: 0.16768708407423957 and parameters: {'C': 1.7360966879506314, 'gamma': 0.006908385041872911, 'epsilon': 0.514996283273963}. Best is trial 5 with value: 0.16768708407423957.
[I 2026-01-18 12:17:57,091] Trial 0 finished with value: -0.017157827152670595 and parameters: {'C': 653.3492466679884, 'gamma': 3.3930421429759674, 'epsilon': 0.08288714003188842}. Best is trial 5 with value: 0.16768708407423957.
[I 2026-01-18 12:17:57,113] Trial 3 finished with value: -0.05990632230634385 and parameters: {'C': 0.011300134769628796, 'gamma': 0.026119569615215915, 'epsilon': 0.0019571946262091537}. Best is trial 5 with value: 0.16768708407423957.
[I 2026-01-18 12:17:57,170] Trial 2 finished with value: -0.017080877239622705 and parameters: {'C': 87.4393774861207, 'gamma': 3.4032198221050884, 'epsilon': 0.0010409624918314442}. Best is trial 5 with value: 0.16768708407423957.
[I 2026-01-18 12:17:57,222] Trial 1 finished with value: 0.72086045365

Final best params on FULL data: {'C': 176.27916664894016, 'gamma': 0.003979168148482775, 'epsilon': 0.033322240553843585}


[I 2026-01-18 12:18:02,006] A new study created in memory with name: no-name-af3a6376-5a75-4829-bd5f-1ba871c71c63


Saved final model to: ./svr-model-other4/AM-VI-filtered_final_svr_model.joblib
Saved final scaler to: ./svr-model-other4/AM-VI-filtered_final_scaler.joblib


--- Outer Fold 1/5 ---


[I 2026-01-18 12:18:02,272] Trial 3 finished with value: 0.14197614264424427 and parameters: {'C': 0.8602957628192716, 'gamma': 0.06636267311041172, 'epsilon': 0.0010392293420432102}. Best is trial 3 with value: 0.14197614264424427.
[I 2026-01-18 12:18:02,321] Trial 1 finished with value: -0.013125459017560162 and parameters: {'C': 0.12321777865726595, 'gamma': 0.00016033257707047654, 'epsilon': 0.02881008002837209}. Best is trial 3 with value: 0.14197614264424427.
[I 2026-01-18 12:18:02,329] Trial 4 finished with value: 0.01807532065164234 and parameters: {'C': 0.8911540701592856, 'gamma': 0.0005783561506780877, 'epsilon': 0.011276516006485653}. Best is trial 3 with value: 0.14197614264424427.
[I 2026-01-18 12:18:02,347] Trial 0 finished with value: 0.014687550689278664 and parameters: {'C': 0.10898036443802084, 'gamma': 0.0055700126992536975, 'epsilon': 0.04107353833563979}. Best is trial 3 with value: 0.14197614264424427.
[I 2026-01-18 12:18:02,373] Trial 7 finished with value: 0.15

  Inner best params: {'C': 106.27492426881942, 'gamma': 0.014135387820591804, 'epsilon': 0.05164897354297209}, inner CV mean R2 = 0.9506
  Outer fold 1 metrics - R2: 0.9551, RMSE: 4.0462, MAE: 2.8969

--- Outer Fold 2/5 ---


[I 2026-01-18 12:18:07,044] Trial 1 finished with value: 0.5830497252000795 and parameters: {'C': 150.17978917395757, 'gamma': 0.13352908376995012, 'epsilon': 0.13047749129259045}. Best is trial 1 with value: 0.5830497252000795.
[I 2026-01-18 12:18:07,071] Trial 0 finished with value: 0.9181823223390525 and parameters: {'C': 42.85429071855334, 'gamma': 0.042984589934828225, 'epsilon': 0.1808791379133424}. Best is trial 0 with value: 0.9181823223390525.
[I 2026-01-18 12:18:07,087] Trial 3 finished with value: 0.021806856352883448 and parameters: {'C': 0.6572838811036964, 'gamma': 0.11118448075093916, 'epsilon': 0.01289947746652316}. Best is trial 0 with value: 0.9181823223390525.
[I 2026-01-18 12:18:07,094] Trial 6 finished with value: -0.040883809727904286 and parameters: {'C': 1.9716972064730856, 'gamma': 1.1670884432159463, 'epsilon': 0.001949559935294853}. Best is trial 0 with value: 0.9181823223390525.
[I 2026-01-18 12:18:07,147] Trial 2 finished with value: -0.018587994378207313 a

  Inner best params: {'C': 780.3175345515436, 'gamma': 0.009546388679830806, 'epsilon': 0.32835790648925367}, inner CV mean R2 = 0.9522
  Outer fold 2 metrics - R2: 0.8863, RMSE: 7.5774, MAE: 4.2062

--- Outer Fold 3/5 ---


[I 2026-01-18 12:18:11,842] Trial 3 finished with value: -0.03630796504505022 and parameters: {'C': 0.07627613857063525, 'gamma': 0.0015670923150635477, 'epsilon': 0.03005189304072348}. Best is trial 3 with value: -0.03630796504505022.
[I 2026-01-18 12:18:11,850] Trial 1 finished with value: 0.6261663523696023 and parameters: {'C': 28.2451998461408, 'gamma': 0.0005153130653166611, 'epsilon': 0.0025498342104146995}. Best is trial 1 with value: 0.6261663523696023.
[I 2026-01-18 12:18:11,873] Trial 2 finished with value: 0.8257698258337083 and parameters: {'C': 205.1433769128026, 'gamma': 0.00015504753513740117, 'epsilon': 0.007385736752089759}. Best is trial 2 with value: 0.8257698258337083.
[I 2026-01-18 12:18:11,896] Trial 0 finished with value: 0.8634778935443045 and parameters: {'C': 5.45928001376241, 'gamma': 0.01782066535076245, 'epsilon': 0.007628384096616736}. Best is trial 0 with value: 0.8634778935443045.
[I 2026-01-18 12:18:11,913] Trial 5 finished with value: 0.89739464983180

  Inner best params: {'C': 47.275793222184475, 'gamma': 0.011077076574486517, 'epsilon': 0.03710737029599883}, inner CV mean R2 = 0.9268
  Outer fold 3 metrics - R2: 0.9431, RMSE: 5.4703, MAE: 3.8714

--- Outer Fold 4/5 ---


[I 2026-01-18 12:18:16,597] Trial 0 finished with value: 0.22448892941664936 and parameters: {'C': 18.82461579586479, 'gamma': 0.000229857729077528, 'epsilon': 0.0053465286721840624}. Best is trial 0 with value: 0.22448892941664936.
[I 2026-01-18 12:18:16,626] Trial 1 finished with value: 0.5731782796912867 and parameters: {'C': 1.868672093650469, 'gamma': 0.018207695986134656, 'epsilon': 0.0013193036179265792}. Best is trial 1 with value: 0.5731782796912867.
[I 2026-01-18 12:18:16,659] Trial 7 finished with value: 0.9295937673313457 and parameters: {'C': 491.0716979751456, 'gamma': 0.0016974804777189802, 'epsilon': 0.6973486480933236}. Best is trial 7 with value: 0.9295937673313457.
[I 2026-01-18 12:18:16,666] Trial 4 finished with value: -0.02077782779126684 and parameters: {'C': 0.11073395814265208, 'gamma': 0.0009119267297149851, 'epsilon': 0.0024227883275522613}. Best is trial 7 with value: 0.9295937673313457.
[I 2026-01-18 12:18:16,693] Trial 10 finished with value: 0.84302390744

  Inner best params: {'C': 278.46640239086156, 'gamma': 0.010105693176327254, 'epsilon': 0.043120447702064524}, inner CV mean R2 = 0.9337
  Outer fold 4 metrics - R2: 0.9671, RMSE: 4.4445, MAE: 3.2301

--- Outer Fold 5/5 ---


[I 2026-01-18 12:18:21,788] Trial 0 finished with value: -0.040153542990083814 and parameters: {'C': 0.010312756522446738, 'gamma': 0.7402907051540272, 'epsilon': 0.004054832131388513}. Best is trial 0 with value: -0.040153542990083814.
[I 2026-01-18 12:18:21,920] Trial 3 finished with value: 0.2944379979958049 and parameters: {'C': 20.131555560990076, 'gamma': 0.17489725744061674, 'epsilon': 0.003263031572563642}. Best is trial 3 with value: 0.2944379979958049.
[I 2026-01-18 12:18:21,923] Trial 9 finished with value: -0.031453584254433974 and parameters: {'C': 0.019046473240579984, 'gamma': 0.004756967932646574, 'epsilon': 0.5914250391874974}. Best is trial 3 with value: 0.2944379979958049.
[I 2026-01-18 12:18:21,941] Trial 1 finished with value: -0.018846875046536666 and parameters: {'C': 27.42204163659823, 'gamma': 0.7388007054954147, 'epsilon': 0.023910028907836833}. Best is trial 3 with value: 0.2944379979958049.
[I 2026-01-18 12:18:21,952] Trial 5 finished with value: 0.064470175

  Inner best params: {'C': 85.84494366740265, 'gamma': 0.011614729374971511, 'epsilon': 0.006739335532246787}, inner CV mean R2 = 0.9612
  Outer fold 5 metrics - R2: 0.9428, RMSE: 5.1355, MAE: 3.5236

AM-VII-filtered - Nested CV summary:
  R2: 0.9389 ± 0.0311
  RMSE: 5.3348 ± 1.3730
  MAE: 3.5456 ± 0.5156

Running final inner hyperparameter search on FULL data...


[I 2026-01-18 12:18:27,201] Trial 4 finished with value: 0.9244722957212209 and parameters: {'C': 548.8066262050478, 'gamma': 0.0006453506167406127, 'epsilon': 0.018390166812208713}. Best is trial 4 with value: 0.9244722957212209.
[I 2026-01-18 12:18:27,268] Trial 8 finished with value: 0.9047189353732081 and parameters: {'C': 177.41653222861672, 'gamma': 0.00044433660958743894, 'epsilon': 0.9115712324480848}. Best is trial 4 with value: 0.9244722957212209.
[I 2026-01-18 12:18:27,287] Trial 3 finished with value: 0.25777654200770606 and parameters: {'C': 6.647757161302349, 'gamma': 0.000566520216832614, 'epsilon': 0.23934894620080885}. Best is trial 4 with value: 0.9244722957212209.
[I 2026-01-18 12:18:27,298] Trial 1 finished with value: 0.9273078947931731 and parameters: {'C': 62.582210318126506, 'gamma': 0.003954852859412689, 'epsilon': 0.0022307649394154513}. Best is trial 1 with value: 0.9273078947931731.
[I 2026-01-18 12:18:27,369] Trial 5 finished with value: 0.02759252789214268

Final best params on FULL data: {'C': 282.3467484838021, 'gamma': 0.009611163905791791, 'epsilon': 0.04216017634183765}
Saved final model to: ./svr-model-other4/AM-VII-filtered_final_svr_model.joblib
Saved final scaler to: ./svr-model-other4/AM-VII-filtered_final_scaler.joblib
