# 1. Install and Import Libraries

In [None]:
!pip install optuna

In [None]:
import os
import json
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
from google.colab import drive
import optuna

# 2. Environment Settings and Initialization

## 2.1. Set Seed for Reproducibility

In [None]:
# Single seed for the notebook: it seeds NumPy's global RNG here and is later
# passed to every SVC as `random_state` through check_svm_loop_runs(seed=SEED).
SEED = 42
np.random.seed(SEED)

## 2.3. Manage Google Drive

### 2.3.1. Mount Google Drive

In [None]:
drive.mount('/content/drive')

Mounted at /content/drive


### 2.3.2. Set Folder Path

In [None]:
# Datasets
# The trailing slash is required: CSV names are appended directly via f"{READ_PATH}<file>.csv".
READ_PATH = '/content/drive/MyDrive/Bach_Thesis/Dataset/'

# Models
# SAVE_ROOT: where the check runs write their reports, plots and pickled models.
# STUDY_ROOT: where the previously saved Optuna studies are read from.
SAVE_ROOT = "/content/drive/MyDrive/Bach_Thesis/Models/SA_Check"
STUDY_ROOT = "/content/drive/MyDrive/Bach_Thesis/Models/SA_Optuna"

## 2.4. Initialize Global Parameters

In [None]:
# Class ids and their display names, in matching order (0 -> 'negative', 1 -> 'positive').
# Both are used by the helper functions for the classification report and the
# confusion-matrix tick labels.
LABELS = [0,1]
TARGET_NAMES = ['negative','positive']

## 2.5. Initialize Datasets

In [None]:
df_train_ori = pd.read_csv(f"{READ_PATH}Train2lab.csv")
df_train_ros = pd.read_csv(f"{READ_PATH}Train_ROS2lab.csv")
df_train_ros_ncl = pd.read_csv(f"{READ_PATH}Train_ROS_NCL2lab.csv")

In [None]:
df_val = pd.read_csv(f"{READ_PATH}Validation2lab.csv")
df_test = pd.read_csv(f"{READ_PATH}Test2lab.csv")

# 3. Helper Functions

In [None]:
def create_classification_report(y_true, y_pred):
    """Build scikit-learn's classification report as a DataFrame.

    The report is computed for the classes in ``LABELS`` (named by
    ``TARGET_NAMES``) with ``zero_division=0``. Rows of the report become rows
    of the frame, the row names are moved into a ``label`` column, and every
    float column is rounded to 4 decimals.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        pandas.DataFrame: One row per report entry, with a leading ``label`` column.
    """
    report_dict = classification_report(
        y_true,
        y_pred,
        labels=LABELS,
        target_names=TARGET_NAMES,
        zero_division=0,
        output_dict=True,
    )
    report_df = (
        pd.DataFrame(report_dict)
        .transpose()
        .reset_index()
        .rename(columns={'index':'label'})
    )

    # Round only the float columns; the string `label` column is left untouched.
    float_columns = report_df.select_dtypes(include=['float']).columns
    return report_df.round({column: 4 for column in float_columns})

In [None]:
def create_confusion_matrix(y_true, y_pred, save_path_png):
    """Render the confusion matrix as an annotated heatmap and save it as a PNG.

    Rows and columns follow the order of ``LABELS`` and are labelled with
    ``TARGET_NAMES``. The figure is closed after saving, so nothing is shown
    inline and no figure stays open.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        save_path_png: Destination path of the image file.
    """
    matrix = confusion_matrix(y_true, y_pred, labels=LABELS)

    fig, ax = plt.subplots(figsize=(6,5))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=TARGET_NAMES,
        yticklabels=TARGET_NAMES,
        ax=ax,
    )
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.set_title('Confusion Matrix')
    fig.tight_layout()
    fig.savefig(save_path_png, dpi=150)
    plt.close(fig)

In [None]:
def get_best_hyperparameter(save_root, model_name, dataset_name):
    """Load the best trial's parameters from a stored Optuna study.

    The study is expected under
    ``<save_root>/<model_name with "/" replaced by "_">/<dataset_name>/optuna_study``
    as a SQLite file ``study.db`` plus a ``study_name.txt`` holding the study name.

    Args:
        save_root: Root directory that contains the saved studies.
        model_name: Model identifier; any "/" is replaced by "_" for the folder name.
        dataset_name: Name of the dataset sub-folder.

    Returns:
        dict | None: Parameters of the best trial, or ``None`` (after printing a
        message) when the study files are missing or the study cannot be loaded.
    """
    optuna_dir = Path(save_root) / model_name.replace("/", "_") / dataset_name / "optuna_study"
    storage_path = optuna_dir / "study.db"
    study_name_path = optuna_dir / "study_name.txt"

    # Guard clause: both files must exist before Optuna is touched at all.
    if not (storage_path.exists() and study_name_path.exists()):
        print(f"Error: Study files not found at expected path: {optuna_dir}")
        return None

    study_name = study_name_path.read_text().strip()

    try:
        study = optuna.load_study(
            study_name=study_name,
            storage=f"sqlite:///{storage_path}",
        )
        return study.best_trial.params
    except Exception as e:
        # Best effort by design: report the problem and let the caller see None.
        print(f"An error occurred while loading study: {e}")
        return None

In [None]:
def get_hp_grid(best_hp, change_hp=None):
    """Merge optional overrides into the tuned hyperparameters.

    A new dict is always returned, so the result can be stored or modified
    without affecting ``best_hp`` (previously the input object itself was
    returned when ``change_hp`` was ``None``).

    Args:
        best_hp: Mapping of tuned hyperparameters, e.g. the result of
            ``get_best_hyperparameter``.
        change_hp: Optional mapping of hyperparameters to override or add.

    Returns:
        dict: Copy of ``best_hp`` with the entries of ``change_hp`` applied on top.

    Raises:
        ValueError: If ``best_hp`` is ``None``, which is what
            ``get_best_hyperparameter`` returns when the study could not be loaded.
    """
    if best_hp is None:
        # Fail here with a clear message instead of an opaque error further down the pipeline.
        raise ValueError(
            "best_hp is None: the tuned hyperparameters were not loaded "
            "(check the output of get_best_hyperparameter)."
        )

    grid = dict(best_hp)
    if change_hp:
        grid.update(change_hp)
    return grid

In [None]:
def svm_pipeline_run(df_train, df_val, df_test, best_hp, run_id, seed, save_root, dataset_name="dataset"):
    """Fit one TF-IDF + SVC model and write its evaluation artifacts to disk.

    The TF-IDF vocabulary (unigrams and bigrams, sublinear tf) is fitted on the
    training split only and then applied to the validation and test splits.
    All outputs go to ``<save_root>/SVM_2/<dataset_name>/run_<run_id>``:
    per-split classification reports (CSV), per-split confusion matrices (PNG),
    ``summary.json``, ``summary_metrics.csv`` and the pickled model and vectorizer.

    Args:
        df_train: Training DataFrame with "cleaned_content" and "sentiment" columns.
        df_val: Validation DataFrame with the same columns.
        df_test: Test DataFrame with the same columns.
        best_hp: Mapping with "C" and "kernel"; "gamma" is optional and defaults to "scale".
        run_id: Identifier used in the output folder name and stored in the summary.
        seed: Value passed to ``SVC(random_state=...)``.
        save_root: Root directory for the outputs.
        dataset_name: Sub-folder name, also stored in the summary.

    Returns:
        dict: Summary with split sizes, weighted F1 and accuracy per split
        (rounded to 4 decimals) and the hyperparameters used.
    """
    final_dir = Path(save_root) / "SVM_2" / dataset_name / f"run_{run_id}"
    final_dir.mkdir(parents=True, exist_ok=True)
    print(f"Saving outputs to: {final_dir}")

    # Features: learn the vocabulary on the training texts only, then reuse it.
    vectorizer = TfidfVectorizer(ngram_range=(1,2), sublinear_tf=True)
    X_train = vectorizer.fit_transform(df_train["cleaned_content"])
    X_val = vectorizer.transform(df_val["cleaned_content"])
    X_test = vectorizer.transform(df_test["cleaned_content"])
    y_train = df_train["sentiment"]
    y_val = df_val["sentiment"]
    y_test = df_test["sentiment"]

    svc_settings = {
        "C": best_hp["C"],
        "kernel": best_hp["kernel"],
        "gamma": best_hp.get("gamma", "scale"),
        "random_state": seed,
        "probability": True,
    }
    model = SVC(**svc_settings)
    model.fit(X_train, y_train)

    train_preds, val_preds, test_preds = (
        model.predict(features) for features in (X_train, X_val, X_test)
    )

    # (true labels, predictions, report file name, confusion-matrix file name) per split.
    evaluated = (
        (y_train, train_preds, "classification_report_train.csv", "cm_train.png"),
        (y_val, val_preds, "classification_report_val.csv", "cm_val.png"),
        (y_test, test_preds, "classification_report_test.csv", "cm_test.png"),
    )

    report_frames = [
        create_classification_report(y_true, y_hat) for y_true, y_hat, _, _ in evaluated
    ]
    for report_df, (_, _, csv_name, _) in zip(report_frames, evaluated):
        report_df.to_csv(final_dir / csv_name, index=False)

    for y_true, y_hat, _, png_name in evaluated:
        create_confusion_matrix(y_true, y_hat, final_dir / png_name)

    def _weighted_f1(y_true, y_hat):
        return round(f1_score(y_true, y_hat, average='weighted'), 4)

    def _accuracy(y_true, y_hat):
        return round((y_hat == y_true).mean(), 4)

    # Key order matters: it defines the column order of the summary CSV / results table.
    summary = {
        "model": "SVM",
        "dataset": dataset_name,
        "run_id": run_id,
        "n_train": len(df_train),
        "n_val": len(df_val),
        "n_test": len(df_test),
        "train_weighted_f1": _weighted_f1(y_train, train_preds),
        "val_weighted_f1": _weighted_f1(y_val, val_preds),
        "test_weighted_f1": _weighted_f1(y_test, test_preds),
        "train_accuracy": _accuracy(y_train, train_preds),
        "val_accuracy": _accuracy(y_val, val_preds),
        "test_accuracy": _accuracy(y_test, test_preds),
        "hyperparameters": best_hp
    }

    with open(final_dir / "summary.json", "w") as summary_file:
        json.dump(summary, summary_file, indent=2)

    # json_normalize flattens the nested hyperparameters into their own columns.
    pd.json_normalize(summary).to_csv(final_dir / "summary_metrics.csv", index=False)

    joblib.dump(model, final_dir / "svm_model.pkl")
    joblib.dump(vectorizer, final_dir / "tfidf.pkl")

    print(f"✅ SVM run {run_id} completed.\n")
    return summary

In [None]:
def check_svm_loop_runs(df_train, df_val, df_test, best_hp, hp_change_list, seed=42, save_root=SAVE_ROOT, dataset_name="dataset"):
    """Train one SVM per override dict and collect the run summaries.

    For each entry of ``hp_change_list`` the overrides are merged into
    ``best_hp`` with ``get_hp_grid`` and a full ``svm_pipeline_run`` is
    executed. Runs are numbered from 1 in list order.

    Args:
        df_train: Training DataFrame forwarded to ``svm_pipeline_run``.
        df_val: Validation DataFrame forwarded to ``svm_pipeline_run``.
        df_test: Test DataFrame forwarded to ``svm_pipeline_run``.
        best_hp: Tuned hyperparameters used as the base of every run.
        hp_change_list: Iterable of override dicts (or ``None`` for "no override").
        seed: Random state forwarded to each run.
        save_root: Output root directory forwarded to each run.
        dataset_name: Output sub-folder name forwarded to each run.

    Returns:
        pandas.DataFrame: One row per run, built from the returned summaries.
    """
    # Arguments that are identical for every run of the loop.
    shared_args = dict(
        df_train=df_train,
        df_val=df_val,
        df_test=df_test,
        seed=seed,
        save_root=save_root,
        dataset_name=dataset_name,
    )

    summaries = []
    for run_number, overrides in enumerate(hp_change_list, start=1):
        print(f"GENERATING MODEL {run_number}")
        print(f"Override: {overrides}")

        merged_hp = get_hp_grid(best_hp, overrides)
        print(f"Final hyperparameters used: {merged_hp}")

        summaries.append(
            svm_pipeline_run(best_hp=merged_hp, run_id=run_number, **shared_args)
        )

    return pd.DataFrame(summaries)

# 4. Run Models

## 4.1. Original Training Set

In [None]:
# One-at-a-time overrides applied on top of the tuned hyperparameters for the
# original training set; each dict produces one run in check_svm_loop_runs.
list_ori = [
    {"C": 0.01},
    {"C": 2},
    {"kernel": "rbf"},
    {"kernel": "poly"},
    {"kernel": "linear"},
    {"gamma": "auto"}
]

In [None]:
best_hp_ori=get_best_hyperparameter(
    save_root=STUDY_ROOT,
    model_name="SVM_2",
    dataset_name="original"
)

In [None]:
check_svm_loop_runs(
    df_train=df_train_ori,
    df_val=df_val,
    df_test=df_test,
    best_hp=best_hp_ori,
    hp_change_list=list_ori,
    seed=SEED,
    save_root=SAVE_ROOT,
    dataset_name="ori-check-1"
)

GENERATING MODEL 1
Override: {'C': 0.01}
Final hyperparameters used: {'C': 0.01, 'kernel': 'sigmoid', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ori-check-1/run_1
✅ SVM run 1 completed.

GENERATING MODEL 2
Override: {'C': 2}
Final hyperparameters used: {'C': 2, 'kernel': 'sigmoid', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ori-check-1/run_2
✅ SVM run 2 completed.

GENERATING MODEL 3
Override: {'kernel': 'rbf'}
Final hyperparameters used: {'C': 0.8983851664279286, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ori-check-1/run_3
✅ SVM run 3 completed.

GENERATING MODEL 4
Override: {'kernel': 'poly'}
Final hyperparameters used: {'C': 0.8983851664279286, 'kernel': 'poly', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ori-check-1/run_4
✅ SVM run 4 completed.

GENERATING MODEL 5
O

Unnamed: 0,model,dataset,run_id,n_train,n_val,n_test,train_weighted_f1,val_weighted_f1,test_weighted_f1,train_accuracy,val_accuracy,test_accuracy,hyperparameters
0,SVM,ori-check-1,1,9913,2124,2125,0.5015,0.521,0.5239,0.6355,0.6445,0.6461,"{'C': 0.01, 'kernel': 'sigmoid', 'gamma': 'sca..."
1,SVM,ori-check-1,2,9913,2124,2125,0.9906,0.9448,0.949,0.9906,0.9444,0.9487,"{'C': 2, 'kernel': 'sigmoid', 'gamma': 'scale'}"
2,SVM,ori-check-1,3,9913,2124,2125,0.9919,0.943,0.9453,0.9919,0.9426,0.9449,"{'C': 0.8983851664279286, 'kernel': 'rbf', 'ga..."
3,SVM,ori-check-1,4,9913,2124,2125,0.9965,0.9242,0.9293,0.9965,0.9233,0.9285,"{'C': 0.8983851664279286, 'kernel': 'poly', 'g..."
4,SVM,ori-check-1,5,9913,2124,2125,0.9886,0.9453,0.9486,0.9886,0.9449,0.9482,"{'C': 0.8983851664279286, 'kernel': 'linear', ..."
5,SVM,ori-check-1,6,9913,2124,2125,0.4855,0.4857,0.4854,0.6288,0.629,0.6287,"{'C': 0.8983851664279286, 'kernel': 'sigmoid',..."


## 4.2. ROS Training Set

In [None]:
# One-at-a-time overrides applied on top of the tuned hyperparameters for the
# ROS training set; each dict produces one run in check_svm_loop_runs.
list_ros = [
    {"C": 60},
    {"C": 100},
    {"kernel": "sigmoid"},
    {"kernel": "poly"},
    {"kernel": "linear"},
    {"gamma": "auto"}
]

In [None]:
best_hp_ros=get_best_hyperparameter(
    save_root=STUDY_ROOT,
    model_name="SVM_2",
    dataset_name="ROS"
)

In [None]:
check_svm_loop_runs(
    df_train=df_train_ros,
    df_val=df_val,
    df_test=df_test,
    best_hp=best_hp_ros,
    hp_change_list=list_ros,
    seed=SEED,
    save_root=SAVE_ROOT,
    dataset_name="ros-check-1"
)

GENERATING MODEL 1
Override: {'C': 60}
Final hyperparameters used: {'C': 60, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-1/run_1
✅ SVM run 1 completed.

GENERATING MODEL 2
Override: {'C': 100}
Final hyperparameters used: {'C': 100, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-1/run_2
✅ SVM run 2 completed.

GENERATING MODEL 3
Override: {'kernel': 'sigmoid'}
Final hyperparameters used: {'C': 87.69063608995286, 'kernel': 'sigmoid', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-1/run_3
✅ SVM run 3 completed.

GENERATING MODEL 4
Override: {'kernel': 'poly'}
Final hyperparameters used: {'C': 87.69063608995286, 'kernel': 'poly', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-1/run_4
✅ SVM run 4 completed.

GENERATING MODEL 5
Ove

Unnamed: 0,model,dataset,run_id,n_train,n_val,n_test,train_weighted_f1,val_weighted_f1,test_weighted_f1,train_accuracy,val_accuracy,test_accuracy,hyperparameters
0,SVM,ros-check-1,1,12466,2124,2125,0.9968,0.9462,0.9467,0.9968,0.9459,0.9464,"{'C': 60, 'kernel': 'rbf', 'gamma': 'scale'}"
1,SVM,ros-check-1,2,12466,2124,2125,0.9968,0.9462,0.9467,0.9968,0.9459,0.9464,"{'C': 100, 'kernel': 'rbf', 'gamma': 'scale'}"
2,SVM,ros-check-1,3,12466,2124,2125,0.9958,0.8783,0.8857,0.9958,0.8781,0.8856,"{'C': 87.69063608995286, 'kernel': 'sigmoid', ..."
3,SVM,ros-check-1,4,12466,2124,2125,0.9966,0.9283,0.9307,0.9966,0.928,0.9304,"{'C': 87.69063608995286, 'kernel': 'poly', 'ga..."
4,SVM,ros-check-1,5,12466,2124,2125,0.9968,0.9391,0.9392,0.9968,0.9388,0.9388,"{'C': 87.69063608995286, 'kernel': 'linear', '..."
5,SVM,ros-check-1,6,12466,2124,2125,0.5435,0.4739,0.4437,0.6115,0.525,0.5045,"{'C': 87.69063608995286, 'kernel': 'rbf', 'gam..."


In [None]:
# Follow-up sweep for the ROS training set: only C is varied, with progressively
# smaller values; kernel and gamma stay at their tuned values.
# NOTE: this rebinds `list_ros`, so the result depends on which list cell ran last.
list_ros = [
    {'C': 45},
    {'C': 30},
    {'C': 15},
    {'C': 10},
    {'C': 5}
]

In [None]:
best_hp_ros=get_best_hyperparameter(
    save_root=STUDY_ROOT,
    model_name="SVM_2",
    dataset_name="ROS"
)

In [None]:
check_svm_loop_runs(
    df_train=df_train_ros,
    df_val=df_val,
    df_test=df_test,
    best_hp=best_hp_ros,
    hp_change_list=list_ros,
    seed=SEED,
    save_root=SAVE_ROOT,
    dataset_name="ros-check-2"
)

GENERATING MODEL 1
Override: {'C': 45}
Final hyperparameters used: {'C': 45, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-2/run_1
✅ SVM run 1 completed.

GENERATING MODEL 2
Override: {'C': 30}
Final hyperparameters used: {'C': 30, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-2/run_2
✅ SVM run 2 completed.

GENERATING MODEL 3
Override: {'C': 15}
Final hyperparameters used: {'C': 15, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-2/run_3
✅ SVM run 3 completed.

GENERATING MODEL 4
Override: {'C': 10}
Final hyperparameters used: {'C': 10, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-2/run_4
✅ SVM run 4 completed.

GENERATING MODEL 5
Override: {'C': 5}
Final hyperparameters used: {'C': 5, 'kern

Unnamed: 0,model,dataset,run_id,n_train,n_val,n_test,train_weighted_f1,val_weighted_f1,test_weighted_f1,train_accuracy,val_accuracy,test_accuracy,hyperparameters
0,SVM,ros-check-2,1,12466,2124,2125,0.9968,0.9462,0.9467,0.9968,0.9459,0.9464,"{'C': 45, 'kernel': 'rbf', 'gamma': 'scale'}"
1,SVM,ros-check-2,2,12466,2124,2125,0.9968,0.9462,0.9467,0.9968,0.9459,0.9464,"{'C': 30, 'kernel': 'rbf', 'gamma': 'scale'}"
2,SVM,ros-check-2,3,12466,2124,2125,0.9968,0.9462,0.9467,0.9968,0.9459,0.9464,"{'C': 15, 'kernel': 'rbf', 'gamma': 'scale'}"
3,SVM,ros-check-2,4,12466,2124,2125,0.9968,0.9462,0.9467,0.9968,0.9459,0.9464,"{'C': 10, 'kernel': 'rbf', 'gamma': 'scale'}"
4,SVM,ros-check-2,5,12466,2124,2125,0.9968,0.9462,0.9467,0.9968,0.9459,0.9464,"{'C': 5, 'kernel': 'rbf', 'gamma': 'scale'}"


In [None]:
# Continues the C sweep for the ROS training set down to very small values
# (1, then 1e-2 through 1e-6); kernel and gamma stay at their tuned values.
# NOTE: this rebinds `list_ros`, so the result depends on which list cell ran last.
list_ros = [
    {'C': 1},
    {'C': 1e-2},
    {'C': 1e-3},
    {'C': 1e-4},
    {'C': 1e-5},
    {'C': 1e-6},
]

In [None]:
best_hp_ros=get_best_hyperparameter(
    save_root=STUDY_ROOT,
    model_name="SVM_2",
    dataset_name="ROS"
)

In [None]:
check_svm_loop_runs(
    df_train=df_train_ros,
    df_val=df_val,
    df_test=df_test,
    best_hp=best_hp_ros,
    hp_change_list=list_ros,
    seed=SEED,
    save_root=SAVE_ROOT,
    dataset_name="ros-check-3"
)

GENERATING MODEL 1
Override: {'C': 1}
Final hyperparameters used: {'C': 1, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-3/run_1
✅ SVM run 1 completed.

GENERATING MODEL 2
Override: {'C': 0.01}
Final hyperparameters used: {'C': 0.01, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-3/run_2
✅ SVM run 2 completed.

GENERATING MODEL 3
Override: {'C': 0.001}
Final hyperparameters used: {'C': 0.001, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-3/run_3
✅ SVM run 3 completed.

GENERATING MODEL 4
Override: {'C': 0.0001}
Final hyperparameters used: {'C': 0.0001, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ros-check-3/run_4
✅ SVM run 4 completed.

GENERATING MODEL 5
Override: {'C': 1e-05}
Final hyperparameters 

Unnamed: 0,model,dataset,run_id,n_train,n_val,n_test,train_weighted_f1,val_weighted_f1,test_weighted_f1,train_accuracy,val_accuracy,test_accuracy,hyperparameters
0,SVM,ros-check-3,1,12466,2124,2125,0.9935,0.9448,0.9462,0.9935,0.9444,0.9459,"{'C': 1, 'kernel': 'rbf', 'gamma': 'scale'}"
1,SVM,ros-check-3,2,12466,2124,2125,0.5697,0.5164,0.4823,0.6281,0.5546,0.5304,"{'C': 0.01, 'kernel': 'rbf', 'gamma': 'scale'}"
2,SVM,ros-check-3,3,12466,2124,2125,0.5006,0.4139,0.3866,0.5855,0.4859,0.4687,"{'C': 0.001, 'kernel': 'rbf', 'gamma': 'scale'}"
3,SVM,ros-check-3,4,12466,2124,2125,0.5006,0.4139,0.3866,0.5855,0.4859,0.4687,"{'C': 0.0001, 'kernel': 'rbf', 'gamma': 'scale'}"
4,SVM,ros-check-3,5,12466,2124,2125,0.5006,0.4139,0.3866,0.5855,0.4859,0.4687,"{'C': 1e-05, 'kernel': 'rbf', 'gamma': 'scale'}"
5,SVM,ros-check-3,6,12466,2124,2125,0.5006,0.4139,0.3866,0.5855,0.4859,0.4687,"{'C': 1e-06, 'kernel': 'rbf', 'gamma': 'scale'}"


## 4.3. ROS-NCL Training Set


In [None]:
# One-at-a-time overrides applied on top of the tuned hyperparameters for the
# ROS-NCL training set; each dict produces one run in check_svm_loop_runs.
# gamma is not overridden here, so the tuned gamma is used for every kernel.
list_rosncl = [
    {"C": 0.5},
    {"C": 2.5},
    {"kernel": "sigmoid"},
    {"kernel": "poly"},
    {"kernel": "rbf"}
]

In [None]:
best_hp_ros_ncl=get_best_hyperparameter(
    save_root=STUDY_ROOT,
    model_name="SVM_2",
    dataset_name="ROS-NCL"
)

In [None]:
check_svm_loop_runs(
    df_train=df_train_ros_ncl,
    df_val=df_val,
    df_test=df_test,
    best_hp=best_hp_ros_ncl,
    hp_change_list=list_rosncl,
    seed=SEED,
    save_root=SAVE_ROOT,
    dataset_name="rosncl-check-1"
)

GENERATING MODEL 1
Override: {'C': 0.5}
Final hyperparameters used: {'C': 0.5, 'kernel': 'linear', 'gamma': 'auto'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ROS-NCL/run_1
✅ SVM run 1 completed.

GENERATING MODEL 2
Override: {'C': 2.5}
Final hyperparameters used: {'C': 2.5, 'kernel': 'linear', 'gamma': 'auto'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ROS-NCL/run_2
✅ SVM run 2 completed.

GENERATING MODEL 3
Override: {'kernel': 'sigmoid'}
Final hyperparameters used: {'C': 1.334840493584601, 'kernel': 'sigmoid', 'gamma': 'auto'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ROS-NCL/run_3
✅ SVM run 3 completed.

GENERATING MODEL 4
Override: {'kernel': 'poly'}
Final hyperparameters used: {'C': 1.334840493584601, 'kernel': 'poly', 'gamma': 'auto'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/ROS-NCL/run_4
✅ SVM run 4 completed.

GENERATING MODEL 5
Override: {'ker

Unnamed: 0,model,dataset,run_id,n_train,n_val,n_test,train_weighted_f1,val_weighted_f1,test_weighted_f1,train_accuracy,val_accuracy,test_accuracy,hyperparameters
0,SVM,ROS-NCL,1,10564,2124,2125,0.9975,0.9434,0.9477,0.9975,0.943,0.9473,"{'C': 0.5, 'kernel': 'linear', 'gamma': 'auto'}"
1,SVM,ROS-NCL,2,10564,2124,2125,0.9995,0.9458,0.9467,0.9995,0.9454,0.9464,"{'C': 2.5, 'kernel': 'linear', 'gamma': 'auto'}"
2,SVM,ROS-NCL,3,10564,2124,2125,0.3794,0.4857,0.4854,0.5406,0.629,0.6287,"{'C': 1.334840493584601, 'kernel': 'sigmoid', ..."
3,SVM,ROS-NCL,4,10564,2124,2125,0.3794,0.4857,0.4854,0.5406,0.629,0.6287,"{'C': 1.334840493584601, 'kernel': 'poly', 'ga..."
4,SVM,ROS-NCL,5,10564,2124,2125,0.3794,0.4857,0.4854,0.5406,0.629,0.6287,"{'C': 1.334840493584601, 'kernel': 'rbf', 'gam..."


In [None]:
# Repeats the kernel comparison with gamma forced to 'scale': the logged
# hyperparameters of the previous ROS-NCL check show the tuned gamma is 'auto',
# under which sigmoid, poly and rbf all produced the same scores.
# NOTE: this rebinds `list_rosncl`, so the result depends on which list cell ran last.
list_rosncl = [
    {'kernel': 'sigmoid', 'gamma': 'scale'},
    {'kernel': 'poly', 'gamma': 'scale'},
    {'kernel': 'rbf', 'gamma': 'scale'}
]

In [None]:
best_hp_ros_ncl=get_best_hyperparameter(
    save_root=STUDY_ROOT,
    model_name="SVM_2",
    dataset_name="ROS-NCL"
)

In [None]:
check_svm_loop_runs(
    df_train=df_train_ros_ncl,
    df_val=df_val,
    df_test=df_test,
    best_hp=best_hp_ros_ncl,
    hp_change_list=list_rosncl,
    seed=SEED,
    save_root=SAVE_ROOT,
    dataset_name="rosncl-check-2"
)

GENERATING MODEL 1
Override: {'kernel': 'sigmoid', 'gamma': 'scale'}
Final hyperparameters used: {'C': 1.334840493584601, 'kernel': 'sigmoid', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/rosncl-check-2/run_1
✅ SVM run 1 completed.

GENERATING MODEL 2
Override: {'kernel': 'poly', 'gamma': 'scale'}
Final hyperparameters used: {'C': 1.334840493584601, 'kernel': 'poly', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/rosncl-check-2/run_2
✅ SVM run 2 completed.

GENERATING MODEL 3
Override: {'kernel': 'rbf', 'gamma': 'scale'}
Final hyperparameters used: {'C': 1.334840493584601, 'kernel': 'rbf', 'gamma': 'scale'}
Saving outputs to: /content/drive/MyDrive/Bach_Thesis/Models/SA_Check/SVM_2/rosncl-check-2/run_3
✅ SVM run 3 completed.



Unnamed: 0,model,dataset,run_id,n_train,n_val,n_test,train_weighted_f1,val_weighted_f1,test_weighted_f1,train_accuracy,val_accuracy,test_accuracy,hyperparameters
0,SVM,rosncl-check-2,1,10564,2124,2125,0.999,0.9472,0.9505,0.999,0.9468,0.9501,"{'C': 1.334840493584601, 'kernel': 'sigmoid', ..."
1,SVM,rosncl-check-2,2,10564,2124,2125,0.9994,0.9321,0.9308,0.9994,0.9317,0.9304,"{'C': 1.334840493584601, 'kernel': 'poly', 'ga..."
2,SVM,rosncl-check-2,3,10564,2124,2125,0.9995,0.942,0.9472,0.9995,0.9416,0.9468,"{'C': 1.334840493584601, 'kernel': 'rbf', 'gam..."
