In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import optuna
import matplotlib.pyplot as plt
import seaborn as sns

# Seed reused by the data splits and the LinearSVC models further down.
RND = 42

# Read the dataset; the path is resolved relative to the working directory.
df = pd.read_csv("cleaned_data.csv")

# Split the frame into the label column (y) and the remaining feature columns (X).
target = "Cover_Type"
y = df[target]
X = df.drop(columns=target)




In [None]:
# Stratified 70/30 hold-out split, seeded with RND so it is repeatable.
split_options = dict(test_size=0.30, random_state=RND, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print("Train:", X_train.shape, " Test:", X_test.shape)


In [None]:
def objective(trial):
    """Score one Optuna trial: fit a LinearSVC and return its validation accuracy.

    Hyperparameters are drawn from ``trial`` in this order:

    * ``C``                 -- log-uniform in [1e-3, 50]
    * ``loss``              -- ``"hinge"`` or ``"squared_hinge"``
    * ``tol``               -- log-uniform in [1e-5, 1e-2]
    * ``fit_intercept``     -- ``True`` or ``False``
    * ``intercept_scaling`` -- log-uniform in [0.5, 5]; only drawn when
      ``fit_intercept`` is True, otherwise fixed to 1.0
    * ``max_iter``          -- 1000 to 3000 in steps of 500

    The model is fitted on 80% of ``X_train``/``y_train`` and scored on the
    remaining 20%. The split is stratified and seeded with ``RND``, so every
    trial is evaluated on the same validation rows.

    Args:
        trial: the Optuna trial used to sample the hyperparameters.

    Returns:
        Accuracy of the fitted model on the validation part.
    """
    # The dict literal is evaluated top to bottom, so the suggest_* calls run
    # in the order listed in the docstring.
    svc_params = {
        "C": trial.suggest_float("C", 1e-3, 50, log=True),
        "loss": trial.suggest_categorical("loss", ["hinge", "squared_hinge"]),
        "tol": trial.suggest_float("tol", 1e-5, 1e-2, log=True),
        "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
    }

    # intercept_scaling only matters when an intercept is fitted, so it is
    # sampled conditionally and otherwise left at 1.0.
    svc_params["intercept_scaling"] = (
        trial.suggest_float("intercept_scaling", 0.5, 5, log=True)
        if svc_params["fit_intercept"]
        else 1.0
    )

    # Narrow iteration budget keeps individual trials short.
    svc_params["max_iter"] = trial.suggest_int("max_iter", 1000, 3000, step=500)

    # Inner validation split carved out of the training data only.
    fit_X, val_X, fit_y, val_y = train_test_split(
        X_train, y_train, test_size=0.20, random_state=RND, stratify=y_train
    )

    # dual is pinned to True rather than tuned.
    # NOTE(review): with the default L2 penalty, loss="hinge" appears to be
    # supported only by the dual solver -- confirm against the installed
    # scikit-learn version before changing this.
    classifier = LinearSVC(
        dual=True,
        class_weight="balanced",
        random_state=RND,
        **svc_params,
    )
    classifier.fit(fit_X, fit_y)

    return accuracy_score(val_y, classifier.predict(val_X))




In [None]:

# Seed the sampler explicitly: without a seed the hyperparameters drawn (and
# therefore the best trial) differ on every run, so the notebook would not
# reproduce under "Restart & Run All". Trials run sequentially, which keeps
# the seeded search deterministic.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=RND),
)
study.optimize(objective, n_trials=20, show_progress_bar=True)

print("Best Score:", study.best_value)
print("Best Params:", study.best_params)



In [None]:
# Hyperparameters of the best trial found by the study.
best = study.best_params

# Rebuild the classifier with the winning settings. dual, class_weight and
# random_state are not part of the search space; they repeat the values that
# objective() hard-codes so the final model matches what was evaluated.
final_model = LinearSVC(
    C=best["C"],
    loss=best["loss"],
    tol=best["tol"],
    dual=True,
    fit_intercept=best["fit_intercept"],
    # Only recorded for trials with fit_intercept=True, hence the fallback.
    intercept_scaling=best.get("intercept_scaling", 1.0),
    class_weight="balanced",
    max_iter=best["max_iter"],
    random_state=RND
)

# Refit on the whole training split (the search itself trained on 80% of it).
final_model.fit(X_train, y_train)



In [None]:
# Evaluate the refitted model on the held-out test split.
y_pred = final_model.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print("Test Accuracy:", acc)

print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Sorted union of the true and predicted labels. This is the ordering
# confusion_matrix uses by default; it is made explicit here so the same
# labels can be drawn on the heatmap axes instead of positional indices 0..k-1.
class_labels = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Plot the confusion matrix (rows = actual class, columns = predicted class).
plt.figure(figsize=(6,5))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()
