In [None]:
import pandas as pd
import joblib

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score


# Name of the column holding the class label to predict.
target_col = "Cover_Type"

# Load the train / validation splits from the interim data directory.
train_df = pd.read_csv("../data/interim/train_split.csv")
val_df = pd.read_csv("../data/interim/validation_split.csv")

# Split each frame into a feature matrix (every column except the label)
# and the label series.
# NOTE(review): the "_scaled" suffix implies standardized features, but no
# scaler is applied in this cell -- presumably the interim CSVs are already
# scaled upstream; confirm.
X_train_scaled, y_train = train_df.drop(columns=[target_col]), train_df[target_col]
X_val_scaled, y_val = val_df.drop(columns=[target_col]), val_df[target_col]



# ---- Multiclass logistic regression baseline ----
# Construct and fit in a single expression; fit() hands back the estimator.
log_reg = LogisticRegression(solver="lbfgs", max_iter=1000, random_state=42).fit(
    X_train_scaled, y_train
)

# Measure accuracy on the validation split.
val_pred_log = log_reg.predict(X_val_scaled)
val_acc_log = accuracy_score(y_true=y_val, y_pred=val_pred_log)
print(f"Logistic Regression Validation Accuracy: {val_acc_log:.4f}")

# Persist the fitted estimator to the models directory.
joblib.dump(log_reg, "../models/logreg_multiclass.joblib")
print("Saved Logistic Regression model as logreg_multiclass.joblib")

# Author's recorded result (presumably the validation accuracy of an earlier run): 0.7238

### SVM

In [None]:

# Support vector classifier with an RBF kernel and a one-vs-rest shaped
# decision function.
svm_rbf = SVC(
    kernel="rbf",
    C=1.0,
    gamma="scale",
    decision_function_shape="ovr",
    random_state=42
)

# Fit on the training split, then measure accuracy on the validation split.
svm_rbf.fit(X_train_scaled, y_train)
val_pred_svm = svm_rbf.predict(X_val_scaled)
val_acc_svm  = accuracy_score(y_val, val_pred_svm)
print(f"SVM (RBF) Validation Accuracy: {val_acc_svm:.4f}")

# Persist the fitted model. The confirmation print mirrors the other model
# cells and stops the cell from ending on a bare joblib.dump(...) expression,
# whose return value the notebook would otherwise echo as output.
joblib.dump(svm_rbf, "../models/svm_rbf_multiclass.joblib")
print("Saved SVM (RBF) model as svm_rbf_multiclass.joblib")
# Author's recorded result (presumably the validation accuracy of an earlier run): 0.8306


### NN

In [None]:


# Multi-layer perceptron: one hidden layer of 100 ReLU units, Adam solver.
# Built and fitted in one expression; fit() hands back the estimator.
nn_model = MLPClassifier(
    hidden_layer_sizes=(100,),
    activation="relu",
    solver="adam",
    max_iter=500,
    random_state=42,
).fit(X_train_scaled, y_train)

# Measure accuracy on the validation split.
val_pred_nn = nn_model.predict(X_val_scaled)
val_acc_nn = accuracy_score(y_true=y_val, y_pred=val_pred_nn)
print(f"Neural Network Validation Accuracy: {val_acc_nn:.4f}")

# Persist the fitted network to the models directory.
joblib.dump(nn_model, "../models/mlp_multiclass.joblib")
print("Saved Neural Network model as mlp_multiclass.joblib")
# Author's recorded result (presumably the validation accuracy of an earlier run): 0.8763

In [None]:

# NOTE(review): this cell repeats the "### SVM" cell earlier in the notebook
# verbatim (same hyperparameters, same data, same output path). It refits the
# model and overwrites the same file -- consider deleting one of the two.

# Support vector classifier with an RBF kernel and a one-vs-rest shaped
# decision function.
svm_rbf = SVC(
    kernel="rbf",
    C=1.0,
    gamma="scale",
    decision_function_shape="ovr",
    random_state=42
)

# Fit on the training split, then measure accuracy on the validation split.
svm_rbf.fit(X_train_scaled, y_train)
val_pred_svm = svm_rbf.predict(X_val_scaled)
val_acc_svm  = accuracy_score(y_val, val_pred_svm)
print(f"SVM (RBF) Validation Accuracy: {val_acc_svm:.4f}")

# Persist the fitted model. The confirmation print mirrors the other model
# cells and stops the cell from ending on a bare joblib.dump(...) expression,
# whose return value the notebook would otherwise echo as output.
joblib.dump(svm_rbf, "../models/svm_rbf_multiclass.joblib")
print("Saved SVM (RBF) model as svm_rbf_multiclass.joblib")
# Author's recorded result (presumably the validation accuracy of an earlier run): 0.8306


In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Base estimator whose hyperparameters are tuned by the grid search.
svm = SVC()

# Candidate values for the regularization parameter C and the kernel
# coefficient gamma; every combination is tried.
parameters = {'C': [0.1, 1, 10], 'gamma': [0.00001, 0.0001, 0.001, 0.01, 0.1]}

# Exhaustive search over the grid with 5-fold cross-validation.
searcher = GridSearchCV(svm, parameters, cv=5)

# Run the search on the training data only.
searcher.fit(X_train_scaled, y_train)

# Report the winning hyperparameters and their cross-validation score.
print("Best CV params:", searcher.best_params_)
print("Best CV accuracy:", searcher.best_score_)

# Score the best configuration on the validation split. This notebook only
# loads train and validation data -- no X_test / y_test is ever defined --
# so scoring against those names would raise NameError after the search
# had already finished.
val_accuracy = searcher.score(X_val_scaled, y_val)

print("Validation accuracy of best grid search hypers:", val_accuracy)
