In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
                             mean_squared_error, mean_absolute_error, r2_score)
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, SVR
from sklearn.neural_network import MLPClassifier, MLPRegressor
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Read the dataset from disk. The rest of the notebook assumes the final
# column holds the target and every earlier column is a feature.
df = pd.read_csv(filepath_or_buffer="data.csv")

# Report the (rows, columns) shape, then preview the first rows.
print(df.shape)
df.head()


In [None]:
# Separate the feature matrix (every column but the last) from the target
# vector (the last column).
X = df.iloc[:, :-1].values
target = df.iloc[:, -1]
y = target.values

# Detect task type.
# A non-numeric target (strings, categories, ...) can only be a set of class
# labels, however many distinct values it has; regressors cannot be fitted on
# it. Numeric targets use a cardinality heuristic: at most MAX_CLASSES
# distinct values is treated as classification, anything above as regression.
MAX_CLASSES = 20
if not pd.api.types.is_numeric_dtype(target):
    task = "classification"
else:
    task = "classification" if len(np.unique(y)) <= MAX_CLASSES else "regression"
print("Detected task type:", task)

# Hold out 20% of the rows for testing. For classification the split is
# stratified so both partitions keep the class proportions of `y`;
# stratify=None (regression) is a plain shuffled split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y if task == "classification" else None,
    random_state=42)


In [None]:
# Candidate models for the detected task, keyed by the display name used in the
# results table and plot. Estimators that are sensitive to feature scale are
# wrapped in a pipeline that standardizes the features first; the scaler is
# fitted together with the model, so it only ever sees the data passed to fit().
# Every stochastic estimator is seeded with 42 so reruns give the same numbers.
if task == "classification":
    # NOTE(review): MLPClassifier is imported at the top of the notebook but not
    # used here, while the regression set includes an MLPRegressor -- confirm
    # whether the omission is intentional.
    models = {
        "Logistic Regression": make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000)),
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "Naive Bayes": make_pipeline(StandardScaler(), GaussianNB()),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        # probability=True enables predict_proba; the probability estimates are
        # fitted with internally shuffled data, so a seed is required for
        # reproducible probabilities (and therefore a reproducible AUC).
        "SVM (RBF Kernel)": make_pipeline(
            StandardScaler(), SVC(kernel="rbf", probability=True, random_state=42))
    }
else:
    models = {
        "Linear Regression": LinearRegression(),
        "Decision Tree Regressor": DecisionTreeRegressor(random_state=42),
        "Random Forest Regressor": RandomForestRegressor(n_estimators=100, random_state=42),
        "SVR (RBF Kernel)": make_pipeline(StandardScaler(), SVR(kernel="rbf")),
        "Neural Network (MLPRegressor)": make_pipeline(StandardScaler(), MLPRegressor(hidden_layer_sizes=(64,32), max_iter=1000, random_state=42))
    }


In [None]:
# Fit every candidate on the training split, score it on the held-out test
# split, and add a 5-fold cross-validated score. Each model contributes one
# row to `results_df`.
results = []

# The CV splitter depends only on the task, so build it once instead of once
# per model. Classification keeps class proportions in every fold.
if task == "classification":
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
else:
    cv = KFold(n_splits=5, shuffle=True, random_state=42)

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    row = {"Model": name}

    if task == "classification":
        # Weighted averaging weights each class by its support, so the same
        # calls work for binary and multiclass targets.
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average="weighted", zero_division=0)
        rec = recall_score(y_test, y_pred, average="weighted", zero_division=0)
        f1 = f1_score(y_test, y_pred, average="weighted", zero_division=0)

        # AUC is best-effort: it needs predict_proba and a test split for which
        # the score is defined. Only those expected failures are turned into
        # NaN (and reported); anything else propagates.
        try:
            y_prob = model.predict_proba(X_test)
            if y_prob.shape[1] == 2:
                # Binary: score with the probability of the second class.
                auc = roc_auc_score(y_test, y_prob[:, 1])
            else:
                # Multiclass: one-vs-rest AUC, weighted like the other metrics.
                # Passing the fitted class order ties each probability column
                # to its label even if a class is missing from y_test.
                auc = roc_auc_score(y_test, y_prob, multi_class="ovr",
                                    average="weighted", labels=model.classes_)
        except (AttributeError, ValueError) as exc:
            print(f"AUC unavailable for {name}: {exc}")
            auc = np.nan

        row.update({"Accuracy": acc, "Precision": prec, "Recall": rec, "F1": f1, "AUC": auc})

        # cross_val_score refits clones of the model, here on the full dataset
        # (X, y), not only on the training split.
        row["CV Accuracy Mean"] = cross_val_score(model, X, y, cv=cv, scoring="accuracy").mean()

    else:
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        row.update({"RMSE": rmse, "MAE": mae, "R²": r2})

        # cross_val_score refits clones of the model, here on the full dataset
        # (X, y), not only on the training split.
        row["CV R² Mean"] = cross_val_score(model, X, y, cv=cv, scoring="r2").mean()

    results.append(row)

results_df = pd.DataFrame(results)
results_df


In [None]:
# Bar chart comparing the models on the headline test-set metric for the
# detected task: accuracy for classification, R² for regression.
metric = "R²" if task != "classification" else "Accuracy"

fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(data=results_df, x="Model", y=metric, ax=ax)
ax.set_title(f"{metric} Comparison of Models")
# Slant the model names so long labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.show()
