In [1]:
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation and convergence
# warnings raised by the estimators fitted below; consider narrowing it.
warnings.filterwarnings('ignore')

In [2]:
# Build a reproducible synthetic 3-class problem: 1000 samples with 20
# features, of which 10 are informative and 5 are redundant.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_classes=3,
    n_clusters_per_class=2,
    random_state=42,
)

In [3]:
# Hold out 20% of the samples as a test set. Stratifying on y keeps the
# class proportions the same in both splits; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [4]:
# Standardise the features. The scaler learns its statistics from the
# training split only and the same fitted transform is applied to both splits.
# NOTE(review): the scaler is fit on the whole training split before the
# cross-validated search below, so each CV fold is scaled with statistics that
# include its own validation rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
# Candidate estimators and the hyperparameter grid searched for each one.
# Every entry maps a display name to a dict with:
#   "model"  - an unfitted estimator instance
#   "params" - the parameter grid handed to GridSearchCV for that estimator
models = {
    "Logistic Regression": {
        # `multi_class` is intentionally not passed: 'auto' is already the
        # default, and supplying the argument explicitly is deprecated in
        # recent scikit-learn releases. random_state pins the data shuffling
        # performed by the 'liblinear' solver.
        "model": LogisticRegression(max_iter=1000, random_state=42),
        "params": {
            "C": [0.1, 1, 10],
            # NOTE(review): 'liblinear' handles the 3-class target through a
            # one-vs-rest scheme, which newer scikit-learn releases deprecate
            # for multiclass problems - confirm against the installed version.
            "solver": ['lbfgs', 'liblinear']
        }
    },
    "Random Forest": {
        "model": RandomForestClassifier(random_state=42),
        "params": {
            "n_estimators": [50, 100],
            "max_depth": [None, 10, 20]
        }
    },
    "SVM": {
        # probability=True makes SVC fit an internal cross-validated
        # calibration that shuffles the data; seeding it keeps the probability
        # estimates reproducible, matching the seeded estimators above.
        "model": SVC(probability=True, random_state=42),
        "params": {
            "C": [0.1, 1, 10],
            "kernel": ['linear', 'rbf']
        }
    },
    "KNN": {
        "model": KNeighborsClassifier(),
        "params": {
            "n_neighbors": [3, 5, 7],
            "weights": ['uniform', 'distance']
        }
    }
}

# Filled by the tuning loop: display name -> best refit estimator.
best_models = {}

In [6]:
# Tune each candidate with a 5-fold, accuracy-scored grid search on the scaled
# training data, keep the refit winner, and report its held-out test performance.
for name, mp in models.items():
    print(f"\n Tuning {name}...")

    grid = GridSearchCV(
        estimator=mp['model'],
        param_grid=mp['params'],
        scoring='accuracy',
        cv=5,
        n_jobs=-1,
    ).fit(X_train_scaled, y_train)

    # The search refits the best configuration on the full training data;
    # store that estimator and use it for the test-set predictions.
    best_models[name] = grid.best_estimator_
    y_pred = best_models[name].predict(X_test_scaled)
    acc = accuracy_score(y_test, y_pred)

    print(
        f" Best Parameters: {grid.best_params_}",
        f" Accuracy: {acc:.4f}",
        " Classification Report:",
        classification_report(y_test, y_pred),
        "-" * 50,
        sep="\n",
    )


 Tuning Logistic Regression...
 Best Parameters: {'C': 0.1, 'solver': 'lbfgs'}
 Accuracy: 0.6450
 Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.45      0.48        66
           1       0.67      0.78      0.72        67
           2       0.76      0.70      0.73        67

    accuracy                           0.65       200
   macro avg       0.64      0.64      0.64       200
weighted avg       0.64      0.65      0.64       200

--------------------------------------------------

 Tuning Random Forest...
 Best Parameters: {'max_depth': 20, 'n_estimators': 100}
 Accuracy: 0.7800
 Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.67      0.68        66
           1       0.77      0.88      0.82        67
           2       0.90      0.79      0.84        67

    accuracy                           0.78       200
   macro avg       0.78      0.78      0.78      