In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# --- Data: Iris features/labels, with 20% of the samples held out for testing ---
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model: standardize -> PCA projection -> support-vector classifier ---
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('classifier', SVC()),
])

# --- Search space: each key is "<pipeline step name>__<parameter name>" ---
param_grid = {
    'pca__n_components': [2, 3],
    'classifier__C': [0.1, 1, 10],
    'classifier__kernel': ['linear', 'rbf'],
}

# --- Exhaustive search over every combination, fitted on the training split ---
grid = GridSearchCV(estimator=pipe, param_grid=param_grid).fit(X_train, y_train)

# --- Report: winning combination, its CV score, and accuracy on the held-out split ---
print("Best parameters found:", grid.best_params_)
print(
    "Best cross-validation score: {:.2f}".format(grid.best_score_),
    "Test set score: {:.2f}".format(grid.score(X_test, y_test)),
    sep="\n",
)


# New Section

1. Load dataset


In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Fetch the Iris data as a (features, labels) pair
X, y = load_iris(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



2. Create pipeline

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC

# Three chained steps: standardize the features, project them with PCA,
# then classify with a support-vector machine.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('classifier', SVC()),
])


3. Define parameter grid

In [None]:
# Candidate values per hyper-parameter; each keyword is "<pipeline step>__<parameter>".
param_grid = dict(
    pca__n_components=[2, 3],
    classifier__C=[0.1, 1, 10],
    classifier__kernel=['linear', 'rbf'],
)


4. GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV

# Set up an exhaustive search over every combination listed in param_grid
grid = GridSearchCV(estimator=pipe, param_grid=param_grid)
# Fit on the training split only; the test split stays untouched until scoring
grid.fit(X_train, y_train)


5. Results

In [None]:
# Winning hyper-parameter combination
print("Best parameters found:", grid.best_params_)
# Cross-validation score of that combination, then accuracy on the held-out test split
print(
    "Best cross-validation score: {:.2f}".format(grid.best_score_),
    "Test set score: {:.2f}".format(grid.score(X_test, y_test)),
    sep="\n",
)


Best parameters found: {'classifier__C': 0.1, 'classifier__kernel': 'linear', 'pca__n_components': 3}
Best cross-validation score: 0.96
Test set score: 1.00


Check 3-fold, 5-fold, and 7-fold cross-validation.

Replace the SVC classifier with RandomForestClassifier, LogisticRegression, Perceptron, and KNN (KNeighborsClassifier).

Update the param_grid accordingly (e.g., for RandomForestClassifier, use n_estimators, max_depth, etc.)

Also replace GridSearchCV with RandomizedSearchCV.

Replace the Iris data with your own CSV dataset using the code below:

In [None]:
import pandas as pd

# Read the CSV, then separate the label column from the remaining feature columns.
# "your_dataset.csv" and "target_column" are placeholders to be replaced.
data = pd.read_csv("your_dataset.csv")
y = data["target_column"]
X = data.drop(columns="target_column")


Check 3-fold, 5-fold, and 7-fold cross-validation.

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

# Data: Iris with a fixed 80/20 train/test split.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model: scaling, then PCA, then an SVM classifier.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA()),
    ('classifier', SVC()),
])

# Hyper-parameter candidates, addressed as "<step>__<param>".
param_grid = {
    'pca__n_components': [2, 3],
    'classifier__C': [0.1, 1, 10],
    'classifier__kernel': ['linear', 'rbf'],
}

# Repeat the same grid search with 3, 5 and 7 folds and report each run.
for cv in (3, 5, 7):
    print(f"\n🔁 Results for {cv}-Fold Cross-Validation:")

    grid = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=cv).fit(X_train, y_train)

    print("Best parameters found:", grid.best_params_)
    print(
        "Best cross-validation score: {:.2f}".format(grid.best_score_),
        "Test set score: {:.2f}".format(grid.score(X_test, y_test)),
        sep="\n",
    )



🔁 Results for 3-Fold Cross-Validation:
Best parameters found: {'classifier__C': 0.1, 'classifier__kernel': 'linear', 'pca__n_components': 3}
Best cross-validation score: 0.97
Test set score: 1.00

🔁 Results for 5-Fold Cross-Validation:
Best parameters found: {'classifier__C': 0.1, 'classifier__kernel': 'linear', 'pca__n_components': 3}
Best cross-validation score: 0.96
Test set score: 1.00

🔁 Results for 7-Fold Cross-Validation:
Best parameters found: {'classifier__C': 0.1, 'classifier__kernel': 'linear', 'pca__n_components': 3}
Best cross-validation score: 0.97
Test set score: 1.00


Replace the SVC classifier with RandomForestClassifier, LogisticRegression, Perceptron, and KNN (KNeighborsClassifier).

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# 1. Load dataset
# Iris features/labels with a fixed 80/20 train/test split.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Define classifiers and parameter grids
# Each entry maps a display name to (estimator, grid). Grid keys carry the
# "classifier__" prefix because the estimator is installed as the pipeline
# step named 'classifier' below.
models = {
    "RandomForest": (
        # Seeded so repeated runs pick the same "best" parameters; an unseeded
        # forest trains differently on every execution of this cell.
        RandomForestClassifier(random_state=42),
        {
            'classifier__n_estimators': [50, 100, 150],
            'classifier__max_depth': [3, 5, None]
        }
    ),
    "LogisticRegression": (
        LogisticRegression(max_iter=1000),
        {
            'classifier__C': [0.1, 1, 10],
            'classifier__penalty': ['l2'],
            'classifier__solver': ['lbfgs']
        }
    ),
    "Perceptron": (
        Perceptron(),
        {
            'classifier__penalty': ['l2', 'l1', 'elasticnet'],
            'classifier__alpha': [0.0001, 0.001, 0.01]
        }
    ),
    "KNN": (
        KNeighborsClassifier(),
        {
            'classifier__n_neighbors': [3, 5, 7],
            'classifier__weights': ['uniform', 'distance']
        }
    )
}

# 3. Run GridSearchCV for each classifier and fold
for cv in [3, 5, 7]:
    print("\n==============================")
    print(f"🔁 {cv}-Fold Cross-Validation")
    print("==============================")

    for name, (model, param_grid) in models.items():
        print(f"\n▶ Classifier: {name}")

        # Same preprocessing for every classifier: scale, then keep 3 principal components.
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA(n_components=3)),
            ('classifier', model)
        ])

        grid = GridSearchCV(pipe, param_grid, cv=cv)
        grid.fit(X_train, y_train)

        print("  Best Parameters:", grid.best_params_)
        print("  Best CV Score: {:.2f}".format(grid.best_score_))
        print("  Test Set Accuracy: {:.2f}".format(grid.score(X_test, y_test)))



🔁 3-Fold Cross-Validation

▶ Classifier: RandomForest
  Best Parameters: {'classifier__max_depth': 5, 'classifier__n_estimators': 100}
  Best CV Score: 0.96
  Test Set Accuracy: 1.00

▶ Classifier: LogisticRegression
  Best Parameters: {'classifier__C': 1, 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs'}
  Best CV Score: 0.96
  Test Set Accuracy: 1.00

▶ Classifier: Perceptron
  Best Parameters: {'classifier__alpha': 0.01, 'classifier__penalty': 'elasticnet'}
  Best CV Score: 0.91
  Test Set Accuracy: 0.93

▶ Classifier: KNN
  Best Parameters: {'classifier__n_neighbors': 3, 'classifier__weights': 'uniform'}
  Best CV Score: 0.96
  Test Set Accuracy: 1.00

🔁 5-Fold Cross-Validation

▶ Classifier: RandomForest
  Best Parameters: {'classifier__max_depth': None, 'classifier__n_estimators': 100}
  Best CV Score: 0.94
  Test Set Accuracy: 0.97

▶ Classifier: LogisticRegression
  Best Parameters: {'classifier__C': 1, 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs'}
  Be

Update the param_grid accordingly (e.g., for RandomForestClassifier, use n_estimators, max_depth, etc.)

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier

# Load data
# Iris features/labels with a fixed 80/20 train/test split.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define classifiers and their specific param_grids
# Each entry maps a display name to (estimator, grid). Grid keys carry the
# "classifier__" prefix because the estimator is installed as the pipeline
# step named 'classifier' below.
models = {
    "RandomForest": (
        # Seeded so repeated runs pick the same "best" parameters; an unseeded
        # forest trains differently on every execution of this cell.
        RandomForestClassifier(random_state=42),
        {
            'classifier__n_estimators': [50, 100, 150],
            'classifier__max_depth': [3, 5, None]
        }
    ),
    "LogisticRegression": (
        LogisticRegression(max_iter=1000),
        {
            'classifier__C': [0.1, 1, 10],
            'classifier__penalty': ['l2'],
            'classifier__solver': ['lbfgs']
        }
    ),
    "Perceptron": (
        Perceptron(),
        {
            'classifier__penalty': ['l1', 'l2', 'elasticnet'],
            'classifier__alpha': [0.0001, 0.001, 0.01]
        }
    ),
    "KNN": (
        KNeighborsClassifier(),
        {
            'classifier__n_neighbors': [3, 5, 7],
            'classifier__weights': ['uniform', 'distance']
        }
    )
}

# Run with 3, 5, and 7-fold CV
for cv in [3, 5, 7]:
    print("\n==============================")
    print(f"🔁 {cv}-Fold Cross-Validation")
    print("==============================")

    for name, (model, param_grid) in models.items():
        print(f"\n▶ Classifier: {name}")

        # Same preprocessing for every classifier: scale, then keep 3 principal components.
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA(n_components=3)),
            ('classifier', model)
        ])

        grid = GridSearchCV(pipe, param_grid, cv=cv)
        grid.fit(X_train, y_train)

        print("  Best Parameters:", grid.best_params_)
        print("  Best CV Score: {:.2f}".format(grid.best_score_))
        print("  Test Set Accuracy: {:.2f}".format(grid.score(X_test, y_test)))



🔁 3-Fold Cross-Validation

▶ Classifier: RandomForest
  Best Parameters: {'classifier__max_depth': None, 'classifier__n_estimators': 150}
  Best CV Score: 0.95
  Test Set Accuracy: 1.00

▶ Classifier: LogisticRegression
  Best Parameters: {'classifier__C': 1, 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs'}
  Best CV Score: 0.96
  Test Set Accuracy: 1.00

▶ Classifier: Perceptron
  Best Parameters: {'classifier__alpha': 0.01, 'classifier__penalty': 'elasticnet'}
  Best CV Score: 0.91
  Test Set Accuracy: 0.93

▶ Classifier: KNN
  Best Parameters: {'classifier__n_neighbors': 3, 'classifier__weights': 'uniform'}
  Best CV Score: 0.96
  Test Set Accuracy: 1.00

🔁 5-Fold Cross-Validation

▶ Classifier: RandomForest
  Best Parameters: {'classifier__max_depth': 5, 'classifier__n_estimators': 50}
  Best CV Score: 0.94
  Test Set Accuracy: 1.00

▶ Classifier: LogisticRegression
  Best Parameters: {'classifier__C': 1, 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs'}
  Bes

Also replace GridSearchCV with RandomizedSearchCV.

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neighbors import KNeighborsClassifier
from scipy.stats import randint, uniform

# Load data
# Iris features/labels with a fixed 80/20 train/test split.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define classifiers and their random search parameter distributions
# Each entry maps a display name to (estimator, distributions). Keys carry the
# "classifier__" prefix because the estimator is installed as the pipeline
# step named 'classifier' below.
# Note on scipy distributions: randint(low, high) draws integers with `high`
# excluded, and uniform(loc, scale) draws from [loc, loc + scale] -- the second
# argument is a width, not an upper bound.
models = {
    "RandomForest": (
        # Seeded: the search's own random_state only fixes which parameter
        # combinations are sampled, not how each forest is trained.
        RandomForestClassifier(random_state=42),
        {
            'classifier__n_estimators': randint(50, 200),
            'classifier__max_depth': randint(3, 10)
        }
    ),
    "LogisticRegression": (
        LogisticRegression(max_iter=1000),
        {
            'classifier__C': uniform(0.01, 10),  # i.e. C in [0.01, 10.01]
            'classifier__penalty': ['l2'],
            'classifier__solver': ['lbfgs']
        }
    ),
    "Perceptron": (
        Perceptron(),
        {
            'classifier__penalty': ['l1', 'l2', 'elasticnet'],
            'classifier__alpha': uniform(0.0001, 0.01)  # i.e. alpha in [0.0001, 0.0101]
        }
    ),
    "KNN": (
        KNeighborsClassifier(),
        {
            'classifier__n_neighbors': randint(3, 10),
            'classifier__weights': ['uniform', 'distance']
        }
    )
}

# Run with 3, 5, and 7-fold CV
for cv in [3, 5, 7]:
    print("\n==============================")
    print(f"🔁 {cv}-Fold Cross-Validation (RandomizedSearch)")
    print("==============================")

    for name, (model, param_dist) in models.items():
        print(f"\n▶ Classifier: {name}")

        # Same preprocessing for every classifier: scale, then keep 3 principal components.
        pipe = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA(n_components=3)),
            ('classifier', model)
        ])

        random_search = RandomizedSearchCV(
            pipe,
            param_distributions=param_dist,
            n_iter=10,        # You can increase this for a more thorough search
            cv=cv,
            random_state=42,  # fixes which parameter combinations get sampled
            n_jobs=-1         # use all available CPU cores
        )

        random_search.fit(X_train, y_train)

        print("  Best Parameters:", random_search.best_params_)
        print("  Best CV Score: {:.2f}".format(random_search.best_score_))
        print("  Test Set Accuracy: {:.2f}".format(random_search.score(X_test, y_test)))



🔁 3-Fold Cross-Validation (RandomizedSearch)

▶ Classifier: RandomForest
  Best Parameters: {'classifier__max_depth': 5, 'classifier__n_estimators': 137}
  Best CV Score: 0.94
  Test Set Accuracy: 1.00

▶ Classifier: LogisticRegression
  Best Parameters: {'classifier__C': np.float64(3.7554011884736247), 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs'}
  Best CV Score: 0.96
  Test Set Accuracy: 0.97

▶ Classifier: Perceptron
  Best Parameters: {'classifier__alpha': np.float64(0.0003058449429580245), 'classifier__penalty': 'l2'}
  Best CV Score: 0.88
  Test Set Accuracy: 0.93

▶ Classifier: KNN
  Best Parameters: {'classifier__n_neighbors': 9, 'classifier__weights': 'distance'}
  Best CV Score: 0.97
  Test Set Accuracy: 1.00

🔁 5-Fold Cross-Validation (RandomizedSearch)

▶ Classifier: RandomForest
  Best Parameters: {'classifier__max_depth': 9, 'classifier__n_estimators': 142}
  Best CV Score: 0.95
  Test Set Accuracy: 1.00

▶ Classifier: LogisticRegression
  Best Parameters: