In [2]:
import pandas as pd
from sklearn.datasets import load_iris, load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score

# Function to test model on any dataset
def evaluate_model(X, y, dataset_name, *, random_state=42):
    """Print hold-out accuracy and mean 5-fold CV score for a random forest.

    The evaluation runs twice: once on a stratified 10% subsample of the
    data ("Small") and once on all of it ("Full"), so the two sets of
    scores can be compared side by side.

    Args:
        X: Feature matrix, as accepted by scikit-learn estimators.
        y: Class labels aligned with the rows of ``X``.
        dataset_name: Label used as a prefix in the printed report.
        random_state: Seed used for both splits and for the forest itself.
            The default of 42 is the seed the splits were already using.

    Returns:
        None. The report is written to stdout.
    """
    # Small subset for overfitting example
    X_small, _, y_small, _ = train_test_split(
        X, y, train_size=0.1, stratify=y, random_state=random_state
    )

    for name, X_data, y_data in [("Small", X_small, y_small), ("Full", X, y)]:
        X_train, X_test, y_train, y_test = train_test_split(
            X_data, y_data, test_size=0.2, random_state=random_state
        )
        # Seed the forest as well as the splits; with an unseeded model the
        # printed scores differ on every run even though the data split is
        # fixed, which makes the Small-vs-Full comparison unrepeatable.
        model = RandomForestClassifier(random_state=random_state)
        model.fit(X_train, y_train)
        acc = accuracy_score(y_test, model.predict(X_test))
        # cross_val_score refits clones of the estimator per fold, so it is
        # evaluated on all of X_data/y_data independently of the fit above.
        cv_score = cross_val_score(model, X_data, y_data, cv=5).mean()
        print(f"{dataset_name} - {name} Dataset:")
        print("Accuracy:", acc)
        print("Cross-Validation Score:", cv_score)
        print("-" * 40)

# Load both bundled scikit-learn datasets; the names stay at module level
# so they remain available after this cell has run.
iris = load_iris()
digits = load_digits()

# Run the same evaluation on each dataset, Iris first and then Digits.
for label, bunch in (("Iris", iris), ("Digits", digits)):
    evaluate_model(bunch.data, bunch.target, label)


Iris - Small Dataset:
Accuracy: 0.6666666666666666
Cross-Validation Score: 0.9333333333333332
----------------------------------------
Iris - Full Dataset:
Accuracy: 1.0
Cross-Validation Score: 0.96
----------------------------------------
Digits - Small Dataset:
Accuracy: 0.8611111111111112
Cross-Validation Score: 0.8882539682539683
----------------------------------------
Digits - Full Dataset:
Accuracy: 0.975
Cross-Validation Score: 0.937700402352213
----------------------------------------
