In [1]:
# 1. Write a Python program to:
# ● Load the Breast Cancer dataset using sklearn.datasets.load_breast_cancer()
# ● Train a Random Forest Classifier
# ● Print the top 5 most important features based on feature importance scores.
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier

# Breast Cancer dataset: feature matrix and target labels
data = load_breast_cancer()
X = data.data
y = data.target

# Fit a 100-tree forest on the full dataset; the fixed seed keeps runs repeatable
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y)

# Sort feature indices by importance (descending) and keep the first five
importances = rf.feature_importances_
indices = importances.argsort()[::-1][:5]

print("Top 5 important features:")
for feature_name, score in zip(data.feature_names[indices], importances[indices]):
    print(f"{feature_name}: {score:.4f}")



Top 5 important features:
worst area: 0.1394
worst concave points: 0.1322
mean concave points: 0.1070
worst radius: 0.0828
worst perimeter: 0.0808


In [4]:
#2.Write a Python program to:
#●	Train a Bagging Classifier using Decision Trees on the Iris dataset
#●	Evaluate its accuracy and compare with a single Decision Tree
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score

# Iris features and class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the samples for evaluation (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Baseline: one decision tree fitted on the training portion
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
dt_pred = dt.predict(X_test)

# Ensemble: 50 bagged decision trees.
# The base learner is passed through the `estimator` keyword
# (not `base_estimator`).
bagging = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42,
).fit(X_train, y_train)
bag_pred = bagging.predict(X_test)

# Report test-set accuracy for both models side by side
dt_accuracy = accuracy_score(y_test, dt_pred)
bagging_accuracy = accuracy_score(y_test, bag_pred)
print("Single Decision Tree Accuracy:", dt_accuracy)
print("Bagging Classifier Accuracy:", bagging_accuracy)


Single Decision Tree Accuracy: 1.0
Bagging Classifier Accuracy: 1.0


In [5]:
# 3. Write a Python program to:
#●	Train a Random Forest Classifier
#●	Tune hyperparameters max_depth and n_estimators using GridSearchCV
#●	Print the best parameters and final accuracy
from sklearn.model_selection import GridSearchCV

# Imported here so this cell does not depend on imports made by earlier cells
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

# Load Iris explicitly instead of reusing whatever X, y a previously executed
# cell left in the kernel. A later cell rebinds X, y to a regression dataset,
# so re-running this cell out of order would otherwise search on the wrong data.
X, y = load_iris(return_X_y=True)

# Define parameter grid: 3 forest sizes x 4 depth limits (None = unlimited depth)
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 5, 7, None]
}

# 5-fold cross-validated search over every combination, scored by accuracy
grid = GridSearchCV(RandomForestClassifier(random_state=42),
                    param_grid, cv=5, scoring='accuracy')

grid.fit(X, y)

print("Best Parameters:", grid.best_params_)
# best_score_ is the mean cross-validated accuracy of the best combination,
# not the accuracy on a separate held-out test set.
print("Best Accuracy:", grid.best_score_)


Best Parameters: {'max_depth': 3, 'n_estimators': 50}
Best Accuracy: 0.9666666666666668


In [6]:
#4. Write a Python program to:
#●	Train a Bagging Regressor and a Random Forest Regressor on the California Housing dataset
#●	Compare their Mean Squared Errors (MSE)
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# California Housing features and regression target
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Seeded 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Bagging ensemble of 50 estimators; no base estimator is passed,
# so the library default is used
bag_reg = BaggingRegressor(n_estimators=50, random_state=42).fit(X_train, y_train)
bag_pred = bag_reg.predict(X_test)

# Random forest of 100 trees.
# NOTE: the two ensembles differ in size (50 vs 100), which should be kept
# in mind when reading the MSE comparison below.
rf_reg = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_pred = rf_reg.predict(X_test)

# Test-set mean squared error for each model
bag_mse = mean_squared_error(y_test, bag_pred)
rf_mse = mean_squared_error(y_test, rf_pred)
print("Bagging MSE:", bag_mse)
print("Random Forest MSE:", rf_mse)



Bagging MSE: 0.25787382250585034
Random Forest MSE: 0.25650512920799395
