# Ensemble Learning with Decision Trees

## Simple classification problem using Random Forests and Boosted Decision Trees

*Costas Andreopoulos \<c.andreopoulos@cern.ch\>*

In [1]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Build a synthetic classification dataset: 1000 samples, 20 features each.
# random_state is pinned so that re-running the cell yields the same data.
X, Y = make_classification(
    n_samples=1000,
    n_features=20,
    random_state=42,
)

# Hold out 20% of the samples as a test set; the remaining 80% is used for
# training. The split is seeded as well, so it is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [3]:
# Random Forest: build a 100-estimator ensemble and fit it on the training
# split. fit() returns the estimator itself, so construction and training
# can be chained into a single expression.
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, Y_train)

# Predicted class labels for the held-out test samples
Y_pred_rf = rf.predict(X_test)

# Report the test-set accuracy as a percentage with two decimals
print(f"Random Forest Accuracy: {100*accuracy_score(Y_test, Y_pred_rf):.2f}%")

Random Forest Accuracy: 90.00%


In [4]:
# Gradient Boosting: build a 100-estimator ensemble with a learning rate of
# 0.1 and fit it on the training split. fit() returns the estimator itself,
# so construction and training can be chained into a single expression.
gb = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, Y_train)

# Predicted class labels for the held-out test samples
Y_pred_gb = gb.predict(X_test)

# Report the test-set accuracy as a percentage with two decimals
print(f"Gradient Boosting Accuracy: {100*accuracy_score(Y_test, Y_pred_gb):.2f}%")

Gradient Boosting Accuracy: 91.00%
