In [1]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score


In [2]:
# Generate a synthetic imbalanced dataset
X, y = make_classification(n_samples=1000, n_features=20, n_informative=18,
n_redundant=2, n_classes=2, weights=[0.95, 0.05], random_state=42)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Base classifier (Decision Tree)
base_classifier = DecisionTreeClassifier(random_state=42)


In [3]:
# Fit the base classifier
base_classifier.fit(X_train, y_train)

# Fit the bagging classifier
bagging_classifier = BaggingClassifier(base_classifier, n_estimators=10, random_state=42)
bagging_classifier.fit(X_train, y_train)


In [4]:
# Make predictions
bagging_predictions = bagging_classifier.predict(X_test)


In [5]:


# Compare the performance
base_classifier_accuracy = accuracy_score(y_test, base_classifier.predict(X_test))
bagging_accuracy = accuracy_score(y_test, bagging_predictions)

print("Base Classifier Accuracy:", base_classifier_accuracy)
print("Bagging Classifier Accuracy:", bagging_accuracy)


Base Classifier Accuracy: 0.91
Bagging Classifier Accuracy: 0.965


1. **Base classifier accuracy:** 0.91  
2. **Bagging classifier accuracy:** 0.965  
3. **Comparison:** Bagging is more accurate, improving from 91% to 96.5%.

In [6]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier


In [7]:
# Boosting (AdaBoost)
adaboost_classifier = AdaBoostClassifier(base_classifier, n_estimators=50, random_state=42)
adaboost_classifier.fit(X_train, y_train)
adaboost_predictions = adaboost_classifier.predict(X_test)




In [8]:
# Boosting (Gradient Boosting)
gradientboost_classifier = GradientBoostingClassifier(n_estimators=50, random_state=42)
gradientboost_classifier.fit(X_train, y_train)
gradientboost_predictions = gradientboost_classifier.predict(X_test)


In [10]:
# Compare the performance
adaboost_accuracy = accuracy_score(y_test, adaboost_predictions)
gradientboost_accuracy = accuracy_score(y_test, gradientboost_predictions)
print("AdaBoost Classifier Accuracy:", adaboost_accuracy)
print("Gradient Boosting Classifier Accuracy:", gradientboost_accuracy)


AdaBoost Classifier Accuracy: 0.92
Gradient Boosting Classifier Accuracy: 0.965


1. **Accuracy of the AdaBoost classifier on the test set:** 0.92  
2. **Accuracy of the Gradient Boosting classifier on the test set:** 0.965  
3. **Comparison:**  
   - Gradient Boosting performs better than AdaBoost, with an accuracy of 96.5% compared to 92%.  
   - Both boosting methods outperform the base classifier (91%).  
   - Gradient Boosting achieves the same accuracy as the Bagging classifier in this case.

In [12]:
!pip install imblearn

Defaulting to user installation because normal site-packages is not writeable
Collecting imblearn
  Downloading imblearn-0.0-py2.py3-none-any.whl.metadata (355 bytes)
Collecting imbalanced-learn (from imblearn)
  Downloading imbalanced_learn-0.12.4-py3-none-any.whl.metadata (8.3 kB)
Downloading imblearn-0.0-py2.py3-none-any.whl (1.9 kB)
Downloading imbalanced_learn-0.12.4-py3-none-any.whl (258 kB)
Installing collected packages: imbalanced-learn, imblearn
Successfully installed imbalanced-learn-0.12.4 imblearn-0.0


DEPRECATION: Loading egg at c:\program files\python312\lib\site-packages\vboxapi-1.0-py3.12.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330


In [22]:
from imblearn.over_sampling import SMOTE
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score


In [21]:
pip install SMOTE

Defaulting to user installation because normal site-packages is not writeable
Collecting SMOTE
  Downloading smote-0.1-py2.py3-none-any.whl.metadata (278 bytes)
Downloading smote-0.1-py2.py3-none-any.whl (3.3 kB)
Installing collected packages: SMOTE
Successfully installed SMOTE-0.1
Note: you may need to restart the kernel to use updated packages.


DEPRECATION: Loading egg at c:\program files\python312\lib\site-packages\vboxapi-1.0-py3.12.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330


In [23]:

oversampler = SMOTE(random_state=42)

# Resample the training data
X_resampled, y_resampled = oversampler.fit_resample(X_train, y_train)

In [24]:
# Define the resampled classifier
resampled_classifier = DecisionTreeClassifier(random_state=42)


In [25]:



# Train the classifier on the resampled data
resampled_classifier.fit(X_resampled, y_resampled)

# Make predictions on the test set
resampled_predictions = resampled_classifier.predict(X_test)

# Compare the performance
resampled_accuracy = accuracy_score(y_test, resampled_predictions)
print("Resampled Classifier Accuracy:", resampled_accuracy)


Resampled Classifier Accuracy: 0.89


1. **Purpose of oversampling:** To balance the dataset by increasing minority class representation, improving model performance on both classes.  
2. **Comparison:** The resampled classifier's accuracy (0.89) is lower than the original classifiers (e.g., Bagging: 0.965), but it may improve minority class performance.