In [1]:
# Step 1: Import the package
try:
    import imblearn
    from imblearn.over_sampling import SMOTE
    from imblearn.under_sampling import RandomUnderSampler
    from imblearn.pipeline import Pipeline
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import classification_report
    print("Step 1: Importing imbalanced-learn and necessary sklearn modules passed.")
except ImportError:
    print("Step 1 failed: 'imbalanced-learn' or required sklearn modules are not installed.")
    sys.exit(1)
except Exception as e:
    print(f"Step 1 failed with an unexpected error: {str(e)}")
    sys.exit(1)

# Step 2: Generate an imbalanced dataset
try:
    X, y = make_classification(n_samples=1000, n_features=20, n_informative=2, n_redundant=10,
                               n_clusters_per_class=1, weights=[0.99], flip_y=0, random_state=42)
    print("Step 2: Generating an imbalanced dataset passed.")
except Exception as e:
    print(f"Step 2 failed: {str(e)}")
    sys.exit(1)

# Step 3: Apply SMOTE to balance the dataset
try:
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    
    assert len(X_resampled) > len(X), "SMOTE did not generate additional samples."
    assert len(set(y_resampled)) == 2, "SMOTE did not maintain class distribution."
    print("Step 3: Applying SMOTE to balance the dataset passed.")
except Exception as e:
    print(f"Step 3 failed: {str(e)}")
    sys.exit(1)

# Step 4: Create a pipeline with SMOTE and RandomUnderSampler
try:
    pipeline = Pipeline([
        ('over', SMOTE(sampling_strategy=0.1, random_state=42)),
        ('under', RandomUnderSampler(sampling_strategy=0.5, random_state=42)),
        ('model', RandomForestClassifier(random_state=42))
    ])
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    
    report = classification_report(y_test, y_pred)
    print(f"Step 4: Pipeline with SMOTE and RandomUnderSampler passed.\nClassification Report:\n{report}")
except Exception as e:
    print(f"Step 4 failed: {str(e)}")
    sys.exit(1)

# Step 5: Validate pipeline performance on imbalanced data
try:
    initial_pipeline = RandomForestClassifier(random_state=42)
    initial_pipeline.fit(X_train, y_train)
    initial_pred = initial_pipeline.predict(X_test)
    
    initial_report = classification_report(y_test, initial_pred)
    print(f"Step 5: Validate pipeline performance on imbalanced data passed.\nInitial Classification Report:\n{initial_report}")
except Exception as e:
    print(f"Step 5 failed: {str(e)}")
    sys.exit(1)

# Final Confirmation
print("All extensive tests for the 'imbalanced-learn' package completed successfully.")


Step 1: Importing imbalanced-learn and necessary sklearn modules passed.
Step 2: Generating an imbalanced dataset passed.
Step 3: Applying SMOTE to balance the dataset passed.
Step 4: Pipeline with SMOTE and RandomUnderSampler passed.
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00       199
           1       0.50      1.00      0.67         1

    accuracy                           0.99       200
   macro avg       0.75      1.00      0.83       200
weighted avg       1.00      0.99      1.00       200

Step 5: Validate pipeline performance on imbalanced data passed.
Initial Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       199
           1       1.00      1.00      1.00         1

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00    