In [15]:
import pytest
import numpy as np
import pandas as pd

from sklearn.datasets import load_diabetes

from feature_selection import EAcmi_framework_tol

def test_EAcmi_framework_tol_sklearn_regression():
    """
    Smoke-test EAcmi_framework_tol on scikit-learn's diabetes regression data.

    The dataset (10 features, continuous target) is loaded as plain arrays and
    handed to EAcmi_framework_tol together with a threshold of 0.1. The test
    passes when the call completes without raising, the return value is a
    pandas DataFrame, and that DataFrame has at least one row. The result
    table is printed so it is visible when the test is run in a notebook.
    """
    features, target = load_diabetes(return_X_y=True)

    # Threshold passed as the third positional argument; per the original
    # author's notes this is a cutoff on the CMI/MI ratio.
    ratio_cutoff = 0.1

    selection = EAcmi_framework_tol(features, target, ratio_cutoff, silent=True)

    # The return type must be a DataFrame ...
    assert isinstance(selection, pd.DataFrame)

    # ... and this test deliberately enforces a non-empty result.
    assert selection.shape[0] > 0

    print(selection)

In [16]:
# Run the diabetes-dataset test directly in the notebook; it prints the result table.
test_EAcmi_framework_tol_sklearn_regression()

   Iteration    Input     CMI      MI  CMI.MI.ratio  CMIevals  CPUtime  \
0          1  Input_2  0.2738  0.2738        1.0000        10   0.0099   
1          2  Input_8  0.1064  0.3802        0.2800        19   0.0316   
2          3  Input_3  0.0410  0.4212        0.0973        27   0.0452   

   ElapsedTime  
0       0.0124  
1       0.0390  
2       0.0534  


In [19]:
def test_EAcmi_framework_tol_large_data(n_rows=2000, n_feats=50, threshold=0.01, seed=42):
    """
    Smoke-test EAcmi_framework_tol on a synthetic standard-normal dataset.

    X is an (n_rows, n_feats) matrix and y is a length-n_rows vector, both
    drawn with np.random.randn after seeding NumPy's global generator. y is
    drawn independently of X, so the test only checks that the call completes
    and returns a pandas DataFrame; an empty DataFrame is accepted.

    Parameters
    ----------
    n_rows : int, default 2000
        Number of samples (rows of X, length of y).
    n_feats : int, default 50
        Number of candidate features (columns of X).
    threshold : float, default 0.01
        Value forwarded to EAcmi_framework_tol as ``thresh``.
    seed : int, default 42
        Seed passed to np.random.seed so the generated data is repeatable.

    All parameters have defaults, so the zero-argument call keeps working and
    produces the same data as before. Larger sizes can be requested explicitly;
    runtime depends on the sizes chosen and on the machine.
    """
    # Seed the global NumPy generator so repeated runs build identical data.
    np.random.seed(seed)

    # Feature matrix first, then the target: the draw order determines the
    # values obtained for a given seed, so it must not be swapped.
    X = np.random.randn(n_rows, n_feats)
    y = np.random.randn(n_rows)

    df_results = EAcmi_framework_tol(X, y, thresh=threshold, silent=True)

    assert isinstance(df_results, pd.DataFrame), \
        "Expected a Pandas DataFrame return."

    # No non-emptiness check here: the result is allowed to be empty.
    # If at least one selected feature is expected, enable:
    # assert not df_results.empty, "Expected at least one feature to be selected."

    print(df_results)

In [None]:
# Run the random-data test directly in the notebook; it prints the result table.
test_EAcmi_framework_tol_large_data()