In [None]:
import polars as pl
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
from time import perf_counter
import dsds.fs as fs
import dsds 
# NOTE(review): presumably turns off dsds's progress bars for this session (inferred
# from the flag name only) -- confirm against the dsds documentation.
dsds.NO_PROGRESS_BAR = True

In [None]:
target = "target"
# Pin the seed: without `random_state` the synthetic data set is regenerated differently
# on every run, so the feature selections compared further down would not be reproducible.
orig_x, orig_y = make_classification(
    n_samples = 10_000,
    n_features = 500,
    n_informative = 60,
    n_redundant = 440,
    random_state = 42,
)
# This is a Polars dataframe, the format the dsds package favors; dsds relies heavily on Polars.
# Any other dataframe format must be converted to Polars before dsds can work with it.
# NOTE(review): newer Polars releases appear to have renamed `insert_at_idx` to
# `insert_column` -- confirm against the Polars version this notebook is pinned to.
df = pl.from_numpy(orig_x).insert_at_idx(0, pl.Series(target, orig_y))
# Pandas copy of the same data.
df_pd = df.to_pandas()
# Every column except the target is a candidate feature.
features = [col for col in df.columns if col != target]

In [None]:
# Model-based feature importance.
# The LGBM hyperparameters are suggested automatically by Optuna in a 30-trial tuning run.
# More user control over this tuning process will be added in the future.

f1 = fs.mrmr(df, "target", 50, relevance="lgbm", mrmr_strategy="accum_corr")

In [None]:
# Same dsds MRMR call as the LGBM-based one, but with relevance="f".
f2 = fs.mrmr(df, "target", 50, relevance="f", mrmr_strategy="accum_corr")

In [None]:
from mrmr import mrmr_classif

def mrmr_package(df:pd.DataFrame, target:str, k:int) -> list[str]:
    """Run the `mrmr` package's `mrmr_classif` on a Pandas dataframe.

    Parameters
    ----------
    df
        Pandas dataframe holding the predictor columns and the target column.
    target
        Name of the target column. Every other column of `df` is passed to
        `mrmr_classif` as a predictor.
    k
        Forwarded to `mrmr_classif` as its `K` argument.

    Returns
    -------
    Whatever `mrmr_classif` returns, passed through unchanged.

    Raises
    ------
    ValueError
        If `target` is not one of the columns of `df`.
    """
    predictors = list(df.columns)
    # list.remove raises ValueError when `target` is absent from the columns.
    predictors.remove(target)
    return mrmr_classif(df[predictors], df[target], K = k)


In [None]:
# Reference selection: run the `mrmr` package wrapper on the Pandas copy of the data.
f3 = mrmr_package(df=df_pd, target="target", k=50)

In [None]:
# Compare the dsds selection made with relevance="f" (f2) against the mrmr package's selection (f3).
# NOTE(review): if both are plain lists, `==` also requires identical ordering -- confirm that is intended.
f2 == f3 

In [None]:
import dsds.metrics as me


# Jaccard similarity between the two dsds selections: f1 (relevance="lgbm") and f2 (relevance="f").
me.jaccard_similarity(f1, f2)