In [10]:
import fnmatch

def satisfies_forced_vars(subset, forced_vars):
    """
    Decide whether `subset` meets the forced-variable constraints.

    `forced_vars` is an OR of AND-groups: a list whose items are lists of
    patterns. A group is satisfied when every one of its patterns matches
    at least one name in `subset`; the result is True as soon as any group
    is satisfied. A falsy `forced_vars` (empty list / None) means "no
    constraints" and always yields True.

    Names and patterns are lower-cased before being compared with
    `fnmatch.fnmatch`, so shell-style wildcards are available:
      *fic* => contains "fic"
      fico* => starts with "fico"
      *fico => ends with "fico"
    """
    if not forced_vars:
        return True

    names = [name.lower() for name in subset]

    def pattern_hit(pattern):
        # A pattern is covered when at least one name matches it.
        lowered = pattern.lower()
        return any(fnmatch.fnmatch(name, lowered) for name in names)

    # Outer any() = OR over groups, inner all() = AND over a group's patterns.
    return any(
        all(pattern_hit(pattern) for pattern in group)
        for group in forced_vars
    )


##########
# Testing #
##########

def test_satisfies_forced_vars():
    """
    Run satisfies_forced_vars over a table of scenarios and print, for each
    one, the inputs, the expected and actual result, and a PASS/FAIL tag.

    Nothing is asserted and nothing is returned; the printed report is the
    only output.
    """
    # Rule 1: plain names, both must be present.
    exact_rule = [['fico', 'cltv']]
    # Rule 2: one AND-group of wildcards (something with "fico" AND
    # something with "cltv").
    wildcard_rule = [['*fico*', '*cltv*']]
    # Rule 3: two AND-groups OR-ed together, mixing exact names and wildcards.
    or_rule = [
        ['base1', 'base2', '*fico*', '*cltv*'],
        ['base3', 'base4', '*cltv*'],
    ]

    # Each row: (subset, forced_vars, expected result, description)
    cases = [
        # 1) Simple exact matches
        (['fico', 'cltv'], exact_rule, True, "Exact match #1"),
        (['fico', 'cltv', 'other'], exact_rule, True, "Exact match #2"),
        (['fico'], exact_rule, False, "Exact match #3"),
        ([], exact_rule, False, "Exact match #4"),
        # 2) Wildcard AND group
        (['fico_lt_650', 'cltv_ln'], wildcard_rule, True, "Wildcard AND #1"),
        (['fico_lt_650', 'fico_ln_2.0'], wildcard_rule, False, "Wildcard AND #2"),
        # 3) OR across groups
        (['base1', 'base2', 'fico_ABC', 'cltv_whatever'], or_rule, True,
         "OR logic #1 => matches first group"),
        (['base3', 'base4', 'some_random_feature', 'CLTV_ln'], or_rule, True,
         "OR logic #2 => matches second group"),
        (['base1', 'fico_ABC', 'base3'], or_rule, False,
         "OR logic #3 => partial bits but fails all ANDs"),
        (['base3', 'base4', 'fico_ABC', 'CLTV_ln', 'ANYTHING'], or_rule, True,
         "OR logic #4 => superset for second group"),
        (['BASE1', 'BASE2', 'FICO_something', 'CLTV_SOMETHING'], or_rule, True,
         "OR logic #5 => case-insensitive"),
    ]

    for candidate, rule, want, label in cases:
        got = satisfies_forced_vars(candidate, rule)
        verdict = "FAIL" if got != want else "PASS"
        print(f"{label:30} => subset={candidate},\n"
              f"   forced={rule}\n"
              f"   expected={want}, got={got} [{verdict}]\n")

if __name__ == "__main__":
    # The test routine prints its own report and returns None, so it is
    # called directly; wrapping it in print() would emit a stray "None".
    test_satisfies_forced_vars()


Exact match #1                 => subset=['fico', 'cltv'],
   forced=[['fico', 'cltv']]
   expected=True, got=True [PASS]

Exact match #2                 => subset=['fico', 'cltv', 'other'],
   forced=[['fico', 'cltv']]
   expected=True, got=True [PASS]

Exact match #3                 => subset=['fico'],
   forced=[['fico', 'cltv']]
   expected=False, got=False [PASS]

Exact match #4                 => subset=[],
   forced=[['fico', 'cltv']]
   expected=False, got=False [PASS]

Wildcard AND #1                => subset=['fico_lt_650', 'cltv_ln'],
   forced=[['*fico*', '*cltv*']]
   expected=True, got=True [PASS]

Wildcard AND #2                => subset=['fico_lt_650', 'fico_ln_2.0'],
   forced=[['*fico*', '*cltv*']]
   expected=False, got=False [PASS]

OR logic #1 => matches first group => subset=['base1', 'base2', 'fico_ABC', 'cltv_whatever'],
   forced=[['base1', 'base2', '*fico*', '*cltv*'], ['base3', 'base4', '*cltv*']]
   expected=True, got=True [PASS]

OR logic #2 => matches seco

In [None]:
# Show the built-in help for the `best_subset` module.
# NOTE(review): `best_subset` is not defined in this notebook; presumably a
# project-local module that must be importable from the kernel — confirm.
import best_subset
help(best_subset)

In [6]:
# NOTE(review): `tests` is only ever assigned as a local variable inside
# test_satisfies_forced_vars() (whose `return tests` is commented out), so
# this bare reference raises NameError at notebook scope.
tests

NameError: name 'tests' is not defined

In [20]:
# Keyword arguments are used, so the call does not depend on parameter order.
# NOTE(review): the second group ['x3', 'x4'] is fully contained in `subset`,
# so the satisfies_forced_vars definitions visible in this notebook return
# True here; the recorded False presumably came from an earlier draft that
# was live in the kernel — re-run to confirm.
satisfies_forced_vars(subset = ['x3', 'x4', 'x5','x1'], forced_vars= [['x1', 'x2'], ['x3', 'x4']])

False

In [44]:
def satisfies_forced_vars(forced_vars, subset):
    """
    Check `subset` against the forced-variable wildcard rules.

    Note the argument order: the rules come first, the subset second.

      - `forced_vars` is a list of pattern lists. The outer list is an OR,
        each inner list is an AND.
      - An inner list is satisfied when every pattern in it is matched by at
        least one feature in `subset`.
      - Matching is case-insensitive and uses fnmatch, so '*' can express
        "starts with", "ends with" or "contains".
      - A falsy `forced_vars` imposes no constraint and returns True.
    """
    if not forced_vars:
        return True

    features = [item.lower() for item in subset]

    for group in forced_vars:
        for raw_pattern in group:
            wanted = raw_pattern.lower()
            for feature in features:
                if fnmatch.fnmatch(feature, wanted):
                    break  # this pattern is covered; move to the next one
            else:
                # No feature matched this pattern, so the whole group fails.
                break
        else:
            # Every pattern in the group found a match: OR is satisfied.
            return True

    return False


In [45]:
# Positional call in (forced_vars, subset) order.
# Neither group is fully covered by `subset` ('x2' is missing from the first
# group, 'x3' from the second), hence the False result.
subset = ['x1', 'x4'] 
forced_vars= [['x1', 'x2'], ['x3', 'x4']]
satisfies_forced_vars(forced_vars, subset)

False

In [79]:
%%timeit
# Sample inputs for the timed call at the bottom of this cell.
node = ['x1', 'x2', 'x3']
forced_combination = [['x1', 'x2'], ['x3', 'x4']]


def forced_features_are_node_subset(node, forced_combination):
    """
    Return True if at least one forced list is fully contained in `node`.

    Parameters
    ----------
    node : iterable of str
        Feature names present in the node.
    forced_combination : list of list of str
        Outer list is an OR, each inner list is an AND of exact feature
        names (no wildcard support in this variant).

    Returns
    -------
    bool
        True when some inner list is a subset of `node`, or when
        `forced_combination` is empty/None (no constraints).

    Comparison is case-insensitive: both the node names and the forced
    names are lower-cased before the subset test.
    """
    if not forced_combination:
        return True

    # A set gives O(1) membership for the subset checks below.
    node_names = {name.lower() for name in node}

    return any(
        {feature.lower() for feature in forced_list} <= node_names
        for forced_list in forced_combination
    )
            
# Call with this cell's own `forced_combination` so the cell does not depend
# on a global defined in another cell and still runs on a fresh kernel.
forced_features_are_node_subset(node, forced_combination)


1.81 μs ± 175 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [93]:
%%timeit
def forced_features_are_node_subset(node, forced_combination):
    """
    Return True if `node` satisfies at least one forced list.

    Parameters
    ----------
    node : iterable of str
        Feature names present in the node.
    forced_combination : list of list of str
        Outer list is an OR, each inner list is an AND. Every entry of an
        inner list must match at least one node name.

    Returns
    -------
    bool
        True when some inner list is fully matched, or when
        `forced_combination` is empty/None (no constraints).

    Matching rules (hand-rolled, no fnmatch):
      '*abc*' => some node name contains 'abc'
      '*abc'  => some node name ends with 'abc'
      'abc*'  => some node name starts with 'abc'
      'abc'   => some node name equals 'abc'
    Only leading/trailing '*' are treated as wildcards; any other character
    is compared literally. Both node names and patterns are lower-cased, so
    matching is case-insensitive.
    """
    if not forced_combination:
        return True

    node_names = [name.lower() for name in node]

    def feature_matches(forced_feature):
        # Lower-case the pattern too; the node names already are.
        pattern = forced_feature.lower()
        leading = pattern.startswith('*')
        trailing = pattern.endswith('*')

        if leading and trailing:
            keyword = pattern.strip('*')
            return any(keyword in name for name in node_names)
        if leading:
            keyword = pattern.lstrip('*')
            return any(name.endswith(keyword) for name in node_names)
        if trailing:
            keyword = pattern.rstrip('*')
            return any(name.startswith(keyword) for name in node_names)
        return pattern in node_names

    # OR over the forced lists, AND over the entries of each list.
    return any(
        all(feature_matches(forced_feature) for forced_feature in forced_list)
        for forced_list in forced_combination
    )

node = ['f1012', 'x1', 'x2', 'x3']

forced_combination = [['X1', 'x2'], ['x3', 'x4'], ['*12L']]

# 'x4' is absent and '*12L' cannot match 'f1012', so a True result here
# depends on 'X1' matching 'x1' case-insensitively in the first group.
forced_features_are_node_subset(node, forced_combination)


4.65 μs ± 192 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [98]:
 
import fnmatch

def forced_features_are_node_subset(node, forced_combination):
    """
    Return True if `node` satisfies at least one forced list.

    Parameters
    ----------
    node : iterable of str
        Feature names present in the node.
    forced_combination : list of list of str
        Outer list is an OR, each inner list is an AND of fnmatch-style
        patterns; every pattern must match at least one node name.

    Returns
    -------
    bool
        True when some inner list is fully matched, or when
        `forced_combination` is empty/None (no constraints).

    Both node names and patterns are lower-cased before matching, so the
    result is case-insensitive on every platform (fnmatch.fnmatch alone is
    case-sensitive on POSIX).
    """
    if not forced_combination:
        return True

    node_names = [name.lower() for name in node]

    def matches(forced_list):
        # AND: each (lower-cased) pattern needs at least one matching name.
        patterns = [forced_feature.lower() for forced_feature in forced_list]
        return all(
            any(fnmatch.fnmatch(name, pattern) for name in node_names)
            for pattern in patterns
        )

    # OR: one fully matched list is enough.
    return any(matches(forced_list) for forced_list in forced_combination)
node = ['f1012', 'x1', 'x2', 'x3']

forced_combination = [['X1', 'x2'], ['x3', 'x4'], ['*12']]

# True: the single-pattern group ['*12'] matches the node name 'f1012'.
forced_features_are_node_subset(node, forced_combination)


True

In [91]:
# NOTE(review): `lower_nested_elements` is not defined anywhere in the
# visible notebook — presumably a helper from a removed cell or another
# module; judging by the recorded output it lower-cases every string in the
# nested list. Confirm and restore its definition so this cell can run.
lower_nested_elements(forced_combination)

[['x1', 'x2'], ['x3', 'x4'], ['*12l']]

In [None]:
# Scratch cell: replays one step of the forced-vars check by hand.
subset = ['x3', 'x4', 'x5','x1'] 
subset_lc = [feat.lower() for feat in subset]
forced_vars= [['x1', 'x2'], ['x3', 'x4']]
sublist = ['x1', 'x2']
all_matched = True
# Stand-alone fnmatch probe; its value is discarded because it is not the
# last expression of the cell. "*fico" only matches names ENDING in "fico".
fnmatch.fnmatch("fico_something", "*fico")
pattern = 'x3'
pat_lc = pattern.lower()
# Cell result: does any lower-cased feature match the pattern 'x3'?
any([fnmatch.fnmatch(f, pat_lc) for f in subset_lc])

In [None]:
class Node:
    """
    One node of the subset-search tree.

    A node stores its variable subset (`key`), bookkeeping about how many
    branches it may still spawn, and the forced-variable rules that every
    child subset has to respect.
    """

    def __init__(self, key, branches, n, forced_vars=None):
        """
        key: the current subset of variables (excluding 'const')
        branches: how many branches remain
        n: the target subset size or node parameter
        forced_vars: list of lists of wildcard patterns (OR of ANDs), e.g.
                       [
                         ['base1', 'base2', '*cltv*', '*fico*'],
                         ['base3', 'base4', '*cltv*']
                       ]
                     None is treated as "no forced constraints" ([]).
        """
        self.key = key                    # full subset (list of strings)
        self.key2 = key[:n]               # first n entries of the subset
        self.branch_id = n - branches + 1
        self.n = n
        # OR-of-ANDs pattern rules; an empty list means no constraints.
        self.forced_vars = [] if forced_vars is None else forced_vars

        self.child = []                   # child Node objects
        self.key_list = []                # subsets of the children, same order
        self.has_branches = branches

    def satisfies_forced_vars(self, subset):
        """
        Return True if `subset` satisfies this node's forced-vars rules.

        self.forced_vars is a list of pattern lists: the outer list is an
        OR, each inner list is an AND in which every pattern must be matched
        (case-insensitively, via fnmatch) by at least one name in `subset`.
        With no rules at all the answer is always True.
        """
        if not self.forced_vars:
            return True

        names = [name.lower() for name in subset]

        def pattern_hit(pattern):
            # Covered when at least one lower-cased name matches.
            lowered = pattern.lower()
            return any(fnmatch.fnmatch(name, lowered) for name in names)

        # any() = OR across groups, all() = AND inside a group.
        return any(
            all(pattern_hit(pattern) for pattern in group)
            for group in self.forced_vars
        )

    def add_children(self):
        """
        Build child nodes by removing one feature at a time from `key`.

        For offset = 0 .. has_branches - 2 the feature at position
        n - offset - 1 is dropped. A candidate is discarded when that
        position is outside `key`, when the reduced subset violates the
        forced-vars rules, or when the reduced subset has exactly n - 1
        entries. Surviving candidates are appended to `child` (as Node
        objects with offset + 2 branches) and to `key_list`.
        """
        for offset in range(self.has_branches - 1):
            drop_at = self.n - offset - 1
            remaining = self.key[:]

            # Position to drop must exist in the current subset.
            if not 0 <= drop_at < len(remaining):
                continue

            remaining.pop(drop_at)

            # Prune subsets that no longer satisfy the forced patterns.
            if not self.satisfies_forced_vars(remaining):
                continue

            # Carried over from the original code: candidates of length
            # n - 1 are skipped. NOTE(review): when `key` has exactly n
            # entries every candidate has n - 1 entries and none is kept —
            # confirm this is intended.
            if len(remaining) == self.n - 1:
                continue

            self.child.append(
                Node(remaining, offset + 2, self.n, forced_vars=self.forced_vars)
            )
            self.key_list.append(remaining)

n = 
Node(
    key = ['x1','x2', 'x3'],
    branches = 