Generic backtracking code, based on https://www.geeksforgeeks.org/backtracking-algorithms/

In [280]:
def solve(chars, safe_up_to):
    """Finds a solution to a backtracking problem. Based on  https://www.geeksforgeeks.org/backtracking-algorithms/

    Every element of ``chars`` is placed exactly once, i.e. the search looks
    for an ordering (permutation) of ``chars`` whose every prefix is accepted
    by ``safe_up_to``.

    chars      -- a sequence of values to arrange. Each element is used
                  exactly once; duplicates are allowed. The input is not
                  modified.
    safe_up_to -- a function with two arguments, solution and position, that
                  returns whether the values assigned to slots 0..pos in
                  the solution list, satisfy the problem constraints.
                  Slots after ``position`` hold None.

    Return the solution as a list of values, or None if no ordering of
    ``chars`` satisfies the constraints. An empty ``chars`` yields ``[]``.
    """
    values = list(chars)
    size = len(values)
    solution = [None] * size
    # used[i] marks values[i] as already placed in an earlier slot. Tracking
    # indices (instead of popping from the list) lets a choice be undone and
    # avoids mutating the list while iterating over it.
    used = [False] * size

    def extend_solution(position):
        # All slots filled: the whole arrangement satisfies the constraints.
        if position >= size:
            return True
        for i, value in enumerate(values):
            if used[i]:
                continue
            solution[position] = value
            if safe_up_to(solution, position):
                used[i] = True
                if extend_solution(position + 1):
                    return True
                # Dead end further down: release the value and try the next.
                used[i] = False
        # Nothing fits here; clear the slot so callers never see stale data.
        solution[position] = None
        return False

    return solution if extend_solution(0) else None

In [276]:
# Example feature dictionaries for experimenting with the solver.
# Keys of the form 'max_<bead>' are the per-bead limits that the cluster
# checks in this file compare against; the bare '<bead>' keys are presumably
# the fraction of each bead type and 'length' the total number of beads
# -- TODO confirm against the featurizer.
test = {
    'max_[Ta]': 5.0,
    'max_[W]': 5.0,
    'max_[R]': 32.0,
    'max_[Tr]': 2.0,
    '[W]': 0.10425227257070213,
    '[Tr]': 0.05218413196461069,
    '[Ta]': 0.11461193780466528,
    '[R]': 0.7182974566793282,
    'length': 48.0,
}


test2 = {
    'max_[Ta]': 22.0,
    'max_[W]': 22.0,
    'max_[R]': 18.0,
    'max_[Tr]': 4.0,
    '[W]': 0.031419256539234386,
    '[Tr]': 0.12365332112631244,
    '[Ta]': 0.468630730800779,
    '[R]': 0.379081068026744,
    'length': 48.0,
}


In [106]:
from dispersant_screener.smiles2feat import check_validity
from dispersant_screener.featurizer import LinearPolymerSmilesFeaturizer

In [387]:
def _cluster_not_too_large(found_features, expected_features, beads=['[W]', '[Ta]', '[R]', '[Tr]']): 
    for bead in beads: 
        max_feat = 'max_' + bead
        if found_features[max_feat] > expected_features[max_feat]:
            return False
    return True

In [361]:
def _get_available_counts(counter, found_counter): 
    available = {}
    for k,v in counter.items(): 
        available[k] = v - found_counter[k]
        
    return available

In [363]:
def _cluster_still_possible(available, found_features, expected_features): 
    beads = list(available.keys()) 
        
    for bead in beads: 
        max_feat = 'max_' + bead
        if (available[bead] < expected_features[max_feat]) & (found_features[max_feat] < expected_features[max_feat]):
            return False
        
    return True

In [403]:
import math

def _no_unallowed_clusters_form(available, expected_features):
    beads = list(available.keys()) 
    total_available = sum(available.values())  
    for bead in beads:
        others = total_available - available[bead]
        max_feat = 'max_' + bead
        max_n = math.floor(available[bead]/expected_features[max_feat])
        
        if available[bead] > max_n * expected_features[max_feat] - (max_n - 1) * others:
            return False
        
    return True 

In [398]:
def safe_up_to_full(characters, position, expected_features, chars):
    """Constraint check for the backtracking solver.

    characters        -- the (partially filled) solution list; only slots
                         0..position are looked at
    position          -- index of the last filled slot
    expected_features -- mapping with the 'max_<bead>' limits to respect
    chars             -- all building blocks that have to be placed

    The filled prefix is joined into one string and featurized; the bead
    counts of the prefix are subtracted from the counts of all building
    blocks to get what is still available. Return True only if the three
    cluster checks (not too large, still possible, no unallowed clusters)
    all pass. If a ZeroDivisionError is raised while computing the features
    or running the checks, the prefix is treated as safe and True is
    returned.
    """
    prefix_smiles = ''.join(characters[:position+1])

    # Bead counts over *all* building blocks.
    full_smiles = ''.join(chars)
    full_featurizer = LinearPolymerSmilesFeaturizer(full_smiles)
    total_counts = full_featurizer.get_counts(full_smiles, full_featurizer.characters)

    prefix_featurizer = LinearPolymerSmilesFeaturizer(prefix_smiles)

    try:
        cluster_stats = prefix_featurizer.get_cluster_stats(
            prefix_smiles, prefix_featurizer.replacement_dict
        )
        used_counts = prefix_featurizer.get_counts(
            prefix_smiles, prefix_featurizer.characters
        )
        remaining = _get_available_counts(total_counts, used_counts)

        # Checks run in this order and stop at the first failure.
        all_checks_pass = (
            _cluster_not_too_large(cluster_stats, expected_features)
            and _cluster_still_possible(remaining, cluster_stats, expected_features)
            and _no_unallowed_clusters_form(remaining, expected_features)
        )
    except ZeroDivisionError:
        return True

    return all_checks_pass

In [261]:
from functools import partial

In [262]:
# Short example bead string for trying out the featurizer by hand.
smiles = '[Tr][Tr][Tr][R][Ta][R][Tr]'

In [257]:
# Featurize the example string and count how often each bead type occurs.
lp = LinearPolymerSmilesFeaturizer(smiles)
lp.get_counts(smiles, lp.characters)

{'[W]': 0, '[Tr]': 4, '[Ta]': 1, '[R]': 2}

In [258]:
from dispersant_screener.smiles2feat import get_building_blocks

In [None]:
def split_up_chars(chars):
    """Unfinished stub -- the intended behaviour was never written down.

    TODO: implement. Until then the function raises so that an accidental
    call fails loudly instead of silently returning None.
    """
    raise NotImplementedError('split_up_chars is not implemented yet')

In [438]:
# Derive the building blocks from the `test2` feature dict, bind the expected
# features and the full block list into the constraint check, and run the
# solver on those blocks.
chars = get_building_blocks(feat_dict=test2, cap=False, bundle=False)
safe_up_to = partial(safe_up_to_full, expected_features=test2, chars=chars)
solve(chars, safe_up_to)

['[Tr]',
 '[Tr]',
 '[W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W]',
 '[Tr][Tr][Tr][Tr]',
 '[Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta]',
 '[R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R]']