In [104]:
import itertools
import numpy as np
import random

from dispersant_screener.featurizer import LinearPolymerSmilesFeaturizer

In [177]:
def get_cap(pool, exclude): 
    for i, char in enumerate(pool): 
        if char != exclude: 
            cap = pool.pop(i)
            break
            
    return pool, char

In [294]:
def bundle_indv(pool): 
    if len(pool) > 0:
        pairs = []
        char_in_pool = set(pool)

        character_lists = []

        lengths = []

        for poolchar in char_in_pool: 
            sublist = []
            for char in pool: 
                if poolchar == char: 
                    sublist.append(char)
            character_lists.append(sublist)
            lengths.append(len(sublist))

        num_pairs = min(lengths)

        for i in reversed(range(num_pairs)): 
            pair = []
            random.shuffle(character_lists)
            for sublist in character_lists: 
                pair.append(sublist.pop(i))

            pairs.append(''.join(pair))

        flat_sublists = sum(character_lists, [])

        flat_sublists = bundle_indv(flat_sublists)
        return flat_sublists + pairs
    return pool
    
    
                

In [295]:
pool = ['a', 'b', 'a', 'c', 'b', 'a',  'b', 'c']

In [296]:
len(pool)

8

In [297]:
bundle_indv(pool)

['ab', 'cba', 'abc']

In [509]:
def get_building_blocks(feat_dict, bundle=True):
    W, Tr, R, Ta, W_cl, Tr_cl, R_cl, Ta_cl, length  = feat_dict['[W]'], feat_dict['[Tr]'], feat_dict['[R]'], feat_dict['[Ta]'], feat_dict['max_[W]'], feat_dict['max_[Tr]'], feat_dict['max_[R]'], feat_dict['max_[Ta]'], feat_dict['length']
    length_ = length
    length = round(length)
    w = round(W * length_)
    tr = round(Tr * length_)
    r = round(R * length_)
    ta = round(Ta * length_)
        
    total_length = w + tr + r + ta 
    if total_length == length:
        w_cluster = round(W_cl)
        tr_cluster = round(Tr_cl)
        r_cluster = round(R_cl)
        ta_cluster = round(Ta_cl)

        w_indv = w - w_cluster
        tr_indv = tr - tr_cluster
        r_indv = r - r_cluster 
        ta_indv = ta - ta_cluster 

        building_blocks = []


        indv_pool = []

        if w_indv: 
            indv_pool.extend(['[W]']*w_indv)
        if tr_indv: 
            indv_pool.extend(['[Tr]']*tr_indv)
        if r_indv: 
            indv_pool.extend(['[R]']*r_indv)
        if ta_indv: 
            indv_pool.extend(['[Ta]'] * ta_indv)

        if w_cluster: 
            try:
                indv_pool, cap_a = get_cap(indv_pool, '[W]')
                indv_pool, cap_b = get_cap(indv_pool, '[W]')
            except Exception: 
                cap_a, cap_b = '', ''
            building_blocks.append(cap_a + '[W]'*w_cluster + cap_b)
        if tr_cluster: 
            try:
                indv_pool, cap_a = get_cap(indv_pool, '[Tr]')
                indv_pool, cap_b = get_cap(indv_pool, '[Tr]')
            except Exception:
                cap_a, cap_b = '', ''
            building_blocks.append(cap_a+'[Tr]'*tr_cluster+cap_b)
        if r_cluster: 
            try:
                indv_pool, cap_a = get_cap(indv_pool, '[R]')
                indv_pool, cap_b = get_cap(indv_pool, '[R]')
            except Exception:
                cap_a, cap_b = '', ''
            building_blocks.append(cap_a + '[R]'*r_cluster + cap_b)
        if ta_cluster: 
            try:
                indv_pool, cap_a = get_cap(indv_pool, '[Ta]')
                indv_pool, cap_b = get_cap(indv_pool, '[Ta]')
            except Exception: 
                cap_a, cap_b = '', ''
            building_blocks.append(cap_a + '[Ta]'*ta_cluster + cap_b)

        if bundle: 
            indv_pool = bundle_indv(indv_pool)

        building_blocks.extend(indv_pool)
        random.shuffle(building_blocks)

        return building_blocks
    else: 
        raise ValueError('Length does not match {}, {}'.format(length, total_length))

In [510]:
def check_validity(smiles, feat_dict):
    lp = LinearPolymerSmilesFeaturizer(smiles)
    feat = lp.featurize()
    # check length
    if feat['length'] % 2 !=0: 
        return False 
    
    if feat['max_[Ta]'] != feat_dict['max_[W]']: 
        return False
    
    if feat['max_[Tr]'] != feat_dict['max_[Tr]']: 
        return False
    
    if feat['max_[R]'] !=  feat_dict['max_[R]']:
        return False
    
    if feat['max_[W]'] != feat_dict['max_[W]']: 
        return False
    
    return True

In [546]:
test = {'max_[Ta]': 5.0,
 'max_[W]': 5.0,
 'max_[R]': 32.0,
 'max_[Tr]': 2.0,
 '[W]': 0.10425227257070213,
 '[Tr]': 0.05218413196461069,
 '[Ta]': 0.11461193780466528,
 '[R]': 0.7182974566793282,
 'length': 48.0}


In [547]:
def get_smiles(feat_dict, max_smiles=5, max_trials=5000): 
    try:
        polymer = get_building_blocks(feat_dict)  
        perms = []
        reversed_perms = []
        for i in range(max_smiles):           
            trials = 0
            while True and (trials < max_trials):
                polymer = get_building_blocks(feat_dict)  
                perm = np.random.permutation(polymer)  
                rev = reversed(perm)
                key = ''.join(perm)
                rev_key = ''.join(rev)

                # Check especially important to verify that we didn't create too large clusters
                # With the individual characters
                valid = check_validity(key, feat_dict)
                if valid:
                    # Doesn't matter in which order the smiles is written
                    if (key or rev_key not in perms) and (key or rev_key not in perms):                
                        perms.append(key)               
                        reversed_perms.append(rev_key)
                        break    
                trials +=1

        if len(perms) == 0: 
            trials = 0
            while True and (trials < max_trials):
                polymer = get_building_blocks(feat_dict, bundle=False)  
                perm = np.random.permutation(polymer)  
                rev = reversed(perm)
                key = ''.join(perm)
                rev_key = ''.join(rev)

                # Check especially important to verify that we didn't create too large clusters
                # With the individual characters
                valid = check_validity(key, feat_dict)
                if valid:
                    # Doesn't matter in which order the smiles is written
                    if (key or rev_key not in perms) and (key or rev_key not in perms):                
                        perms.append(key)               
                        reversed_perms.append(rev_key)
                        break    
                trials +=1
        if perms: 
            print('Found SMILES!')
        return perms
    except ValueError as e:
        print(e)
        return []

In [548]:
test ={'max_[Ta]': 22.0,
 'max_[W]': 22.0,
 'max_[R]': 18.0,
 'max_[Tr]': 4.0,
 '[W]': 0.031419256539234386,
 '[Tr]': 0.12365332112631244,
 '[Ta]': 0.468630730800779,
 '[R]': 0.379081068026744,
 'length': 48.0}

In [549]:
get_smiles(test)

Found SMILES!


['[Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Tr][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][Tr][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][Tr][Tr][Tr][Tr]',
 '[R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][Tr][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][Tr][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Tr][Tr][Tr][Tr]',
 '[Tr][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][Tr][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Tr][Tr][Tr][Tr][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R]',
 '[Tr][Tr][Tr][Tr][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][Ta][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][Tr][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][W][Tr]',
 '[R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R

In [536]:
test['[W]'] * 43

6.523376855128962

In [537]:
test['[R]'] * 43

17.438666853020642

In [540]:
test['[Tr]'] * 43

2.5583567949574473

In [541]:
test['[Ta]'] * 43

15.740712095197383

In [542]:
get_building_blocks(test)

['[Ta]',
 '[Ta]',
 '[Ta]',
 '[Ta]',
 '[Ta]',
 '[Ta]',
 '[Ta]',
 '[W][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][R][Tr]',
 '[Ta]',
 '[Tr][W][W][W][W][W][W][Tr]',
 '[Ta]',
 '[Ta]',
 '[Ta]',
 '[Ta]',
 '[Ta]',
 '[Ta]',
 '[Ta][Ta][Ta]',
 '[Ta]']