In [1]:
import deepsmiles
import mbuild as mb
import numpy as np

# poly-smiles:
Quickly and easily create SMILES strings of polymers of any length from the SMILES string of the monomer

The key to this functionality is the use of **DeepSMILES**, which has simpler formatting rules than those typically required of SMILES strings.

**Repo**: https://github.com/baoilleach/deepsmiles

`pip install --upgrade deepsmiles`

### Current Functionality:
    - Create polymer smiles string from a monomer
    
### Planned Functionality:
    - Create polydisperse system from a dictionary or a distribution function
    - Create a polydisperse system that corresponds with a PDI value
    - Create polymers with different regio-regularities
    
    
### To Do:
    - Change atom counting logic to handle elements with 2 characters
    - See if bond site can be indicated by string index # rather than **
        Does the index # translate from SMILES to DeepSMILES?
        
    - Run some quick performance checks
        - How does it scale with num of atoms?
        - Compare against mbuild's current polymer plugin
        - Compare against initializing a system of just monomers
            - Should indicate how much of the time required is due to the polymerization step

In [9]:
def convert_smiles(smiles=False, deep=False):
    '''
    Convert a string between SMILES and DeepSMILES notation.

    Exactly one of the two arguments must be a non-empty string; that string
    is translated into the other notation.

    Parameters
    ----------
    smiles : str, optional
        SMILES string to encode as DeepSMILES.
    deep : str, optional
        DeepSMILES string to decode into SMILES.

    Returns
    -------
    str
        The DeepSMILES encoding of `smiles`, or the SMILES decoding of `deep`.

    Raises
    ------
    ValueError
        If strings are given for both arguments, or for neither.
    '''
    # Validate before building the converter so that misuse fails fast with a
    # clear error instead of returning an empty tuple or None to the caller.
    if smiles and deep:
        raise ValueError('Only provide a string for one of smiles or deep')
    if not smiles and not deep:
        raise ValueError('Provide a string for one of smiles or deep')

    converter = deepsmiles.Converter(rings=True, branches=True)
    if smiles:
        return converter.encode(smiles)  # SMILES -> DeepSMILES
    return converter.decode(deep)  # DeepSMILES -> SMILES
    
    
def poly_smiles(monomer_string, length=2, ftype = 'mol2', string_only=True,
                energy_min=False, save=False, visualize=False):
    '''
    Build a polymer of `length` repeat units from a marked DeepSMILES monomer.

    Parameters
    ----------
    monomer_string : str
        DeepSMILES string of the monomer in which the single-character atom at
        the polymerization site is wrapped in asterisks, e.g. 'cc*c*ccc6'.
    length : int, default 2
        Number of monomer units in the polymer; must be at least 1.
    ftype : str, default 'mol2'
        File extension used for the output file when `save` is True.
    string_only : bool, default True
        If True, return only the polymer SMILES string. No mbuild compound is
        built in that case, and the remaining options are ignored.
    energy_min : bool, default False
        If True, call `energy_minimize` on the compound (25 steps, 'GAFF'
        forcefield, 'md' algorithm).
    save : bool, default False
        If True, save the compound to 'comp_<length>mer.<ftype>', overwriting
        any existing file of that name.
    visualize : bool, default False
        If True, call `visualize().show()` on the compound.

    Returns
    -------
    str or compound
        The polymer SMILES string when `string_only` is True, otherwise the
        object returned by `mb.load` for that string.

    Raises
    ------
    ValueError
        If `length` is less than 1, or if `monomer_string` does not mark
        exactly one single-character polymerization site as `*x*`.

    Notes
    -----
    Atoms are counted as one per alphabetic character, so element symbols
    written with two letters are over-counted when sizing the branch closure.
    '''
    polymer = _deep_polymer_string(monomer_string, length)
    polymer_smiles = convert_smiles(deep = polymer)

    # Return before touching mbuild: building the compound is only needed
    # when the caller actually wants the compound.
    if string_only:
        return polymer_smiles

    compound = mb.load(polymer_smiles, smiles = True)
    if energy_min:
        compound.energy_minimize(steps = 25, forcefield='GAFF', algorithm = 'md')
    if visualize:
        compound.visualize().show()
    if save:
        file_name = "comp_{}mer.{}".format(length, ftype)
        compound.save(file_name, overwrite = True)
    return compound


def _deep_polymer_string(monomer_string, length):
    '''Return the DeepSMILES string for `length` copies of the marked monomer.'''
    if length < 1:
        raise ValueError('length must be at least 1')

    # The polymerization site is a single character wrapped in asterisks, so
    # there must be exactly two asterisks and they must be two positions apart.
    star_positions = [index for index, char in enumerate(monomer_string) if char == '*']
    if not star_positions:
        raise ValueError('Identify the wanted polymerization site using *x*')
    if len(star_positions) != 2 or star_positions[1] - star_positions[0] != 2:
        raise ValueError('Select only one polymerization site using *x*')

    # Number of branch-closing brackets placed after each inserted unit:
    # one per alphabetic character, minus the ')' already in the monomer.
    atom_count = sum(1 for char in monomer_string if char.isalpha())
    brackets = ')' * (atom_count - monomer_string.count(')'))

    first_star, second_star = star_positions
    head = monomer_string[:first_star]
    site_atom = monomer_string[first_star + 1:second_star]
    tail = monomer_string[second_star + 1:]

    # Every unit but the last opens a branch right after its site atom and
    # closes it with `brackets` before continuing with the rest of the monomer;
    # the innermost unit is the plain monomer without markers.
    monomer = head + site_atom + tail
    nested_units = length - 1
    return (head + site_atom) * nested_units + monomer + (brackets + tail) * nested_units


def viz(smiles_string, deep = True, energy_min = False):
    '''
    Load a molecule from a string with mbuild and show it.

    Parameters
    ----------
    smiles_string : str
        DeepSMILES string when `deep` is True, otherwise a SMILES string.
    deep : bool, default True
        If True, decode `smiles_string` from DeepSMILES to SMILES first.
    energy_min : bool, default False
        If True, run `energy_minimize(algorithm='md', steps=100)` on the
        loaded compound before showing it.
    '''
    plain_smiles = convert_smiles(deep = smiles_string) if deep else smiles_string
    molecule = mb.load(plain_smiles, smiles = True)
    if energy_min:
        molecule.energy_minimize(algorithm='md', steps=100)
    view = molecule.visualize()
    view.show()
    
    
def polydisperse_system(system_dict=None, distribution=None):
    '''
    Build a box filled with polymers of several lengths.

    Parameters
    ----------
    system_dict : dict, optional
        Maps each marked DeepSMILES monomer string (as accepted by
        `poly_smiles`) to a dict of polymer_length: number_of_polymers.
        Takes precedence over `distribution` when both are given.
    distribution : optional
        Placeholder for generating the system from a distribution; this is
        not implemented yet.

    Returns
    -------
    The object returned by `mb.fill_box` for the requested polymers, packed
    with density=100 (kg/m^3 according to the mbuild fill_box documentation).

    Raises
    ------
    ValueError
        If neither `system_dict` nor `distribution` is given, or if
        `system_dict` requests no polymers at all.
    NotImplementedError
        If only `distribution` is given.
    '''
    if system_dict:
        compounds = []
        counts = []
        for monomer, length_counts in system_dict.items():
            for length, num in length_counts.items():
                if num < 1:
                    continue  # nothing requested for this chain length
                # Build each distinct polymer once and let fill_box replicate
                # it, instead of listing the same compound `num` times.
                compounds.append(poly_smiles(monomer, length, string_only=False))
                counts.append(num)
        if not compounds:
            raise ValueError('system_dict does not request any polymers')
        return mb.fill_box(compounds, counts, density=100)

    if distribution:
        raise NotImplementedError('Generating a system from a distribution is not implemented yet')
    raise ValueError('Specify method to generate system')
        

In [14]:
# Monomers as DeepSMILES strings; the atom at the polymerization site is
# wrapped in asterisks.
benzene = 'cc*c*ccc6'
thiophene = 'csc*c*c5'

# Chain-length make-up per monomer, as polymer_length: number_of_polymers.
benzene_d = {
    4: 3,
    8: 10,
    12: 3,
}
thiophene_d = {
    4: 3,
    8: 10,
    12: 3,
}

# Full system description: monomer string -> its chain-length make-up.
system_d = {
    benzene: benzene_d,
    thiophene: thiophene_d,
}

system = polydisperse_system(system_dict=system_d)

In [15]:
# Show the packed system built in the previous cell; the view object returned
# by visualize() is this cell's displayed output.
system.visualize()

<py3Dmol.view at 0x7fe0f270a5d0>

4
8
12
4
8
12
