In [6]:
import deepsmiles
import itertools as it
import json
import mbuild as mb
from utils.smiles_utils import convert_smiles, viz

#import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D
#import pymatgen as mg
#from pymatgen import symmetry

In [7]:
def poly_smiles(monomer_string, length=2, ftype = 'mol2', string_only=True,
                energy_min=False, save=False, visualize=False):
    '''
    Build a polymer by nesting copies of a monomer at one marked atom.

    The polymerization site is marked in the monomer by wrapping a single
    character in asterisks, e.g. ``cc*c*cc``. Each repeat unit except the last
    is the monomer with a ``{}`` placeholder (followed by closing brackets)
    inserted right after the marked atom; the last unit is the plain monomer.
    The assembled string is passed to ``convert_smiles(deep=...)``.

    monomer_string : str, required
        Monomer string containing exactly one ``*x*`` marker
    length : int, optional, default = 2
        Number of repeat units; must be at least 1
    ftype : str, optional, default = 'mol2'
        File extension used when ``save`` is True
    string_only : bool, optional, default = True
        If True, return only the converted polymer string
    energy_min : bool, optional, default = False
        If True, energy-minimize the loaded compound
    save : bool, optional, default = False
        If True, save the compound as ``comp_<length>mer.<ftype>``
    visualize : bool, optional, default = False
        If True, show the compound

    Returns the converted polymer string when ``string_only`` is True,
    otherwise the tuple ``(compound, polymer_smiles)``. If the marker or the
    length is invalid, an error message is printed and None is returned.
    '''
    # Validate the *x* marker: exactly two asterisks enclosing one character.
    star_positions = [pos for pos, char in enumerate(monomer_string) if char == '*']
    if not star_positions:
        print('ERROR: Identify the wanted polymerization site using *x*')
        return None
    if len(star_positions) != 2 or star_positions[1] - star_positions[0] != 2:
        print('ERROR: Select only one polymerization site using *x*')
        return None
    # With fewer than one repeat unit the '{}' placeholder would never be
    # filled and would be handed to the converter verbatim.
    if length < 1:
        print('ERROR: length must be at least 1')
        return None

    # Number of closing brackets appended at the polymerization site:
    # alphabetic characters minus the ')' already present in the monomer.
    atom_count = sum(1 for char in monomer_string if char.isalpha())
    brackets = ')' * (atom_count - monomer_string.count(')'))

    first, second = star_positions
    # Template: drop the first '*', replace the second with '{}' + brackets.
    template = (monomer_string[:first] + monomer_string[first + 1:second]
                + '{}' + brackets + monomer_string[second + 1:])
    # The bare monomer is simply the input with both asterisks removed.
    monomer = monomer_string.replace('*', '')
    print(monomer)

    # Nest (length - 1) templates, then close the chain with a bare monomer.
    polymer = '{}'
    for _ in range(length - 1):
        polymer = polymer.format(template)
    polymer = polymer.format(monomer)

    polymer_smiles = convert_smiles(deep = polymer)
    if string_only:
        return polymer_smiles
    compound = mb.load(polymer_smiles, smiles = True)
    if energy_min:
        compound.energy_minimize(steps = 25, forcefield='GAFF', algorithm = 'md')
    if visualize:
        compound.visualize().show()
    if save:
        file_name = "comp_{}mer.{}".format(length, ftype)
        compound.save(file_name, overwrite = True)
    return compound, polymer_smiles

def viz(smiles_string=None, deep=False, energy_min = False, json_file=False):
    '''
    Load a structure with mbuild and show it.

    smiles_string : str, optional
        String to load; ignored when json_file is given
    deep : bool, optional, default = False
        If True (and no json_file is given), smiles_string is first passed
        through convert_smiles(deep=...)
    energy_min : bool, optional, default = False
        If True, run an energy minimization before showing the compound
    json_file : str, optional, default = False
        File name inside 'typed-components/' whose 'smiles' entry is loaded.
        The name is used exactly as given; no extension is appended.
    '''
    if json_file:
        with open('typed-components/{}'.format(json_file)) as handle:
            smiles_string = json.load(handle)['smiles']
    elif deep:
        smiles_string = convert_smiles(deep = smiles_string)

    compound = mb.load(smiles_string, smiles = True)
    if energy_min:
        compound.energy_minimize(algorithm='md', steps=100)
    compound.visualize().show()

def _branch_insertion_index(chars, index):
    '''
    Return the position in chars after which a branch for the atom at index is added.

    If the atom is the last character, or the next character is alphabetic,
    the branch goes directly after the atom (its own index). Otherwise it goes
    after the last non-alphabetic character that follows the atom; -1 is
    returned when the string ends before another alphabetic character.
    '''
    next_ind = index + 1
    if next_ind >= len(chars) or chars[next_ind].isalpha():
        return index
    while next_ind < len(chars) and not chars[next_ind].isalpha():
        next_ind += 1
    if next_ind == len(chars):
        return -1
    return next_ind - 1


def find_bond_site(smiles_string, deep = True):
    '''
    Interactively select and classify bonding sites on a structure string.

    Every 'c' or 'C' character in the string is treated as a candidate (this
    also matches the first letter of a two-letter symbol such as 'Cl'). For
    each candidate a marker branch 'N)' is temporarily added, the structure is
    shown with viz, and the user is asked whether to keep the site, what kind
    of site it is, and how many bonds it can form. Answering 'exit' stops the
    scan early.

    smiles_string : str, required
        Structure string to scan
    deep : bool, optional, default = True
        Forwarded to viz so the preview string is interpreted in the same
        format as smiles_string (DeepSMILES when True)

    Returns a tuple (bonding_dicts, template_molecule_string):
    bonding_dicts is a list of dictionaries with the keys 'index', 'type'
    ('branch', 'poly' or 'both') and 'num_bonds'; template_molecule_string is
    the input with '{}' added after each accepted site.
    '''
    site_types = {'1': 'branch', '2': 'poly', '3': 'both'}
    bonding_dicts = []  # List of dictionaries of each bonding site
    smiles_string_list = list(smiles_string)
    template_list = list(smiles_string)
    for index, char in enumerate(smiles_string_list):
        if char.lower() != 'c':
            continue

        # Temporarily attach the marker branch so the candidate can be seen.
        index_to_change = _branch_insertion_index(smiles_string_list, index)
        original_value = smiles_string_list[index_to_change]
        smiles_string_list[index_to_change] = original_value + 'N)'
        temp_string = ''.join(smiles_string_list)
        # The preview string is in the same format as the input, so tell viz
        # whether it needs converting before loading.
        viz(temp_string, deep=deep)

        print('Add as possible bonding site? (y/n) or (exit)')  # QUESTION No. 1
        add = input().lower()
        if add in ('yes', 'y'):
            template_list[index_to_change] = original_value + '{}'
            print('Classify bonding site as (1) branch, (2) polymerization, or (3) both') # QUESTION No. 2
            site_type = input()
            # Re-ask on an unrecognized answer instead of continuing with an
            # undefined (or stale) site type.
            while site_type not in site_types:
                print('Classify bonding site as (1) branch, (2) polymerization, or (3) both')
                site_type = input()
            bond_site_type = site_types[site_type]
            if bond_site_type == 'poly':
                num_of_bonds = 1
            else:
                print('How many bonds can be formed from this atom? (1, 2, 3, 4..etc)') # QUESTION No. 3
                num_of_bonds = int(input())
            bonding_dicts.append({
                'index': index_to_change,
                'type': bond_site_type,
                'num_bonds': num_of_bonds,
            })
        if add == 'exit':
            break
        # Remove the marker again before looking at the next candidate.
        smiles_string_list[index_to_change] = original_value

    template_molecule_string = ''.join(template_list)

    return bonding_dicts, template_molecule_string
       

    
def add_component(name, structure_type, string, full_name = 'Not Provided', deep_smiles = False):
    '''
    Build the dictionary describing one component and write it to
    'typed-components/<name>.json'.

    The input string may be standard SMILES or DeepSMILES; the other form is
    obtained with convert_smiles, so the dictionary always holds both. When
    structure_type is 'backbone', find_bond_site is run on the DeepSMILES form
    and its results are stored under 'bonding' and 'template'; for any other
    structure_type 'bonding' is an empty dictionary and no 'template' key is
    written.

    name : str, required
        Generic, identifiable name (Ex. "ITIC", "PTB7", "Benzene"); also used
        as the JSON file name
    structure_type : str, required
        Options are backbone, branch, constituent.
    string : str, required
        A SMILES string in either standard SMILES or DeepSMILES
    full_name : str, optional, default = "Not Provided"
        Stores the full/official name of the structure
    deep_smiles : bool, optional, default = False
        True if string is in the DeepSMILES format,
        False if string is in the standard SMILES format

    Returns the dictionary that was written to disk.

    TODO:
    Add try/except for building compound given the string/deep_smiles combo
    Add write dictionary to JSON file - Ask Mike about the Signac .json file formats. Dictionary of dictionaries?
    '''
    # Keep the form that was supplied and derive the other one.
    if deep_smiles:
        deep_form = string
        plain_form = convert_smiles(deep = string)
    else:
        plain_form = string
        deep_form = convert_smiles(smiles = string)

    record = {}
    bonding = {}
    if structure_type == 'backbone':
        bonding, record['template'] = find_bond_site(deep_form)

    record.update({
        'name': name,
        'class': structure_type,
        'smiles': plain_form,
        'deep_smiles': deep_form,
        'bonding': bonding,
        'full name': full_name,
    })

    with open('typed-components/{}.json'.format(name), 'w') as out_file:
        json.dump(record, out_file)
    return record

def read_comp(path=None):
    '''
    Print every top-level key/value pair of a component JSON file.

    path : str, optional, default = None
        File to read. When omitted, the module-level variable ``fpath`` is
        used, which keeps the original no-argument call working.
    '''
    target = fpath if path is None else path
    with open(target) as jf:
        d = json.load(jf)
    # The file is closed before printing; only the parsed data is needed.
    for key in d:
        print('{}: {}'.format(key, d[key]))

            
def get_smiles_string(component, smi_type='smiles'):
    '''
    Read one string representation of a stored component.

    component : str, required
        Component name; the file 'typed-components/<component>.json' is read
    smi_type : str, optional, default = 'smiles'
        Which entry to return: 'smiles', 'deep_smiles' or 'template'

    Returns the requested entry from the component file.
    Raises ValueError for any other smi_type, and KeyError if the file does
    not contain the requested entry.
    '''
    if smi_type not in ('smiles', 'deep_smiles', 'template'):
        raise ValueError(
            "smi_type must be 'smiles', 'deep_smiles' or 'template', got {!r}".format(smi_type))
    with open('typed-components/{}.json'.format(component)) as jf:
        d = json.load(jf)
    # The accepted smi_type values are exactly the keys used in the file.
    return d[smi_type]

In [104]:
# Register a branch component from its standard SMILES string; add_component
# also writes it to 'typed-components/benzene-cyclopentane.json'.
branch = add_component(
    name='benzene-cyclopentane',
    structure_type='branch',
    string="C=c2c(=O)c1c(cccc1)c2=C(CC)CC",
)

In [5]:
def build_compound(backbone, branches, polymerize=False):
    '''
    Gather the stored strings and bonding sites needed to combine a backbone
    with a set of branches, and print them.

    backbone : str, required
        Name of the backbone component stored in 'typed-components/'
    branches : iterable of str, required
        Names of the branch components stored in 'typed-components/'
    polymerize : bool, optional, default = False
        If True, sites typed 'poly' are left out of the returned indices

    Returns a tuple (bond_indices, branch_smiles).
    '''

    def get_bonding_sites():
        # Indices of the backbone sites available for branches.
        with open('typed-components/{}.json'.format(backbone)) as jf:
            bond_info = json.load(jf)['bonding']
        return [site['index'] for site in bond_info
                if not polymerize or site['type'] != 'poly']

    backbone_smiles = get_smiles_string(backbone)
    backbone_deep_smiles = get_smiles_string(backbone, smi_type='deep_smiles')
    backbone_template = get_smiles_string(backbone, smi_type = 'template')
    branch_smiles = [get_smiles_string(branch) for branch in branches]
    branch_deep_smiles = [get_smiles_string(branch, smi_type = 'deep_smiles') for branch in branches]
    bond_indices = get_bonding_sites()

    print(backbone_smiles)
    print(backbone_deep_smiles)
    print(backbone_template)
    print(branch_smiles)
    print(branch_deep_smiles)
    print(bond_indices)

    return bond_indices, branch_smiles

## ---------------------------------------------------------------

In [None]:
def poly_smiles_idx(monomer_string, polyindex, length=2, ftype='mol2', string_only=True,
                    energy_min=False, save=False, visualize=False):
    '''
    Unfinished variant of poly_smiles that takes the polymerization site as
    an index.

    At present the function only converts monomer_string with
    convert_smiles(smiles=...) and returns None; every other parameter is
    accepted but not used yet.
    '''
    deep_monomer = convert_smiles(smiles=monomer_string)
    return None
    
def find_symmetry(string, deep = True):
    '''
    Given a chemical structure, find and classify its symmetry.
    Groups, axes of symmetry, planes of symmetry, chirality, etc...

    string : str, required
        Structure string, DeepSMILES when deep is True, standard SMILES otherwise
    deep : bool, optional, default = True
        If True, string is decoded to standard SMILES before loading

    NOTE: unfinished - the compound is loaded but no symmetry analysis is
    performed yet, and nothing is returned.
    '''
    if deep:
        # Pass by keyword: the first positional parameter of convert_smiles is
        # `smiles`, which would encode instead of decode.
        smiles_string = convert_smiles(deep = string)
    else:
        smiles_string = string
    compound = mb.load(smiles_string, smiles = True)  # mbuild compound
    

In [None]:
def convert_smiles(smiles=False, deep=False):
    '''
    Converts from SMILES to DeepSMILES and vice versa.
    Whichever has a string provided, will convert to the other.

    smiles : str, optional
        Standard SMILES string to encode as DeepSMILES
    deep : str, optional
        DeepSMILES string to decode to standard SMILES

    Returns the converted string. If strings are provided for both, a message
    is printed and an empty tuple is returned (kept for backward
    compatibility). If neither is provided, None is returned.
    '''
    if smiles and deep:
        print('Only provide a string for one of smiles or deep')
        return ()
    if not smiles and not deep:
        return None
    # Build the converter only once it is known that a conversion will run.
    converter = deepsmiles.Converter(rings=True, branches=True)
    if smiles:  # Convert from SMILES to DeepSMILES
        return converter.encode(smiles)
    return converter.decode(deep)  # Convert from DeepSMILES to SMILES