# SELFIES Augmentation Example

#### This example shows how to augment SELFIES strings using the SMILESAugmentation tool.

In [43]:
import pandas as pd

from smiles_augmentation.selfies_enumerators import SelfiesRandomizer

**Load the SELFIES data:**

In [44]:
# Read the CSV and keep only its SELFIES column as an array of strings.
selfies = (
    pd.read_csv('data/selfies.csv')
    .loc[:, "SELFIES"]
    .values
)
selfies

array(['[C][=C][C][=C][Branch1][Branch1][C][=C][Ring1][=Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=Branch1][C][=O][N][C][=C][C][=Branch1][=Branch2][=C][Branch1][Branch1][C][=C][Ring1][=Branch1][F][F]',
       '[F][C][=C][C][=C][Branch2][Ring1][=Branch2][N][C][Branch2][Ring1][Ring1][C][=C][Branch1][=Branch2][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=C][C][=C][Ring1][N][=O][C][=C][Ring2][Ring1][Branch1][F]',
       '[C][N][C][Branch1][C][P][C][C][Ring1][=Branch1]', '[C][O][C][F]',
       '[C][O][C][=Branch1][C][=O][C][C][C][N][Branch2][Ring2][Ring2][C][C][O][C][C][Branch2][Ring1][O][C][Branch2][Ring1][Branch1][O][C][Ring1][Branch1][N][C][=N][C][=C][Branch1][=Branch2][N][=C][N][=C][Ring1][=Branch1][Ring1][=Branch2][N][C][O][O][C][C][C][Branch2][Ring1][N][C][Branch2][Ring1][#Branch1][C][Branch1][Ring2][O][Ring1][Branch1][N][C][=N][C][=C][Branch1][=Branch2][N][=C][N][=C][Ring1][=Branch1][Ring1][=Branch2][N][O][O]',
       '[C][C][C][=Branch1][C][=O][O][C][C]',
       '[C][C][C][C][B

**Create a SelfiesRandomizer object and enumerate the SELFIES by calling the enumerate method:**

You can choose whether to keep or remove duplicates, set a seed for reproducibility, and specify the number of jobs to run in parallel, the level of verbosity, and the maximum number of SELFIES to enumerate.

In [45]:
# Configure the randomizer: remove duplicates, fix the seed for
# reproducibility, run with a single job, and use verbosity level 0.
enumerator = SelfiesRandomizer(
    selfies=selfies,
    remove_duplicates=True,
    seed=123,
    n_jobs=1,
    verbose=0,
)

# n_max sets the maximum number of SELFIES to enumerate.
enumerated_selfies = enumerator.enumerate(n_max=10)
enumerated_selfies

[['[C][=C][Branch2][Ring1][#C][C][Branch1][C][F][=C][C][=C][Ring1][#Branch1][N][C][=Branch1][C][=O][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1][F]',
  '[N][Branch2][Ring1][Branch1][C][=Branch1][C][=O][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=C][C][Branch1][C][F][=C][Branch1][C][F][C][=C][Ring1][Branch2]',
  '[C][Branch2][Ring1][=C][C][=C][Branch1][#Branch1][C][=C][C][=C][Ring1][=Branch1][C][Branch1][S][N][C][=C][C][Branch1][C][F][=C][Branch1][Branch1][C][=C][Ring1][#Branch1][F][=O][=C][C][=C][C][=C][Ring2][Ring1][#Branch1]',
  '[C][=C][C][=C][C][=C][Ring1][=Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=Branch1][C][=O][N][C][=C][C][=C][Branch1][Branch2][C][=Branch1][Ring2][=C][Ring1][=Branch1][F][F]',
  '[C][=Branch2][Ring2][C][=C][C][=Branch1][#Branch1][=C][C][=C][Ring1][=Branch1][F][N][C][=Branch1][C][=O][C][=C][Branch1][#Branch1][C][=C][C][=C][Ring1][=Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1][F]',
  '[O]

**Let’s see the enumerated SELFIES for the first compound:**

In [46]:
# Take the first input string and the matching entry of the enumeration
# result (a list of alternative SELFIES for that compound).
original_selfies = selfies[0]
new_enumerated_selfies = enumerated_selfies[0]

# Show the original next to its enumerated variants.
print(f"Original SELFIES: {original_selfies}")
print(f"New enumerated SELFIES: {new_enumerated_selfies}")

Original SELFIES: [C][=C][C][=C][Branch1][Branch1][C][=C][Ring1][=Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=Branch1][C][=O][N][C][=C][C][=Branch1][=Branch2][=C][Branch1][Branch1][C][=C][Ring1][=Branch1][F][F]
New enumerated SELFIES: ['[C][=C][Branch2][Ring1][#C][C][Branch1][C][F][=C][C][=C][Ring1][#Branch1][N][C][=Branch1][C][=O][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1][F]', '[N][Branch2][Ring1][Branch1][C][=Branch1][C][=O][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=C][C][Branch1][C][F][=C][Branch1][C][F][C][=C][Ring1][Branch2]', '[C][Branch2][Ring1][=C][C][=C][Branch1][#Branch1][C][=C][C][=C][Ring1][=Branch1][C][Branch1][S][N][C][=C][C][Branch1][C][F][=C][Branch1][Branch1][C][=C][Ring1][#Branch1][F][=O][=C][C][=C][C][=C][Ring2][Ring1][#Branch1]', '[C][=C][C][=C][C][=C][Ring1][=Branch1][C][=C][C][=C][C][=C][Ring1][=Branch1][C][=Branch1][C][=O][N][C][=C][C][=C][Branch1][Branch2][C][=Branch1][Ring2][=C][Ring