## Installation

```mamba install -c conda-forge molfeat datamol```

or

```conda install -c conda-forge molfeat datamol```

# DataMol framework

In [None]:
import datamol as dm
from datamol import standardize_smiles

# read
df = dm.read_csv('train.csv', smiles_column='smi', index_col=0)
df

In [None]:
# viz to_image
dm.viz.to_image(df.mol, mol_size=(200, 200))

In [None]:
df.mol[2]

In [None]:
# strip to core
dm.strip_mol_to_core(df.mol[2])

# Molfeat

In [28]:
# fpvec transformers
import numpy as np
from molfeat.trans import FPVecTransformer, FeatConcat
from molfeat.trans import FPVecTransformer, FeatConcat



trans = FPVecTransformer('desc2D', do_not_standardize=True)

with dm.without_rdkit_log():
    res = trans(df.mol)

print(res.shape, len(trans))
# print(res)


(10, 131) 131


In [None]:
# featconcat

trans = FeatConcat([
    FPVecTransformer('ecfp:4', length=32, dtype=int),
    FPVecTransformer('avalon', length=32, dtype=int),
])

with dm.without_rdkit_log():
    res = trans(df.mol)

res.shape

In [38]:
# single calculators
from molfeat.calc import MordredDescriptors, RDKitDescriptors2D

mord = MordredDescriptors()

mord(df.mol[0])

array([ 15.77486167,  13.37054663,   0.        , ..., 152.        ,
         6.54166667,   3.79861111])

In [40]:
calc = RDKitDescriptors2D()
calc(df.mol[0])

[08:27:43] Initializing MetalDisconnector
[08:27:43] Running MetalDisconnector
[08:27:43] Initializing Normalizer
[08:27:43] Running Normalizer


FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/alexander.korolyov/miniforge3/envs/chem/Library\\share/RDKit\\Data\\Salts.txt'

In [None]:
ADD_3D_FEATS = True

feats2D_params = [
    # fps
    {'kind': 'ecfp:4', 'length': 1024},    
    {'kind': 'topological', 'length': 1024, 'n_jobs': -1},
    {'kind': 'avalon', 'length': 1024, 'n_jobs': -1},
    {'kind': 'layered', 'length': 1024, 'n_jobs': -1},
    {'kind': 'secfp', 'length': 1024, 'n_jobs': -1},
    {'kind': 'pattern', 'length': 1024, 'n_jobs': -1},
    {'kind': 'pharm2D', 'length': 1024, 'n_jobs': -1},
    
    # fixed length fps
    {'kind': 'erg', 'n_jobs': -1},
    {'kind': 'maccs', 'n_jobs': -1},
    
    # fixed length descriptors - to normalize
    {'kind': 'estate', 'n_jobs': -1},
    {'kind': 'desc2D', 'n_jobs': -1},
    {'kind': 'mordred', 'n_jobs': -1},
    {'kind': 'cats2D', 'n_jobs': -1},
    {'kind': 'scaffoldkeys', 'n_jobs': -1},
    {'kind': 'skeys', 'n_jobs': -1},
]

feats3D_params = [
    {'kind': 'pharm3D', 'length': 1024, 'n_jobs': -1},
    {'kind': 'desc3D', 'n_jobs': -1},
    {'kind': 'cats3D', 'n_jobs': -1},
    {'kind': 'electroshape', 'n_jobs': -1},
    {'kind': 'usr'},
    {'kind': 'usrcat'},
]

# HuggingFace Models

In [None]:
from molfeat.store.modelstore import ModelStore
store = ModelStore()

In [None]:
# list of huggingface models in store
hf_models = [x.name for x in store.available_models if x.group == 'huggingface']
print(hf_models)

In [None]:
[x.description for x in store.available_models if x.name == 'ChemBERTa-77M-MTR']    

In [None]:
from molfeat.trans.pretrained.hf_transformers import PretrainedHFTransformer

for name in hf_models:
    trans = PretrainedHFTransformer(name)
    print(name, trans(['C1NCC1']).shape)