In [1]:
from fairseq_utils import preprocess_series, load_dataset, load_model, get_embeddings
from fairseq.data import Dictionary
from constants import TOKENIZER_SUFFIXES, TOKENIZER_PATH, FAIRSEQ_PREPROCESS_PATH, PROJECT_PATH, PREDICTION_MODEL_PATH, TASK_PATH
import pandas as pd

# Example molecules passed to preprocess_series below
# (presumably SMILES strings — confirm against fairseq_utils).
molecules = [
    "CCC",
    "CC",
]

def embed_all(path, cuda=0):
    """Gather embeddings for every model type / tokenizer combination.

    ``embed`` is called once per pair, with the model types "bart" and
    "roberta" in the outer iteration and ``TOKENIZER_SUFFIXES`` in the
    inner one.

    Args:
        path: Root directory of the preprocessed data; forwarded to ``embed``.
        cuda: Value forwarded to ``embed`` as its ``cuda`` argument
            (presumably a GPU index — confirm against fairseq_utils).

    Returns:
        Nested dict of the form ``{model_type: {tokenizer_suffix: embeddings}}``.
    """
    return {
        architecture: {
            suffix: embed(path, architecture, suffix, cuda)
            for suffix in TOKENIZER_SUFFIXES
        }
        for architecture in ("bart", "roberta")
    }

def embed(path, model_type, tokenizer_suffix, cuda):
    """Embed the preprocessed "train" split with one model/tokenizer pair.

    Args:
        path: Root directory that holds one sub-directory per tokenizer
            suffix, each containing an ``input0`` folder.
        model_type: Model name appended to the tokenizer suffix to locate
            the checkpoint directory (e.g. "bart" or "roberta").
        tokenizer_suffix: Tokenizer identifier used for the checkpoint
            directory, the task dictionary directory and the dataset path.
        cuda: Device selector; handed to ``load_model`` as a string and to
            ``get_embeddings`` unchanged.

    Returns:
        Whatever ``get_embeddings`` returns when called with ``whole_mol=True``.
    """
    # Checkpoint directories are named "<tokenizer_suffix>_<model_type>".
    checkpoint_dir = PREDICTION_MODEL_PATH / "_".join((tokenizer_suffix, model_type))
    task_dict_dir = TASK_PATH / "bbbp" / tokenizer_suffix
    network = load_model(
        checkpoint_dir / "checkpoint_last.pt",
        task_dict_dir,
        str(cuda),
    )

    # The "train" split and dict.txt are both read from the input0 folder.
    preprocessed_dir = path / tokenizer_suffix / "input0"
    samples = load_dataset(preprocessed_dir / "train")
    vocabulary = Dictionary.load(str(preprocessed_dir / "dict.txt"))

    return get_embeddings(network, samples, vocabulary, whole_mol=True, cuda=cuda)
    
# Working directory for this experiment, located under the project root.
latent_geometry_path = PROJECT_PATH / "latent_space_geometry"

# Preprocess the example molecules into that directory.
preprocess_series(molecules, latent_geometry_path)

# Embed with every model/tokenizer combination; cuda=1 is forwarded to
# load_model and get_embeddings (presumably selecting GPU 1 — confirm).
embedding_dict = embed_all(latent_geometry_path, cuda=1)

  from .autonotebook import tqdm as notebook_tqdm
2025-08-18 14:05:24 | INFO | rdkit | Enabling RDKit 2022.09.3 jupyter extensions
Tokenizing dataset: 100%|█████████████████████████████████████| 2/2 [00:00<00:00, 85.77it/s]


fairseq-preprocess --only-source --trainpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_atom_isomers/train.input --testpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_atom_isomers/test.input--destdir /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_atom_isomers/input0 --srcdict /data/jgut/SMILES_or_SELFIES/fairseq_preprocess/smiles_atom_isomers/dict.txt --workers 60


2025-08-18 14:05:27 | INFO | fairseq_cli.preprocess | Namespace(no_progress_bar=False, log_interval=100, log_format=None, log_file=None, aim_repo=None, aim_run_hash=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang=None, target_lang=None, tr

fairseq-preprocess --only-source --trainpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_atom_standard/train.input --testpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_atom_standard/test.input--destdir /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_atom_standard/input0 --srcdict /data/jgut/SMILES_or_SELFIES/fairseq_preprocess/smiles_atom_standard/dict.txt --workers 60


2025-08-18 14:05:29 | INFO | fairseq_cli.preprocess | Namespace(no_progress_bar=False, log_interval=100, log_format=None, log_file=None, aim_repo=None, aim_run_hash=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang=None, target_lang=None, tr

fairseq-preprocess --only-source --trainpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_trained_isomers/train.input --testpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_trained_isomers/test.input--destdir /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_trained_isomers/input0 --srcdict /data/jgut/SMILES_or_SELFIES/fairseq_preprocess/smiles_trained_isomers/dict.txt --workers 60


2025-08-18 14:05:32 | INFO | fairseq_cli.preprocess | Namespace(no_progress_bar=False, log_interval=100, log_format=None, log_file=None, aim_repo=None, aim_run_hash=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang=None, target_lang=None, tr

fairseq-preprocess --only-source --trainpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_trained_standard/train.input --testpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_trained_standard/test.input--destdir /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_trained_standard/input0 --srcdict /data/jgut/SMILES_or_SELFIES/fairseq_preprocess/smiles_trained_standard/dict.txt --workers 60


2025-08-18 14:05:35 | INFO | fairseq_cli.preprocess | Namespace(no_progress_bar=False, log_interval=100, log_format=None, log_file=None, aim_repo=None, aim_run_hash=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang=None, target_lang=None, tr

fairseq-preprocess --only-source --trainpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_atom_isomers/train.input --testpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_atom_isomers/test.input--destdir /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_atom_isomers/input0 --srcdict /data/jgut/SMILES_or_SELFIES/fairseq_preprocess/selfies_atom_isomers/dict.txt --workers 60


2025-08-18 14:05:37 | INFO | fairseq_cli.preprocess | Namespace(no_progress_bar=False, log_interval=100, log_format=None, log_file=None, aim_repo=None, aim_run_hash=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang=None, target_lang=None, tr

fairseq-preprocess --only-source --trainpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_atom_standard/train.input --testpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_atom_standard/test.input--destdir /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_atom_standard/input0 --srcdict /data/jgut/SMILES_or_SELFIES/fairseq_preprocess/selfies_atom_standard/dict.txt --workers 60


2025-08-18 14:05:40 | INFO | fairseq_cli.preprocess | Namespace(no_progress_bar=False, log_interval=100, log_format=None, log_file=None, aim_repo=None, aim_run_hash=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang=None, target_lang=None, tr

fairseq-preprocess --only-source --trainpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_trained_isomers/train.input --testpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_trained_isomers/test.input--destdir /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_trained_isomers/input0 --srcdict /data/jgut/SMILES_or_SELFIES/fairseq_preprocess/selfies_trained_isomers/dict.txt --workers 60


2025-08-18 14:05:42 | INFO | fairseq_cli.preprocess | Namespace(no_progress_bar=False, log_interval=100, log_format=None, log_file=None, aim_repo=None, aim_run_hash=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang=None, target_lang=None, tr

fairseq-preprocess --only-source --trainpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_trained_standard/train.input --testpref /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_trained_standard/test.input--destdir /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_trained_standard/input0 --srcdict /data/jgut/SMILES_or_SELFIES/fairseq_preprocess/selfies_trained_standard/dict.txt --workers 60


2025-08-18 14:05:45 | INFO | fairseq_cli.preprocess | Namespace(no_progress_bar=False, log_interval=100, log_format=None, log_file=None, aim_repo=None, aim_run_hash=None, tensorboard_logdir=None, wandb_project=None, azureml_logging=False, seed=1, cpu=False, tpu=False, bf16=False, memory_efficient_bf16=False, fp16=False, memory_efficient_fp16=False, fp16_no_flatten_grads=False, fp16_init_scale=128, fp16_scale_window=None, fp16_scale_tolerance=0.0, on_cpu_convert_precision=False, min_loss_scale=0.0001, threshold_loss_scale=None, amp=False, amp_batch_retries=2, amp_init_scale=128, amp_scale_window=None, user_dir=None, empty_cache_freq=0, all_gather_list_size=16384, model_parallel_size=1, quantization_config_path=None, profile=False, reset_logging=False, suppress_crashes=False, use_plasma_view=False, plasma_path='/tmp/plasma', criterion='cross_entropy', tokenizer=None, bpe=None, optimizer=None, lr_scheduler='fixed', scoring='bleu', task='translation', source_lang=None, target_lang=None, tr

2025-08-18 14:06:04 | INFO | fairseq.models.bart.model | Registering classification head: sentence_classification_head
2025-08-18 14:06:04 | INFO | fairseq.data.data_utils | loaded 2 examples from: /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_trained_isomers/input0/train
100%|█████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 85.48it/s]
2025-08-18 14:06:04 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/prediction_models/selfies_trained_standard_bart
2025-08-18 14:06:04 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/task/bbbp/selfies_trained_standard
2025-08-18 14:06:04 | INFO | fairseq.tasks.sentence_prediction | [input] dictionary: 937 types
2025-08-18 14:06:05 | INFO | fairseq.models.bart.model | Registering classification head: sentence_classification_head
2025-08-18 14:06:06 | INFO | fairseq.data.data_utils | loaded 2 examples from: /data/jgut/SMILES_or_SELFIES/latent_space_geo

2025-08-18 14:06:08 | INFO | fairseq.data.data_utils | loaded 2 examples from: /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_atom_isomers/input0/train
100%|████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 138.96it/s]
2025-08-18 14:06:08 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/prediction_models/smiles_atom_standard_roberta
2025-08-18 14:06:08 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/task/bbbp/smiles_atom_standard
2025-08-18 14:06:09 | INFO | fairseq.tasks.sentence_prediction | [input] dictionary: 369 types
2025-08-18 14:06:10 | INFO | fairseq.models.roberta.model | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_effic

2025-08-18 14:06:11 | INFO | fairseq.data.data_utils | loaded 2 examples from: /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_atom_standard/input0/train
100%|████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 140.58it/s]
2025-08-18 14:06:11 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/prediction_models/smiles_trained_isomers_roberta
2025-08-18 14:06:11 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/task/bbbp/smiles_trained_isomers
2025-08-18 14:06:11 | INFO | fairseq.tasks.sentence_prediction | [input] dictionary: 905 types
2025-08-18 14:06:13 | INFO | fairseq.models.roberta.model | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_

2025-08-18 14:06:13 | INFO | fairseq.data.data_utils | loaded 2 examples from: /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_trained_isomers/input0/train
100%|████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 131.28it/s]
2025-08-18 14:06:14 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/prediction_models/smiles_trained_standard_roberta
2025-08-18 14:06:14 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/task/bbbp/smiles_trained_standard
2025-08-18 14:06:14 | INFO | fairseq.tasks.sentence_prediction | [input] dictionary: 905 types
2025-08-18 14:06:15 | INFO | fairseq.models.roberta.model | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'mem

2025-08-18 14:06:16 | INFO | fairseq.data.data_utils | loaded 2 examples from: /data/jgut/SMILES_or_SELFIES/latent_space_geometry/smiles_trained_standard/input0/train
100%|████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 131.30it/s]
2025-08-18 14:06:16 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/prediction_models/selfies_atom_isomers_roberta
2025-08-18 14:06:16 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/task/bbbp/selfies_atom_isomers
2025-08-18 14:06:16 | INFO | fairseq.tasks.sentence_prediction | [input] dictionary: 577 types
2025-08-18 14:06:19 | INFO | fairseq.models.roberta.model | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_e

2025-08-18 14:06:19 | INFO | fairseq.data.data_utils | loaded 2 examples from: /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_atom_isomers/input0/train
100%|████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 139.10it/s]
2025-08-18 14:06:19 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/prediction_models/selfies_atom_standard_roberta
2025-08-18 14:06:19 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/task/bbbp/selfies_atom_standard
2025-08-18 14:06:20 | INFO | fairseq.tasks.sentence_prediction | [input] dictionary: 377 types
2025-08-18 14:06:21 | INFO | fairseq.models.roberta.model | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_ef

2025-08-18 14:06:22 | INFO | fairseq.data.data_utils | loaded 2 examples from: /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_atom_standard/input0/train
100%|████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 136.10it/s]
2025-08-18 14:06:22 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/prediction_models/selfies_trained_isomers_roberta
2025-08-18 14:06:22 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/task/bbbp/selfies_trained_isomers
2025-08-18 14:06:22 | INFO | fairseq.tasks.sentence_prediction | [input] dictionary: 929 types
2025-08-18 14:06:24 | INFO | fairseq.models.roberta.model | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memo

2025-08-18 14:06:24 | INFO | fairseq.data.data_utils | loaded 2 examples from: /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_trained_isomers/input0/train
100%|████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 138.66it/s]
2025-08-18 14:06:24 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/prediction_models/selfies_trained_standard_roberta
2025-08-18 14:06:24 | INFO | fairseq.file_utils | loading archive file /data/jgut/SMILES_or_SELFIES/task/bbbp/selfies_trained_standard
2025-08-18 14:06:24 | INFO | fairseq.tasks.sentence_prediction | [input] dictionary: 937 types
2025-08-18 14:06:26 | INFO | fairseq.models.roberta.model | {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 100, 'log_format': None, 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': None, 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, '

2025-08-18 14:06:26 | INFO | fairseq.data.data_utils | loaded 2 examples from: /data/jgut/SMILES_or_SELFIES/latent_space_geometry/selfies_trained_standard/input0/train
100%|████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 150.62it/s]


In [2]:
# Show the nested result of embed_all: model type -> tokenizer suffix -> embeddings.
embedding_dict

{'bart': {'smiles_atom_isomers': [array([-2.42190495e-01,  1.60543597e+00,  1.19077992e+00, -8.39337707e-01,
          -1.28168359e-01, -8.36360276e-01, -1.49249756e+00,  2.14236808e+00,
           6.76059008e-01,  4.74864304e-01, -9.28797245e-01,  7.71617591e-01,
           5.76583624e-01, -1.90318871e+00, -9.53145400e-02, -4.61041659e-01,
           7.88458705e-01,  4.47579652e-01, -1.38522828e+00,  3.95882487e-01,
          -8.90882552e-01, -3.26410198e+00,  2.51583219e-01, -4.23518151e-01,
           1.22336471e+00,  1.70989335e+00,  1.35319054e-01, -1.54947805e+00,
           1.78836358e+00, -2.46007740e-01, -1.17901087e+00,  2.60721850e+00,
          -7.01201200e-01, -8.88983533e-02, -1.58384532e-01,  1.56221700e+00,
           1.34234130e-01, -1.81152606e+00, -3.97236943e-01,  6.04623675e-01,
           6.22656047e-01, -7.31084704e-01, -5.41664124e-01, -9.34890330e-01,
          -8.93053830e-01, -8.48939776e-01,  7.99117982e-01, -1.33471370e+00,
           6.84394956e-01,  1.192