### This notebook translates images to inchi strings using single network for whole string

In [1]:
%load_ext tensorboard
%load_ext autoreload
%autoreload 2

import torch, torchmetrics, timm, re, pickle, Levenshtein, math
import torch.nn as nn
import torchvision as tv
import pytorch_lightning as pl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import albumentations as A
from pathlib import Path
from functools import partial
from collections import defaultdict
from fastprogress import progress_bar
from typing import Optional, Union, Tuple
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
from albumentations.pytorch import ToTensorV2
from preprocessing import preprocess_image

from inchi_utils import *

# Set random seed for reproducibility
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
torch.manual_seed(manualSeed);

pl.seed_everything(manualSeed)

# This monkey-patch is there to be able to plot tensors
torch.Tensor.ndim = property(lambda x: len(x.shape))

Global seed set to 999


Random Seed:  999


In [2]:
CHKPTDIR = Path("TONEChkpts")
DATADIR = "data/bms-molecular-translation"
LABELS_CSV_PATH = "data/train_labels.csv"
VOCAB_FILEPATH  = CHKPTDIR/"vocab_dict.pt"
TRAINPATHS_PATH = CHKPTDIR/"train_paths.feather"
TESTPATHS_PATH  = CHKPTDIR/"test_paths.feather"
MODEL_SAVEPATH  = CHKPTDIR/"saved_model.ckpt"
CHKPTDIR.mkdir(parents=True, exist_ok=True)

tb_logger = pl.loggers.TensorBoardLogger(CHKPTDIR, name="inchi_full_exp", default_hp_metric=False)

N_WORKERS = 4
BATCH_SIZE = 256
PRECISION = 16
TRUNCATE_SEQ_TO = 150 # Overriding max length
EMB_SIZE = 256
INP_SIZE = (128, 128)
LR = 1e-2
EPOCHS = 3

In [3]:
!ls {DATADIR}

sample_submission.csv  test  train  train_labels.csv


# Lightning Data Module
### LightningDataModule API

To define a DataModule define 5 methods:
1. prepare_data (how to download(), tokenize, etc…)
2. setup (how to split, etc…)
3. train_dataloader
4. val_dataloader(s)
5. test_dataloader(s)

#### prepare_data
Use this method to do things that might write to disk or that need to be done only from a single process in distributed settings.
1. download
2. tokenize
3. etc…

#### setup
There are also data operations you might want to perform on every GPU. Use setup to do things like:
1. count number of classes
2. build vocabulary
3. perform train/val/test splits
4. apply transforms (defined explicitly in your datamodule or assigned in init)
5. etc…


In [4]:
class ImgtoInChIDataset(Dataset):
    def __init__(self, paths:list, df:pd.DataFrame=None, tsfms:A.Compose=None) -> None:
        self.paths = paths
        if df is not None:
            self.idtoinchi_dict = {
                _id:_inchi for _id, _inchi in
                zip(df["image_id"].values.tolist(), df["InChI"].values.tolist())
            }
        else: # Test time placeholder
            self.idtoinchi_dict = defaultdict(lambda : "test_placeholder", {})
            
        self.tsfms = tsfms
    
    def __len__(self) -> int:
        return len(self.paths)
    
    def __getitem__(self, idx:int) -> Tuple[torch.Tensor, str]:
        imgpath = self.paths[idx]
        imgid = Path(imgpath).stem
        img = np.array(preprocess_image(imgpath, out_size=INP_SIZE), dtype=np.float32)/255.
        if self.tsfms is not None:
            img = self.tsfms(image=img)["image"]
        
        target = self.idtoinchi_dict[imgid]
        return img, target

class ImgToInChIDataModuleONE(pl.LightningDataModule):
    def __init__(self, tb_logger, valset_ratio=0.1) -> None:
        super().__init__()
        self.tb_logger = tb_logger
        self.valset_ratio = valset_ratio
        self.dims = (1, *INP_SIZE)
        
        self.train_tsfms = A.Compose([
            A.Flip(),
#             A.Resize(*INP_SIZE, always_apply=True),
#             A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
#             A.RandomCrop(*INP_SIZE),
#             A.RandomBrightnessContrast(p=0.5),
#             A.Normalize(mean=(0.5), std=(0.229)),
            ToTensorV2(),
        ])
        self.test_tsfms = A.Compose([
#             A.Resize(*INP_SIZE, always_apply=True),
#             A.Normalize(mean=(0.5), std=(0.229)),
            ToTensorV2(),
        ])
        
    def prepare_data(self, verbose:bool=True) -> None:
        """Use this method to do things that might write to disk or that
        need to be done only from a single process in distributed settings."""
        # Load labels in DataFrame
        if verbose: print("Loading labels data...", end=' ')
        self.df = pd.read_csv(LABELS_CSV_PATH)
        if verbose: print("DONE!")
        
        # Load image paths
        if verbose: print("Loading paths...", end=' ')
        if TRAINPATHS_PATH.exists():
            self.train_paths = pd.read_feather(TRAINPATHS_PATH)
            self.train_paths = self.train_paths.train_paths.tolist()
        else:
            if verbose: print("Traning paths file not found. Creating...", end=' ')
            self.train_paths = pd.DataFrame(list((Path(DATADIR)/"train").rglob("*.*")), columns=["train_paths"])
            self.train_paths = self.train_paths.applymap(lambda x: str(x))
            self.train_paths.to_feather(TRAINPATHS_PATH)
            self.train_paths = self.train_paths.train_paths.tolist()
            if verbose: print("DONE!")
        if TESTPATHS_PATH.exists():
            self.test_paths = pd.read_feather(TESTPATHS_PATH)
            self.test_paths = self.test_paths.test_paths.tolist()
        else:
            if verbose: print("Test paths file not found. Creating...", end=' ')
            self.test_paths = pd.DataFrame(list((Path(DATADIR)/"test").rglob("*.*")), columns=["test_paths"])
            self.test_paths = self.test_paths.applymap(lambda x: str(x))
            self.test_paths.to_feather(TESTPATHS_PATH)
            self.test_paths = self.test_paths.test_paths.tolist()
            if verbose: print("DONE!")
        if verbose: print("DONE!")
        
        # Get Vocab and Tokenizer
        if verbose: print("Loading vocab and tokenizer...", end=' ')
        
        if VOCAB_FILEPATH.exists():
            self.tokenizer = Tokenizer.from_file(VOCAB_FILEPATH)
        else:
            if verbose: print("Vocab file not found. Creating...", end=' ')
            vocab = VocabONE.from_inchi_pandas_column(self.df.InChI)
            vocab.save_vocab(VOCAB_FILEPATH)
            self.tokenizer = Tokenizer(vocab)
            if verbose: print("DONE!")
        
        self.vocab_size = len(self.tokenizer.vocab)
        if TRUNCATE_SEQ_TO != None:
            self.max_len = TRUNCATE_SEQ_TO
            self.tokenizer.vocab.max_len = TRUNCATE_SEQ_TO
        else:  
            self.max_len = self.tokenizer.vocab.max_len
        if verbose: print("DONE!")
                
    def setup(self, stage:Optional[str]=None) -> None:
        # Assign train/val datasets for use in dataloaders
        if stage == 'fit' or stage is None:
            trainpaths, valpaths = train_test_split(self.train_paths, test_size=self.valset_ratio)
            self.trainset = ImgtoInChIDataset(trainpaths, self.df, self.train_tsfms)
            self.valset = ImgtoInChIDataset(valpaths, self.df, self.test_tsfms)
            
#             # Sample batch
#             imgs, inp_seqs, attn_masks = next(iter(self.train_dataloader()))
#             self.tb_logger.experiment.add_images("Sample images", imgs)

        # Assign test dataset for use in dataloader(s)
        if stage == 'test' or stage is None:
            self.testset = ImgtoInChIDataset(self.test_paths, tsfms=self.test_tsfms)

    def train_dataloader(self) -> DataLoader:
        return DataLoader(self.trainset, BATCH_SIZE, shuffle=True, collate_fn=self.collate_fn, num_workers=N_WORKERS)

    def val_dataloader(self) -> DataLoader:
        return DataLoader(self.valset, BATCH_SIZE, shuffle=False, collate_fn=self.collate_fn, num_workers=N_WORKERS)
    
    def collate_fn(self, batch:tuple) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        imgs = torch.cat([ins[0].unsqueeze(0) for ins in batch])
        targets = [ins[1] for ins in batch]
        targets = [self.tokenizer.encode(t, pad_to_custom_len=TRUNCATE_SEQ_TO) for t in targets]
        
        batch_inp_seqs = torch.tensor([t["inp_seq"] for t in targets])
        batch_attn_masks = torch.tensor([t["attn_mask"] for t in targets])
        
        return imgs, batch_inp_seqs, batch_attn_masks

In [5]:
# %%time

# dm = ImgToInChIDataModuleONE(tb_logger)
# dm.prepare_data()
# dm.setup('fit')
# imgs, inp_seqs, attn_masks = next(iter(dm.train_dataloader()))
# imgs.shape, inp_seqs.shape, attn_masks.shape

# Model
### At the time of sentence prediction can we use HMMs or [Beam Search](https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Image-Captioning#overview) to make better decisions?

***Try this***:
Use different LSTM layers for each inchi substring like /c /h
```
>>> rnn = nn.LSTM(10, 20, 2)
>>> input = torch.randn(1, 16, 10)
>>> h0 = torch.randn(2, 16, 20)
>>> c0 = torch.randn(2, 16, 20)
```
change number of layers here to num sublayers


[Model from here](https://www.kaggle.com/yasufuminakama/inchi-resnet-lstm-with-attention-starter)

Old model commented here:
<!-- # class Decoder(nn.Module):
#     def __init__(self, vocab_size, max_len, enc_out_channels=128):
#         super().__init__()
#         self.vocab_size = vocab_size
#         self.max_len = max_len
#         self.enc_out_channels = enc_out_channels
#         self.n_pixels = 8*8 if INP_SIZE[0] == 256 else 4*4
#         self.embd = nn.Embedding(self.vocab_size, self.enc_out_channels)
#         self.lstm = nn.LSTM(self.enc_out_channels, self.vocab_size, batch_first=True)
#         self.decfc = nn.Linear(self.enc_out_channels, self.vocab_size)
        
#         # Transformers encoder
#         self.src_mask = self.generate_square_subsequent_mask(BATCH_SIZE)
#         self.pos_encoder = PositionalEncoding(enc_out_channels, 0.1)
#         self.transformer_encoder = nn.TransformerEncoderLayer(enc_out_channels, 1, enc_out_channels, 0.1)
#         self.decoder = nn.Linear(enc_out_channels, self.vocab_size)
        
#     def generate_square_subsequent_mask(self, sz):
#         mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
#         mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
#         return mask
# #     def init_hidden_state(self, encoder_out):
# #         mean_encoder_out = encoder_out.mean(dim=1)
# #         h = self.init_h(mean_encoder_out)  # (batch_size, decoder_dim)
# #         c = self.init_c(mean_encoder_out)
# #         return h, c
    
#     def forward(self, encoder_out, inp_seqs):
# #         encoder_out = self.decfc(encoder_out)
# #         emb = self.embd(inp_seqs)
# # #         print(f"emb = {emb.shape}, encoder_out = {encoder_out.shape}")
# #         lstm_out, _ = self.lstm(emb)
# # #         print(lstm_out.shape)
# #         out = lstm_out + encoder_out
    
    
#         # Transformers encoder
#         encoder_out = self.decfc(encoder_out)
#         src = self.embd(inp_seqs) * math.sqrt(self.enc_out_channels)
#         src = self.pos_encoder(src)
# #         print("SRC =", src.shape)
#         output = self.transformer_encoder(src, self.src_mask.to(src.device))
#         output = self.decoder(output)
# #         print(encoder_out.shape, output.shape)
#         output = encoder_out * output
#         return output
    
#     def predict(self, encoder_out, tokenizer):
#         encoder_out = self.decfc(encoder_out)
#         bs = encoder_out.size(0)
#         syn_inp_seqs = torch.tensor(tokenizer.vocab.ctoi(tokenizer.vocab.bos_token), device=encoder_out.device)
#         syn_inp_seqs = torch.repeat_interleave(syn_inp_seqs, bs)
#         syn_inp_seqs = syn_inp_seqs.view(bs, 1)
# # #         print("syn_inp_seqs before emb =", syn_inp_seqs.shape)
        
# #         # Predict next tokens to start token
# #         pred_emb = []
# #         for i in range(self.max_len):
# #             emb = self.embd(syn_inp_seqs)
# #             pred, _ = self.lstm(emb)
# #             pred_emb.append(pred)
# #             syn_inp_seqs = pred.argmax(dim=-1)
# # #         print("len =", len(pred_emb))
# #         pred_emb = torch.cat(pred_emb, dim=1)
# # #         print("pred_emb =", pred_emb.shape)
# #         out = pred_emb + encoder_out
    
    
#         # TF
#         preds = []
#         for i in range(self.max_len):
#             src = self.embd(syn_inp_seqs) * math.sqrt(self.enc_out_channels)
#             src = self.pos_encoder(src)
#             output = self.transformer_encoder(src, self.src_mask.to(src.device))
#             preds.append(output)
#             syn_inp_seqs = output.argmax(dim=-1)
#         preds = torch.cat(preds, dim=1)
#         output = self.decoder(preds)
#         output = encoder_out + output
#         return output -->

In [6]:
class Encoder(nn.Module):
    def __init__(self, model_name='resnet18', max_len=275, out_channels=512, pretrained=False):
        super().__init__()
#         n_out_pixels = 8*8 if INP_SIZE[0] == 256 else 4*4
        self.out_channels = out_channels
        self.cnn = timm.create_model(model_name, pretrained=pretrained)
        self.cnn.conv1 = nn.Conv2d(1, 64, kernel_size=5, stride=2, padding=2, bias=False)
        self.cnn.global_pool = nn.Identity()
        self.cnn.fc = nn.Identity()
        
    def forward(self, x):
        out = self.cnn(x)
        out = out.view(x.size(0), self.out_channels, -1)
        return out

class Decoder(nn.Module):
    def __init__(self, vocab_size, max_len, n_pixels=16, encoder_dim=512):
        super().__init__()
        self.vocab_size = vocab_size
        self.max_len = max_len
        self.encoder_dim = encoder_dim
        
        self.encfc = nn.Linear(n_pixels, 1)
#         self.attn = nn.TransformerEncoderLayer(encoder_dim, 4, encoder_dim)
        self.embd = nn.Embedding(self.vocab_size, self.encoder_dim)
        self.lstm = nn.LSTM(self.encoder_dim, self.encoder_dim, batch_first=True)
        self.decfc = nn.Linear(self.encoder_dim, self.vocab_size)
        
    def forward(self, encoder_out, inp_seqs):
        encoder_out = self.encfc(encoder_out).permute(0, 2, 1)
#         encoder_out = self.attn(encoder_out)
        embs = self.embd(inp_seqs)
        embs = torch.cat((encoder_out, embs), dim=1)
        lstm_out, _ = self.lstm(embs)
        out = self.decfc(lstm_out)
        return out
    
class InChINetONE(pl.LightningModule):
    def __init__(self, vocab_size, max_len, tokenizer):
        super().__init__()
        self.max_len = max_len
        self.tokenizer = tokenizer
        self.encoder_net = Encoder(max_len=max_len-1)
        self.decoder_net = Decoder(vocab_size, max_len)
        self.loss_fn = nn.CrossEntropyLoss()
        self.softmax = nn.Softmax(dim=-1)
        
    def forward(self, imgs, inp_seqs):
        encoder_out = self.encoder_net(imgs)
#         print("Encoder =", encoder_out.shape)
        output = self.decoder_net(encoder_out, inp_seqs)
        return output
    
    def predict(self, imgs):
        states, outputs = None, []
        with torch.no_grad():
            encoder_out = self.encoder_net(imgs)
            encoder_out = self.decoder_net.encfc(encoder_out).permute(0, 2, 1)
#             inp_seq = self.decoder_net.attn(encoder_out)
            for _ in range(self.max_len):
                lstm_out, states = self.decoder_net.lstm(inp_seq, states)
                out = self.decoder_net.decfc(lstm_out)
                pred = out.argmax(dim=-1).squeeze(1)
                outputs.append(pred.unsqueeze(1))
                inp_seq = self.decoder_net.embd(pred).unsqueeze(1)
        return torch.cat(outputs, dim=1)
    
    def training_step(self, train_batch, batch_idx):
        imgs, inp_seqs, attn_masks = train_batch
        output = self.forward(imgs, inp_seqs[:, :-1])
        if batch_idx == 500:
            print("INP =", inp_seqs[0])
            print("OUT =", output[0].argmax(dim=-1))
        loss = self.loss_fn(output.permute(0,2,1).float(), inp_seqs)
        # Logging to TensorBoard by default
        self.log('train_loss', loss, on_step=True, on_epoch=True, logger=True)
        return loss
    
    def training_epoch_end(self, outputs):
        for name,params in self.named_parameters():
            self.logger.experiment.add_histogram(name, params, self.current_epoch)
    
    def validation_step(self, val_batch, batch_idx):
        with torch.no_grad():
            imgs, inp_seqs, attn_masks = val_batch
            output = self.forward(imgs, inp_seqs[:, :-1])
            loss = self.loss_fn(output.permute(0,2,1).float(), inp_seqs)
            distance = self.compute_distance(imgs, inp_seqs)
        self.log('val_loss', loss, on_step=True, on_epoch=True, logger=True, prog_bar=True)
        self.log("Levenshtein distance", distance, on_epoch=True, logger=True, prog_bar=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=LR)
        lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, LR, epochs=EPOCHS, steps_per_epoch=8523)
        return [optimizer], [lr_scheduler]
    
    def compute_distance(self, imgs, inp_seqs):
        outputs = self.predict(imgs)
        avg_distance = [
            Levenshtein.distance(self.tokenizer.decode(inp_seqs[i]), self.tokenizer.decode(outputs[i]))
            for i in range(imgs.size(0))
        ]
        return np.mean(avg_distance)
    
# dm = ImgToInChIDataModuleONE(tb_logger=tb_logger)
# dm.prepare_data(verbose=True)
# dm.setup('fit')
# imgs, inp_seqs, attn_masks = next(iter(dm.train_dataloader()))
# print("inp_seqs =", inp_seqs.shape)

# encoder_net = Encoder()
# encoder_out = encoder_net(imgs.cpu()) 
# encoder_out = torch.rand([BATCH_SIZE, 512, 16])
# print("Encoder =", encoder_out.shape)

# decoder_net = Decoder(dm.vocab_size, dm.max_len)
# decoder_out = decoder_net(encoder_out, inp_seqs[:, :-1])
# print("@forward decoder_out =", decoder_out.shape)

# Training and Validation

In [7]:
%tensorboard --logdir {CHKPTDIR}

dm = ImgToInChIDataModuleONE(tb_logger=tb_logger)
dm.prepare_data(verbose=True)

model = InChINetONE(dm.vocab_size, dm.max_len, dm.tokenizer)
# Add network graph to tensorboard
# tb_logger.log_graph(model, [imgs[0].unsqueeze(0).to(model.device), inp_seqs[0].unsqueeze(0).to(model.device)])
lr_monitor = pl.callbacks.LearningRateMonitor(logging_interval='step')
# checkpoint_callback = pl.callbacks.ModelCheckpoint(monitor='val_loss')
trainer = pl.Trainer(gpus=1, auto_lr_find=True, max_epochs=EPOCHS, precision=PRECISION,
                     profiler=None, deterministic=True, #limit_train_batches=100, limit_val_batches=100,
                     default_root_dir=CHKPTDIR, logger=tb_logger, callbacks=[lr_monitor])

trainer.fit(model, dm)
trainer.save_checkpoint(MODEL_SAVEPATH)

Reusing TensorBoard on port 6006 (pid 2420439), started 0:00:40 ago. (Use '!kill 2420439' to kill it.)

Loading labels data... DONE!
Loading paths... DONE!
Loading vocab and tokenizer... DONE!


GPU available: True, used: True
TPU available: None, using: 0 TPU cores
Using native 16bit precision.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type             | Params
-------------------------------------------------
0 | encoder_net | Encoder          | 11.2 M
1 | decoder_net | Decoder          | 3.9 M 
2 | loss_fn     | CrossEntropyLoss | 0     
3 | softmax     | Softmax          | 0     
-------------------------------------------------
15.1 M    Trainable params
0         Non-trainable params
15.1 M    Total params
60.220    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

INP = tensor([  2, 182,  73, 188,  95, 183, 187, 196,  92,  10,  17,   8,  73,   4,
         92,   7, 103,   5,  51,   8, 169,   8,  90,   8, 147,   8, 158,   8,
         91,   4,  51,   5,  18,   8,  29,   8, 125,   8, 114,   8, 136,   8,
         40,   4,  89,   5,  62,   4,  29,   5,  84,  11, 114,   8, 136,   7,
         51,   7,  90, 188,   7, 147,   8,  18, 189,   7,  17,   8, 103, 190,
          3,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0], device='cuda:0')
OUT = tensor([  2, 182,  73, 188,  84, 196, 196, 196,  92, 197,  17,   8,  92,   8,
         92,   5, 103,   5,  29,   8, 147,   8,  18,  



In [8]:
imgs, inp_seqs, attn_masks = next(iter(dm.train_dataloader()))
model.eval();
outputs = model.predict(imgs)
avg_distance = []
for i in progress_bar(range(BATCH_SIZE)):
    true = dm.tokenizer.decode(inp_seqs[i])
    pred = dm.tokenizer.decode(outputs[i])
    print("True =", true)
    print("Pred =", pred)
    print()
#     print("Distance =", Levenshtein.distance(true, pred))
    avg_distance.append(Levenshtein.distance(true, pred))
np.mean(avg_distance)

True = InChI=1S/C16H12F2N2O/c1-9-7-8-11(14(18)13(9)17)16(21)15-10-5-3-4-6-12(10)20(2)19-15/h3-8H,1-2H3
Pred = InChI=1S/C14H18N2O2/c1-11(2)9-12(10-13)15-7-5-6-8-14(11)3-4-12(13)16/h3-4,6,9,11H,2,5,7-8H2,1H3

True = InChI=1S/C11H21NO2/c1-4-6-12-7-5-9(10(13)14)11(2,3)8-12/h9H,4-8H2,1-3H3,(H,13,14)
Pred = InChI=1S/C14H18N2O2/c1-11(2)9-12(10-13)15-7-5-6-8-14(11)3-4-12(13)16/h3-4,6,9,11H,2,5,7-8H2,1H3

True = InChI=1S/C17H19FN2O3/c1-12-14(18)6-4-7-15(12)20-17(22)19-11-13-5-2-3-8-16(13)23-10-9-21/h2-8,21H,9-11H2,1H3,(H2,19,20,22)
Pred = InChI=1S/C14H18N2O2/c1-11(2)9-12(10-13)15-7-5-6-8-14(11)3-4-12(13)16/h3-4,6,9,11H,2,5,7-8H2,1H3

True = InChI=1S/C11H10N2OS2/c1-15-10-12-6-11(16-10)7-4-2-3-5-8(7)13-9(11)14/h2-5H,6H2,1H3,(H,13,14)/t11-/m1/s1
Pred = InChI=1S/C14H18N2O2/c1-11(2)9-12(10-13)15-7-5-6-8-14(11)3-4-12(13)16/h3-4,6,9,11H,2,5,7-8H2,1H3

True = InChI=1S/C23H26Cl2F3N3O4S/c1-4-19(22(33)29-5-2)30(13-15-8-6-7-9-17(15)24)21(32)14-31(36(3,34)35)20-12-16(23(26,27)28)10-11-18(20)25/h6-12,19H,4-5

77.21875