# Chemical structure standardisation with AMBIT

- `ambitcli` - the AMBIT command-line tool
- Guide and download: http://ambit.sourceforge.net/ambitcli_standardisation.html
- Download: https://zenodo.org/record/1145812
- Used to standardize [ExCAPE-DB](https://jcheminf.springeropen.com/articles/10.1186/s13321-017-0203-5)

This is a Jupyter notebook using BeakerX kernels. The main kernel is Python; it is used mainly to demonstrate how Java code can be run from a Python notebook.

### Configuring Maven repositories and Maven dependencies

In [None]:
%%java
%classpath config resolver mvnLocal
%classpath config resolver nexus-idea-releases https://nexus.ideaconsult.net/content/repositories/releases
%classpath config resolver nexus-idea-snapshots https://nexus.ideaconsult.net/content/repositories/snapshots
%classpath add mvn ambit ambit2-tautomers 4.0.0-SNAPSHOT
%classpath add mvn ambit ambit2-dbcli 4.0.0-SNAPSHOT

### Standardize single structure

In [None]:
%%java
import ambit2.tautomers.processor.StructureStandardizer;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.silent.SilentChemObjectBuilder;
import org.openscience.cdk.smiles.SmilesGenerator;
import org.openscience.cdk.smiles.SmilesParser;

// Parse the example SMILES string into a CDK atom container.
SmilesParser parser = new SmilesParser(SilentChemObjectBuilder.getInstance());
IAtomContainer molecule = parser.parseSmiles("CC(=O)CC(C1=CC=CC=C1)C2=C(C3=CC=CC=C3OC2=O)O");

// Report the option flags of a freshly constructed standardizer (no setters
// are called here, so these are whatever the no-arg constructor leaves).
StructureStandardizer standardizer = new StructureStandardizer();
String flagsTemplate = "Clear isotopes %s\tGenerate 2D %s\tGenerate InChI %s\tGenerate SMILES %s\tAromatic %s\tCanonical %s\nStereo from 2D %s\tTautomers %s\tImplicit H %s\tNeutralise %s\tSplit fragments %s";
String flagsReport = String.format(
        flagsTemplate,
        standardizer.isClearIsotopes(),
        standardizer.isGenerate2D(),
        standardizer.isGenerateInChI(),
        standardizer.isGenerateSMILES(),
        standardizer.isGenerateSMILES_Aromatic(),
        standardizer.isGenerateSMILES_Canonical(),
        standardizer.isGenerateStereofrom2D(),
        standardizer.isGenerateTautomers(),
        standardizer.isImplicitHydrogens(),
        standardizer.isNeutralise(),
        standardizer.isSplitFragments());
System.out.println(flagsReport);

// Run the standardizer on the parsed molecule and hand the result back
// as a SMILES string, which becomes the value of this cell.
IAtomContainer standardized = standardizer.process(molecule);
return SmilesGenerator.generic().create(standardized);

### Standardize file with chemical structures
- using a tab-delimited file with a SMILES column

In [3]:
import pandas as pd

# test.txt is tab-delimited, so pass the separator explicitly instead of
# relying on pandas' default comma. The default only happens to work while
# the file has a single SMILES column; this also matches how the
# standardized output file is read later in the notebook.
df = pd.read_csv("test.txt", sep="\t")
df

Unnamed: 0,SMILES
0,CC(=O)CC(C1=CC=CC=C1)C2=C(C3=CC=CC=C3OC2=O)O
1,c1cc(oc1/C=C/[N+](=O)[O-])[N+](=O)[O-]


In [None]:
%%java
import ambit2.dbcli.AmbitCli;
import ambit2.dbcli.CliOptions;

String inputPath = "test.txt";
String outputPath = "test_std.txt";

// Command line handed to ambitcli. Presumably "-a" selects the action,
// "-i"/"-o" the input and output files and each "-d" a key=value option;
// confirm against the ambitcli standardisation guide.
String[] cliArgs = {
    "-a", "standardize",
    "-i", inputPath,
    "-m", "post",
    "-o", outputPath,
    "-d", "smiles=true",
    "-d", "inchi=true",
    "-d", "tautomers=true"
};

CliOptions cliOptions = new CliOptions();
// Only run the tool when the arguments were parsed successfully.
if (cliOptions.parse(cliArgs)) {
    AmbitCli cli = new AmbitCli(cliOptions);
    cli.go(cliOptions.getCmd(), cliOptions.getSubcommand().name());
}
// The output file is intentionally left on disk (not deleted) so it can be
// loaded afterwards; the cell's value is the output file name.
return outputPath;

In [5]:
# Load the tab-separated file written by the standardisation step.
# keep_default_na=False turns off pandas' default NA-string detection
# for the values read from the file.
df = pd.read_csv(
    "./test_std.txt",
    sep="\t",
    keep_default_na=False,
)
df

Unnamed: 0,InChIKey,InChI,SMILES,RANK,ERROR,cdk:Title
0,FQEPJUOLUDFINX-UHFFFAOYNA-N,InChI=1/C19H16O4/c1-12(20)11-15(13-7-3-2-4-8-1...,CC(=O)CC(C1=CC=CC=C1)C2C(C3=CC=CC=C3OC2=O)=O,-10.35,,
1,NMZDNQIJZAERIO-ONEGZZNKNA-N,InChI=1/C6H4N2O5/c9-7(10)4-3-5-1-2-6(13-5)8(11...,C=1C=C(OC1/C=C/[N+](=O)[O-])[N+](=O)[O-],,,
