Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patch/mdlv3000 updates #824

Merged
merged 7 commits into from
Feb 7, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.openscience.cdk.interfaces.IChemObject;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IChemSequence;
import org.openscience.cdk.interfaces.IElement;
import org.openscience.cdk.interfaces.IIsotope;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.interfaces.ISingleElectron;
Expand Down Expand Up @@ -127,9 +128,10 @@ public class MDLV2000Reader extends DefaultChemObjectReader {
BufferedReader input = null;
private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(MDLV2000Reader.class);

private BooleanIOSetting forceReadAs3DCoords;
private BooleanIOSetting interpretHydrogenIsotopes;
private BooleanIOSetting addStereoElements;
private BooleanIOSetting optForce3d;
private BooleanIOSetting optHydIso;
private BooleanIOSetting optStereoPerc;
private BooleanIOSetting optStereo0d;

// Pattern to remove trailing space (String.trim() will remove leading space, which we don't want)
private static final Pattern TRAILING_SPACE = Pattern.compile("\\s+$");
Expand Down Expand Up @@ -412,7 +414,7 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
// 0123456789012345678901
if (is3Dfile(program)) {
hasZ = true;
} else if (!forceReadAs3DCoords.isSet()) {
} else if (!optForce3d.isSet()) {
for (IAtom atomToUpdate : atoms) {
Point3d p3d = atomToUpdate.getPoint3d();
if (p3d != null) {
Expand Down Expand Up @@ -455,40 +457,12 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
}

// create 0D stereochemistry
if (addStereoElements.isSet()) {
Parities:
if (optStereoPerc.isSet() && optStereo0d.isSet()) {
for (Map.Entry<IAtom, Integer> e : parities.entrySet()) {
int parity = e.getValue();
if (parity != 1 && parity != 2)
continue; // 3=unspec
int idx = 0;
IAtom focus = e.getKey();
IAtom[] carriers = new IAtom[4];
int hidx = -1;
for (IAtom nbr : outputContainer.getConnectedAtomsList(focus)) {
if (idx == 4)
continue Parities; // too many neighbors
if (nbr.getAtomicNumber() == 1) {
if (hidx >= 0)
continue Parities;
hidx = idx;
}
carriers[idx++] = nbr;
}
// too few neighbors, or already have a hydrogen defined
if (idx < 3 || idx < 4 && hidx >= 0)
continue;
if (idx == 3)
carriers[idx++] = focus;

if (idx == 4) {
Stereo winding = parity == 1 ? Stereo.CLOCKWISE : Stereo.ANTI_CLOCKWISE;
// H is always at back, even if explicit! At least this seems to be the case.
// we adjust the winding as needed
if (hidx == 0 || hidx == 2)
winding = winding.invert();
outputContainer.addStereoElement(new TetrahedralChirality(focus, carriers, winding));
}
IStereoElement<IAtom,IAtom> stereoElement
= createStereo0d(outputContainer, e.getKey(), e.getValue());
if (stereoElement != null)
molecule.addStereoElement(stereoElement);
}
}

Expand Down Expand Up @@ -516,7 +490,7 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
// don't have a hydrogen count for atoms and stereo perception isn't
// currently possible
if (!(outputContainer instanceof IQueryAtomContainer) && !isQuery &&
addStereoElements.isSet() && hasX && hasY) {
optStereoPerc.isSet() && hasX && hasY) {
//ALS property could have changed an atom into a QueryAtom
for(IAtom atom : outputContainer.atoms()){
if (AtomRef.deref(atom) instanceof QueryAtom) {
Expand All @@ -528,7 +502,7 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
if (hasZ) { // has 3D coordinates
outputContainer.setStereoElements(StereoElementFactory.using3DCoordinates(outputContainer)
.createAll());
} else if (!forceReadAs3DCoords.isSet()) { // has 2D coordinates (set as 2D coordinates)
} else if (!optForce3d.isSet()) { // has 2D coordinates (set as 2D coordinates)
outputContainer.setStereoElements(StereoElementFactory.using2DCoordinates(outputContainer)
.createAll());
}
Expand Down Expand Up @@ -559,6 +533,40 @@ private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKExce
return outputContainer;
}

/**
 * Creates a tetrahedral stereo element for {@code focus} from a 0D parity
 * value, using the order of the focus atom's neighbours in {@code mol}.
 *
 * @param mol    the container used to look up the neighbours of {@code focus}
 * @param focus  the candidate stereocentre
 * @param parity the parity value; only 1 (clockwise) and 2 (anti-clockwise)
 *               produce a stereo element
 * @return a new {@link TetrahedralChirality}, or {@code null} when the parity
 *         is not 1 or 2, the focus has fewer than three or more than four
 *         neighbours, more than one neighbour is a hydrogen, or the focus has
 *         exactly three neighbours and one of them is a hydrogen
 */
static IStereoElement<IAtom, IAtom> createStereo0d(IAtomContainer mol, IAtom focus, int parity) {
    if (parity != 1 && parity != 2)
        return null; // 3=unspec
    int numNbrs = 0;
    IAtom[] carriers = new IAtom[4];
    int idxOfHyd = -1;
    for (IAtom nbr : mol.getConnectedAtomsList(focus)) {
        if (numNbrs == 4)
            return null; // too many neighbors
        // the atomic number is a boxed Integer and may be unset (null);
        // comparing it directly to an int would unbox and throw an NPE
        Integer atomicNum = nbr.getAtomicNumber();
        if (atomicNum != null && atomicNum == IElement.H) {
            if (idxOfHyd >= 0)
                return null; // too many hydrogens
            idxOfHyd = numNbrs;
        }
        carriers[numNbrs++] = nbr;
    }
    // incorrect number of neighbours?
    if (numNbrs < 3 || numNbrs < 4 && idxOfHyd >= 0)
        return null;
    // implicit neighbour (H or lone-pair): the focus stands in for it
    if (numNbrs == 3)
        carriers[numNbrs++] = focus;
    // numNbrs is exactly 4 from here on: the loop never lets it exceed 4,
    // values below 3 returned above, and 3 was just padded to 4

    Stereo winding = parity == 1 ? Stereo.CLOCKWISE : Stereo.ANTI_CLOCKWISE;
    // H is always at back, even if explicit! At least this seems to be the case.
    // we adjust the winding as needed, which is when the explicit H is in slot
    // 0 or 2 (odd number of swaps to get to index 3)
    if (idxOfHyd == 0 || idxOfHyd == 2)
        winding = winding.invert();
    return new TetrahedralChirality(focus, carriers, winding);
}

/**
 * Tests whether the given program/header line carries the two-character
 * marker {@code "3D"} at character positions 20 and 21 (zero-based).
 *
 * @param program the header line to inspect
 * @return {@code true} if the line is long enough and has {@code "3D"} at
 *         offset 20, {@code false} otherwise
 */
private boolean is3Dfile(String program) {
    // startsWith(prefix, offset) yields false when the string is too short
    // to hold the prefix at that offset, so no separate length guard is needed
    final int dimCodeOffset = 20;
    return program.startsWith("3D", dimCodeOffset);
}
Expand Down Expand Up @@ -625,12 +633,14 @@ public void close() throws IOException {
}

/**
 * Registers this reader's boolean IO settings via addSetting and keeps the
 * returned setting objects in fields so they can be queried with isSet().
 *
 * NOTE(review): this span is rendered from a diff, so each renamed field
 * appears twice (the old assignment line followed by its replacement);
 * only the opt* assignments belong to the new version.
 */
private void initIOSettings() {
forceReadAs3DCoords = addSetting(new BooleanIOSetting("ForceReadAs3DCoordinates", IOSetting.Importance.LOW,
// setting "ForceReadAs3DCoordinates"; the trailing "false" is presumably the default value (confirm against BooleanIOSetting)
optForce3d = addSetting(new BooleanIOSetting("ForceReadAs3DCoordinates", IOSetting.Importance.LOW,
"Should coordinates always be read as 3D?", "false"));
interpretHydrogenIsotopes = addSetting(new BooleanIOSetting("InterpretHydrogenIsotopes",
// setting "InterpretHydrogenIsotopes": whether the symbols D and T are read as hydrogen isotopes
optHydIso = addSetting(new BooleanIOSetting("InterpretHydrogenIsotopes",
IOSetting.Importance.LOW, "Should D and T be interpreted as hydrogen isotopes?", "true"));
addStereoElements = addSetting(new BooleanIOSetting("AddStereoElements", IOSetting.Importance.LOW,
// setting "AddStereoElements": the setting name is unchanged, only the field holding it was renamed
optStereoPerc = addSetting(new BooleanIOSetting("AddStereoElements", IOSetting.Importance.LOW,
"Detect and create IStereoElements for the input.", "true"));
// setting "AddStereo0d": new in this change; allows stereo to be created from parity values when there are no coordinates
optStereo0d = addSetting(new BooleanIOSetting("AddStereo0d", IOSetting.Importance.LOW,
"Allow stereo created from parity value when no coordinates", "true"));
}

public void customizeJob() {
Expand Down Expand Up @@ -1520,13 +1530,13 @@ private IAtom createAtom(String symbol, IChemObjectBuilder builder, int lineNum)
atom.setAtomicNumber(elem.number());
return atom;
}
if (symbol.equals("D") && interpretHydrogenIsotopes.isSet()) {
if (symbol.equals("D") && optHydIso.isSet()) {
handleError("invalid symbol: " + symbol, lineNum, 31, 33);
IAtom atom = builder.newInstance(IAtom.class, "H");
atom.setMassNumber(2);
return atom;
}
if (symbol.equals("T") && interpretHydrogenIsotopes.isSet()) {
if (symbol.equals("T") && optHydIso.isSet()) {
handleError("invalid symbol: " + symbol, lineNum, 31, 33);
IAtom atom = builder.newInstance(IAtom.class, "H");
atom.setMassNumber(3);
Expand Down