// [web page residue captured with the file; not part of the source code]
// Permalink
// Find file Copy path
// Fetching contributors…
// Cannot retrieve contributors at this time
// 326 lines (269 sloc) 10.5 KB
/*
WebMolKit
(c) 2010-2018 Molecular Materials Informatics, Inc.
All rights reserved
http://molmatinf.com
[PKG=webmolkit]
*/
///<reference path='../decl/corrections.d.ts'/>
///<reference path='../util/util.ts'/>
///<reference path='../data/Molecule.ts'/>
///<reference path='../data/MolUtil.ts'/>
///<reference path='../data/DataSheet.ts'/>
///<reference path='../data/BondArtifact.ts'/>
namespace WebMolKit /* BOF */ {
/*
MDL Molfile writer: convert the native structure format to MDL Molfile, V2000. Note that the destination format has legacy
problems out the wazoo, and even retroactive improvements (e.g. V3000) tend to be not supported by most implementations. The
bottom line is that the best strategy is generally to stick with a lowest common denominator subset, and try to transition away
from the format.
MDL SDfile writer: writes collections of molecules together with their non-molecule fields. The SDfile format is problematic
for as many reasons as the Molfile format; the best strategy is to avoid using it whenever possible, though that is often not an option.
*/
// Serialises a molecule as an MDL Molfile (V2000): an optional 3 line header followed by the connection table (counts line,
// atom block, bond block, "M" property lines, atom aliases and the terminator). V2000 is a fixed-width column format, so every
// numeric field is right-justified within its column and the filler fields are written as 3 character columns.
export class MDLMOLWriter
{
    // options
    public includeHeader = true; // if on, the 3 line header will be included
    public enhancedFields = true; // if on, non-standard MDL fields may be added
    public chargeSeparate = true; // if on, zero bonds will be split out (the code that would use it is currently commented out)
    public molName = ''; // optional name to include in the header (if any)

    // content in progress: one entry per output line
    private lines:string[] = [];

    // ----------------- public methods -----------------

    constructor(public mol:Molecule)
    {
    }

    // writes out the MDL content and returns it as a newline-separated string; the header is optional, but the connection
    // table is always written; each call starts from a clean slate, so calling it repeatedly does not duplicate the output
    public write():string
    {
        this.lines = [];

        if (this.includeHeader)
        {
            this.lines.push(this.molName);
            this.lines.push('Generated by WebMolKit');
            this.lines.push('');
        }

        this.writeCTAB();

        return this.lines.join('\n');
    }

    // returns whatever the most recent call to write() produced (an empty string if write() has not been called)
    public getResult():string
    {
        return this.lines.join('\n');
    }

    // ----------------- private methods -----------------

    // writes the main block: counts line, atoms, bonds, property lines, aliases and the end marker
    private writeCTAB():void
    {
        let mol = this.mol;

        // if any atom carries an abbreviation, switch to a clone and expand the abbreviations on that clone
        for (let n = 1; n <= mol.numAtoms; n++) if (MolUtil.hasAbbrev(mol, n))
        {
            mol = mol.clone();
            MolUtil.expandAbbrevs(mol, true);
            break;
        }

        // counts line: atoms, bonds, eight unused 3-wide columns, 999 property lines, version tag
        this.lines.push(this.intrpad(mol.numAtoms, 3) + this.intrpad(mol.numBonds, 3) + '  0  0  0  0  0  0  0  0999 V2000');

        // data to record in the following M-block
        let chgidx:number[] = [], chgval:number[] = [];
        let radidx:number[] = [], radval:number[] = [];
        let isoidx:number[] = [], isoval:number[] = [];
        let rgpidx:number[] = [], rgpval:number[] = [];
        let hydidx:number[] = [], hydval:number[] = [];
        let zchidx:number[] = [], zchval:number[] = [];
        let zboidx:number[] = [], zboval:number[] = [];

        // store the original molecule in 'xmol'; after this point, 'mol' will be dumbed down to fit in standard MDL fields
        /* TODO....
        let xmol = mol;
        if (chargeSeparate && ChargeSeparator.anyZeroBonds(mol))
        {
            ChargeSeparator sep = new ChargeSeparator(mol);
            sep.process();
            mol = sep.getResult();
        }
        Molecule rmol = MolUtil.reduceBondTypes(mol);
        if (rmol != null) mol = rmol;*/

        // export atoms, and make a few notes along the way
        for (let n = 1; n <= mol.numAtoms; n++)
        {
            let x = mol.atomX(n), y = mol.atomY(n), z = 0;
            let line = this.rpad(x.toFixed(4), 10) + this.rpad(y.toFixed(4), 10) + this.rpad(z.toFixed(4), 10);

            // the symbol column is 3 characters wide; R-groups (R0..R9...) are written as "R#" with the number going into M  RGP
            let str = mol.atomElement(n);
            if (str.length > 3) str = str.substring(0, 3);
            if (str.length > 1 && str.charAt(0) == 'R' && str.charAt(1) >= '0' && str.charAt(1) <= '9')
            {
                rgpidx.push(n);
                rgpval.push(parseInt(str.substring(1), 10));
                str = 'R#';
            }
            while (str.length < 4) str += ' ';
            line += ' ' + str + '0'; // 3-wide symbol followed by the 2-wide mass difference column (" 0")

            // legacy charge column: charges of +/-1..3 are stored as (4 - charge), and 4 is used when the charge is zero and
            // the unpaired count is 2; anything else is written as 0 (the M  CHG / M  RAD lines carry the actual values)
            let chg = mol.atomCharge(n), rad = mol.atomUnpaired(n), mapnum = mol.atomMapNum(n);
            let chgCode = 0;
            if (chg != 0 && chg >= -3 && chg <= 3) chgCode = 4 - chg;
            else if (chg == 0 && rad == 2) chgCode = 4;

            let val = this.mdlValence(mol, n, 15);

            // columns: charge, 3 unused, valence, 3 unused, atom mapping number, 2 unused
            line += this.intrpad(chgCode, 3) + '  0  0  0' + this.intrpad(val, 3) + '  0  0  0' + this.intrpad(mapnum, 3) + '  0  0';

            this.lines.push(line);

            if (chg != 0) {chgidx.push(n); chgval.push(chg);}
            if (this.enhancedFields)
            {
                if (mol.atomHExplicit(n) != Molecule.HEXPLICIT_UNKNOWN) {hydidx.push(n); hydval.push(mol.atomHExplicit(n));}
                // these are for retroactive bond separation, not implemented at the moment
                //if (xmol.atomCharge(n) != mol.atomCharge(n)) {zchidx.push(n); zchval.push(xmol.atomCharge(n));}
                //if (xmol.atomHExplicit(n) != Molecule.HEXPLICIT_UNKNOWN) {hydidx.push(n); hydval.push(xmol.atomHExplicit(n));}
            }
            if (rad != 0) {radidx.push(n); radval.push(rad);}
            if (mol.atomIsotope(n) != Molecule.ISOTOPE_NATURAL) {isoidx.push(n); isoval.push(mol.atomIsotope(n));}
        }

        // export bonds
        for (let n = 1; n <= mol.numBonds; n++)
        {
            // the bond type column only holds orders 1..3; out-of-range orders are clamped and noted in the ZBO block below
            let order = mol.bondOrder(n), type = Math.max(1, Math.min(3, order));

            // stereo column: 0 = none, 1 = inclined, 6 = declined, 4 = unknown; the wedge-style bonds are written with type 1
            let geom = mol.bondType(n), stereo = 0;
            if (geom == Molecule.BONDTYPE_NORMAL) {}
            else if (geom == Molecule.BONDTYPE_INCLINED) {stereo = 1; type = 1;}
            else if (geom == Molecule.BONDTYPE_DECLINED) {stereo = 6; type = 1;}
            else if (geom == Molecule.BONDTYPE_UNKNOWN) {stereo = 4; type = 1;}

            let line = this.intrpad(mol.bondFrom(n), 3) + this.intrpad(mol.bondTo(n), 3) +
                       this.intrpad(type, 3) + this.intrpad(stereo, 3) + '  0  0  0';

            this.lines.push(line);

            if (this.enhancedFields)
            {
                if (order < 1 || order > 3) {zboidx.push(n); zboval.push(order);}
                // these are for retroactive bond separation, not implemented at the moment
                //if (xmol.bondOrder(n) != mol.bondOrder(n)) {zboidx.push(n); zboval.push(xmol.bondOrder(n));}
            }
        }

        // export the additional blocks (empty index lists produce no lines)
        this.writeMBlockPair('CHG', chgidx, chgval);
        this.writeMBlockPair('RAD', radidx, radval);
        this.writeMBlockPair('ISO', isoidx, isoval);
        this.writeMBlockPair('RGP', rgpidx, rgpval);
        this.writeMBlockPair('HYD', hydidx, hydval);
        this.writeMBlockPair('ZCH', zchidx, zchval);
        this.writeMBlockPair('ZBO', zboidx, zboval);

        // write bond artifacts, one line each; they are derived from the molecule that was actually written out ('mol'), so
        // that the atom indices agree with the atom block even when abbreviations had to be expanded
        if (this.enhancedFields)
        {
            let artifacts = new BondArtifact(mol);
            let idx = 0;
            for (let path of artifacts.getResPaths()) this.writeMBlockFlat('ZPA', ++idx, path.atoms);
            for (let ring of artifacts.getResRings()) this.writeMBlockFlat('ZRI', ++idx, ring.atoms);
            for (let arene of artifacts.getArenes()) this.writeMBlockFlat('ZAR', ++idx, Vec.prepend(arene.atoms, arene.centre));
        }

        // export long atom names: an "A" line with the atom index, followed by a line with the full label
        for (let n = 1; n <= mol.numAtoms; n++) if (mol.atomElement(n).length > 2)
        {
            this.lines.push('A  ' + this.intrpad(n, 3));
            this.lines.push(mol.atomElement(n));
        }

        this.lines.push('M  END');
    }

    // writes a specific sub-block, e.g. M__CHG, etc., where each pair of idx/val is a separate entity; at most 8 pairs go on
    // each line, and each line starts with the number of pairs that it holds
    private writeMBlockPair(token:string, idx:number[], val:number[])
    {
        const sz = idx.length;
        for (let i = 0; i < sz; i += 8)
        {
            let count = Math.min(8, sz - i);
            let line = 'M  ' + token + this.intrpad(count, 3);
            for (let j = 0; j < count; j++) line += this.intrpad(idx[i + j], 4) + this.intrpad(val[i + j], 4);
            this.lines.push(line);
        }
    }

    // writes a specific sub-block, whereby the master index corresponds to some number of values; these are split over multiple
    // lines if necessary (at most 15 values per line, with the master index repeated on each line)
    private writeMBlockFlat(token:string, idx:number, val:number[])
    {
        const sz = val.length;
        for (let i = 0; i < sz; i += 15)
        {
            let count = Math.min(15, sz - i);
            let line = 'M  ' + token + this.intrpad(count, 3);
            line += this.intrpad(idx, 4);
            for (let j = 0; j < count; j++) line += this.intrpad(val[i + j], 4);
            this.lines.push(line);
        }
    }

    // convenient formatting: a number, right-justified within a column of 'sz' characters
    private intrpad(num:number, sz:number):string
    {
        return this.rpad(num.toString(), sz);
    }

    // right-justifies a string by prepending spaces until it is at least 'sz' long; longer strings are returned unchanged
    private rpad(str:string, sz:number):string
    {
        while (str.length < sz) str = ' ' + str;
        return str;
    }

    // figures out the MDL valence override, if any; a return value of 0 means that the calculated default will suffice; if the value
    // needs to be explicitly zero, the 'zeroVal' parameter is returned (should be 15 for V2000, -1 for V3000)
    private mdlValence(mol:Molecule, atom:number, zeroVal:number):number
    {
        let hyd = mol.atomHydrogens(atom), el = mol.atomElement(atom);
        let options = MDLMOL_VALENCE[el];

        // if no implicit valence, and no hydrogens: no need
        if (options == null && hyd == 0) return 0;

        // the charge adjustment depends on the element: C and H use +|charge|, B uses -|charge|, everything else -charge
        let chg = mol.atomCharge(atom);
        let chgmod = (el == 'C' || el == 'H') ? Math.abs(chg) : el == 'B' ? -Math.abs(chg) : -chg;

        let bondSum = 0;
        for (let b of mol.atomAdjBonds(atom)) bondSum += mol.bondOrder(b);
        let nativeVal = chgmod + mol.atomUnpaired(atom) + hyd + bondSum;

        // if there are valence options and this is the first one, it should work out
        if (options && options[0] == nativeVal) return 0;

        // NOTE: in cases with multiple valence options, like S[2,4,6], it would be possible to leave the valence unmarked
        // when the previous state is indicated, e.g. for S{val=3} ==> +1 H to get to val=4; or we could just mark the
        // valence, since the first one is always overwhelmingly the default
        let val = nativeVal - chgmod;
        return val <= 0 || val > 14 ? zeroVal : val;
    }
}
// Serialises a datasheet as an MDL SDfile: for each row, the molecule from the first molecule column (if there is one and it
// is not null) is written as a Molfile, followed by one "> <name>" data item per non-null, non-blank field, and the "$$$$"
// record terminator.
export class MDLSDFWriter
{
    // options (none at present)

    // content in progress: one entry per output chunk, joined with newlines
    private lines:string[] = [];

    // ----------------- public methods -----------------

    constructor(public ds:DataSheet)
    {
    }

    // writes out the MDL content and returns it as a single string; each call starts from a clean slate, so calling it
    // repeatedly does not duplicate the records
    public write():string
    {
        const ds = this.ds;
        const lines:string[] = [];
        this.lines = lines;

        const colMol = ds.firstColOfType(DataSheet.COLTYPE_MOLECULE);

        for (let i = 0; i < ds.numRows; i++)
        {
            let mol = colMol < 0 ? null : ds.getMolecule(i, colMol);
            if (mol != null /*MolUtil.notBlank(mol)*/)
            {
                lines.push(new MDLMOLWriter(mol).write());
            }

            for (let j = 0; j < ds.numCols; j++)
            {
                if (j == colMol || !ds.notNull(i, j)) continue;

                // only string, integer, real and boolean columns are exported; other column types yield a blank value
                let ct = ds.colType(j);
                let val = '';
                if (ct == DataSheet.COLTYPE_STRING) val = ds.getString(i, j);
                else if (ct == DataSheet.COLTYPE_INTEGER) val = ds.getInteger(i, j).toString();
                else if (ct == DataSheet.COLTYPE_REAL) val = ds.getReal(i, j).toString();
                else if (ct == DataSheet.COLTYPE_BOOLEAN) val = ds.getBoolean(i, j) ? 'true' : 'false';

                // blank values are skipped entirely; otherwise: header line, value, blank separator line
                if (val != '')
                {
                    lines.push('> <' + ds.colName(j) + '>');
                    lines.push(val);
                    lines.push('');
                }
            }

            lines.push('$$$$');
        }

        // !!
        return lines.join('\n');
    }

    // returns whatever the most recent call to write() produced (an empty string if write() has not been called)
    public getResult():string
    {
        return this.lines.join('\n');
    }

    // ----------------- private methods -----------------
}
/* EOF */ }