Skip to content

Commit

Permalink
Compress the PDB hetatm type_map by not storing the common C.sp2 and …
Browse files Browse the repository at this point in the history
…H types.
  • Loading branch information
johnmay authored and egonw committed Jan 3, 2022
1 parent 9f8de9b commit bf95024
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 359,942 deletions.
23 changes: 18 additions & 5 deletions storage/pdb/src/main/java/org/openscience/cdk/io/PDBReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.vecmath.Point3d;

Expand Down Expand Up @@ -108,6 +110,7 @@ public class PDBReader extends DefaultChemObjectReader {
* names; for example "RFB.N13" maps to "N.planar3".
*/
private Map<String, String> hetDictionary;
private Set<String> hetResidues;

private AtomTypeFactory cdkAtomTypeFactory;

Expand Down Expand Up @@ -630,7 +633,7 @@ private PDBAtom readAtom(String cLine, int lineLength) throws CDKException {
throw new RuntimeException("PDBReader error during readAtom(): line too short");
}

boolean isHetatm = cLine.substring(0, 6).equals("HETATM");
boolean isHetatm = cLine.startsWith("HETATM");
String atomName = cLine.substring(12, 16).trim();
String resName = cLine.substring(17, 20).trim();
String symbol = parseAtomSymbol(cLine);
Expand Down Expand Up @@ -723,18 +726,27 @@ private String typeHetatm(String resName, String atomName) {
cdkAtomTypeFactory = AtomTypeFactory.getInstance("org/openscience/cdk/dict/data/cdk-atom-types.owl",
DefaultChemObjectBuilder.getInstance());
}

// Look up the atom type using the residue and atom name. If there is no explicit entry but the
// residue is known, atom names starting with "H" or "C" default to the common H and C.sp2 types.
String key = resName + "." + atomName;
if (hetDictionary.containsKey(key)) {
return hetDictionary.get(key);
}
String type = hetDictionary.get(key);
if (type != null)
return type;
else if (atomName.startsWith("H"))
return hetResidues.contains(resName) ? "H" : null;
else if (hetResidues.contains(resName) && atomName.startsWith("C"))
return hetResidues.contains(resName) ? "C.sp2" : null;

return null;
}

private void readHetDictionary() {
try {
InputStream ins = getClass().getResourceAsStream(hetDictionaryPath);
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(ins));
hetDictionary = new HashMap<String, String>();
hetDictionary = new HashMap<>();
hetResidues = new HashSet<>();
String line;
while ((line = bufferedReader.readLine()) != null) {
int colonIndex = line.indexOf(':');
Expand All @@ -746,6 +758,7 @@ private void readHetDictionary() {
} else {
hetDictionary.put(typeKey, typeValue);
}
hetResidues.add(typeKey.split("\\.")[0]);
}
bufferedReader.close();
} catch (IOException ioe) {
Expand Down

0 comments on commit bf95024

Please sign in to comment.