-
Notifications
You must be signed in to change notification settings - Fork 87
/
file.py
640 lines (575 loc) · 25.8 KB
/
file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
# This source code is part of the Biotite package and is distributed
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.
__name__ = "biotite.structure.io.pdbqt"
__author__ = "Patrick Kunzmann, Daniel Bauer"
__all__ = ["PDBQTFile"]
import warnings
import numpy as np
from ....file import TextFile, InvalidFileError
from ...error import BadStructureError
from ...atoms import AtomArray, AtomArrayStack
from ...charges import partial_charges
from ...bonds import BondList, BondType, find_connected, find_rotatable_bonds
PARAMETRIZED_ELEMENTS = [
"H", "C", "N", "O", "P", "S",
"F", "Cl", "Br", "I",
"Mg", "Ca", "Mn", "Fe", "Zn"
]
class PDBQTFile(TextFile):
"""
This class represents an *AutoDock* PDBQT file.
This class only provides rudimentary support for reading/writing
the pure atom information.
EXPERIMENTAL: Future API changes are probable.
Examples
--------
Write biotin as flexible ligand into a PDBQT file:
>>> import os.path
>>> ligand = residue("BTN")
>>> file = PDBQTFile()
>>> mask = file.set_structure(ligand, rotatable_bonds="all")
>>> # Print removed nonpolar hydrogen atoms
>>> print(ligand[~mask])
HET 0 BTN H101 H 3.740 1.170 0.970
HET 0 BTN H102 H 4.070 1.340 -0.770
HET 0 BTN H91 H 2.800 -0.740 -1.210
HET 0 BTN H92 H 2.480 -0.910 0.530
HET 0 BTN H81 H 1.290 1.260 0.520
HET 0 BTN H82 H 1.620 1.440 -1.220
HET 0 BTN H71 H 0.350 -0.650 -1.660
HET 0 BTN H72 H 0.020 -0.820 0.080
HET 0 BTN H2 H -0.840 1.580 -1.630
HET 0 BTN H61 H -3.800 1.840 1.290
HET 0 BTN H62 H -3.370 2.740 -0.200
HET 0 BTN H5 H -4.310 0.810 -1.210
HET 0 BTN H4 H -2.450 -0.040 -2.250
>>> print(file)
ROOT
HETATM 1 C11 BTN 0 5.090 -0.280 0.170 1.00 0.00 0.258 C
HETATM 2 O11 BTN 0 4.960 -1.470 0.030 1.00 0.00 -0.264 OA
ENDROOT
BRANCH 1 3
HETATM 3 O12 BTN 0 6.300 0.230 0.440 1.00 0.00 -0.331 OA
HETATM 17 HO2 BTN 0 7.030 -0.390 0.520 1.00 0.00 0.221 HD
ENDBRANCH 1 3
BRANCH 1 4
HETATM 4 C10 BTN 0 3.900 0.630 0.040 1.00 0.00 0.105 C
BRANCH 4 5
HETATM 5 C9 BTN 0 2.650 -0.200 -0.280 1.00 0.00 0.010 C
BRANCH 5 6
HETATM 6 C8 BTN 0 1.440 0.720 -0.410 1.00 0.00 0.002 C
BRANCH 6 7
HETATM 7 C7 BTN 0 0.200 -0.110 -0.730 1.00 0.00 0.016 C
BRANCH 7 8
HETATM 8 C2 BTN 0 -1.020 0.820 -0.860 1.00 0.00 0.065 C
HETATM 9 S1 BTN 0 -1.420 1.600 0.750 1.00 0.00 -0.154 SA
HETATM 10 C6 BTN 0 -3.200 1.830 0.370 1.00 0.00 0.090 C
HETATM 11 C5 BTN 0 -3.530 0.580 -0.480 1.00 0.00 0.091 C
HETATM 12 N1 BTN 0 -3.970 -0.510 0.410 1.00 0.00 -0.239 NA
HETATM 13 C3 BTN 0 -3.140 -1.550 0.270 1.00 0.00 0.272 C
HETATM 14 O3 BTN 0 -3.270 -2.590 0.890 1.00 0.00 -0.259 OA
HETATM 15 N2 BTN 0 -2.150 -1.340 -0.610 1.00 0.00 -0.239 NA
HETATM 16 C4 BTN 0 -2.290 0.010 -1.170 1.00 0.00 0.093 C
HETATM 18 HN1 BTN 0 -4.740 -0.470 1.000 1.00 0.00 0.132 HD
HETATM 19 HN2 BTN 0 -1.460 -1.980 -0.840 1.00 0.00 0.132 HD
ENDBRANCH 7 8
ENDBRANCH 6 7
ENDBRANCH 5 6
ENDBRANCH 4 5
ENDBRANCH 1 4
TORSDOF 6
>>> file.write(os.path.join(path_to_directory, "1l2y_mod.pdb"))
"""
def get_remarks(self, model=None):
"""
Get the content of ``REMARKS`` lines.
Parameters
----------
model : int, optional
If this parameter is given, the function will return a
string from the remarks corresponding to the given
model number (starting at 1).
Negative values are used to index models starting from the
last model insted of the first model.
If this parameter is omitted, a list of strings
containing all models will be returned, even if the
structure contains only one model.
Returns
-------
lines : str or list of str
The content of ``REMARKS`` lines, without the leading
``'REMARKS'``.
"""
# Line indices where a new model starts
model_start_i = np.array([i for i in range(len(self.lines))
if self.lines[i].startswith(("MODEL"))],
dtype=int)
# Line indices with ATOM or HETATM records
remark_line_i = np.array([i for i in range(len(self.lines)) if
self.lines[i].startswith("REMARK")],
dtype=int)
# Structures containing only one model may omit MODEL record
# In these cases model starting index is set to 0
if len(model_start_i) == 0:
model_start_i = np.array([0])
if model is None:
# Add exclusive end of file
model_start_i = np.concatenate((model_start_i, [len(self.lines)]))
model_i = 0
remarks = []
for i in range(len(model_start_i) - 1):
start = model_start_i[i]
stop = model_start_i[i+1]
model_remark_line_i = remark_line_i[
(remark_line_i >= start) & (remark_line_i < stop)
]
remarks.append(
"\n".join([self.lines[i][7:] for i in model_remark_line_i])
)
return remarks
else:
last_model = len(model_start_i)
if model == 0:
raise ValueError("The model index must not be 0")
# Negative models mean index starting from last model
model = last_model + model + 1 if model < 0 else model
if model < last_model:
line_filter = ( ( remark_line_i >= model_start_i[model-1] ) &
( remark_line_i < model_start_i[model ] ) )
elif model == last_model:
line_filter = (remark_line_i >= model_start_i[model-1])
else:
raise ValueError(
f"The file has {last_model} models, "
f"the given model {model} does not exist"
)
remark_line_i = remark_line_i[line_filter]
# Do not include 'REMARK ' itself -> begin from pos 8
return "\n".join([self.lines[i][7:] for i in remark_line_i])
def get_structure(self, model=None):
"""
Get an :class:`AtomArray` or :class:`AtomArrayStack` from the
PDBQT file.
Parameters
----------
model : int, optional
If this parameter is given, the function will return an
:class:`AtomArray` from the atoms corresponding to the given
model number (starting at 1).
Negative values are used to index models starting from the
last model insted of the first model.
If this parameter is omitted, an :class:`AtomArrayStack`
containing all models will be returned, even if the
structure contains only one model.
Returns
-------
array : AtomArray or AtomArrayStack
The return type depends on the `model` parameter.
"""
# Line indices where a new model starts
model_start_i = np.array([i for i in range(len(self.lines))
if self.lines[i].startswith(("MODEL"))],
dtype=int)
# Line indices with ATOM or HETATM records
atom_line_i = np.array([i for i in range(len(self.lines)) if
self.lines[i].startswith(("ATOM", "HETATM"))],
dtype=int)
# Structures containing only one model may omit MODEL record
# In these cases model starting index is set to 0
if len(model_start_i) == 0:
model_start_i = np.array([0])
if model is None:
depth = len(model_start_i)
length = self._get_model_length(model_start_i, atom_line_i)
array = AtomArrayStack(depth, length)
# Line indices for annotation determination
# Annotation is determined from model 1,
# therefore from ATOM records before second MODEL record
if len(model_start_i) == 1:
annot_i = atom_line_i
else:
annot_i = atom_line_i[atom_line_i < model_start_i[1]]
# Line indices for coordinate determination
coord_i = atom_line_i
else:
last_model = len(model_start_i)
if model == 0:
raise ValueError("The model index must not be 0")
# Negative models mean index starting from last model
model = last_model + model + 1 if model < 0 else model
if model < last_model:
line_filter = ( ( atom_line_i >= model_start_i[model-1] ) &
( atom_line_i < model_start_i[model ] ) )
elif model == last_model:
line_filter = (atom_line_i >= model_start_i[model-1])
else:
raise ValueError(
f"The file has {last_model} models, "
f"the given model {model} does not exist"
)
annot_i = coord_i = atom_line_i[line_filter]
array = AtomArray(len(coord_i))
# Save atom IDs for later sorting into the original atom order
atom_id = np.zeros(array.array_length(), int)
# Create annotation arrays
chain_id = np.zeros(array.array_length(), array.chain_id.dtype)
res_id = np.zeros(array.array_length(), array.res_id.dtype)
ins_code = np.zeros(array.array_length(), array.ins_code.dtype)
res_name = np.zeros(array.array_length(), array.res_name.dtype)
hetero = np.zeros(array.array_length(), array.hetero.dtype)
atom_name = np.zeros(array.array_length(), array.atom_name.dtype)
element = np.zeros(array.array_length(), array.element.dtype)
# Fill annotation array
# i is index in array, line_i is line index
for i, line_i in enumerate(annot_i):
line = self.lines[line_i]
atom_id[i] = int(line[6:11])
chain_id[i] = line[21].strip()
res_id[i] = int(line[22:26])
ins_code[i] = line[26].strip()
res_name[i] = line[17:20].strip()
hetero[i] = (False if line[0:4] == "ATOM" else True)
atom_name[i] = line[12:16].strip()
element[i] = line[76:78].strip()
# Add annotation arrays to atom array (stack)
array.chain_id = chain_id
array.res_id = res_id
array.ins_code = ins_code
array.res_name = res_name
array.hetero = hetero
array.atom_name = atom_name
array.element = element
# Fill in coordinates
if isinstance(array, AtomArray):
for i, line_i in enumerate(coord_i):
line = self.lines[line_i]
array.coord[i,0] = float(line[30:38])
array.coord[i,1] = float(line[38:46])
array.coord[i,2] = float(line[46:54])
elif isinstance(array, AtomArrayStack):
m = 0
i = 0
for line_i in atom_line_i:
if m < len(model_start_i)-1 and line_i > model_start_i[m+1]:
m += 1
i = 0
line = self.lines[line_i]
array.coord[m,i,0] = float(line[30:38])
array.coord[m,i,1] = float(line[38:46])
array.coord[m,i,2] = float(line[46:54])
i += 1
# Sort into the original atom order
array = array[..., np.argsort(atom_id)]
return array
def set_structure(self, atoms, charges=None, atom_types=None,
rotatable_bonds=None, root=None, include_torsdof=True):
"""
Write an :class:`AtomArray` into the PDBQT file.
Parameters
----------
atoms : AtomArray, shape=(n,)
The atoms to be written into this file.
Must have an associated :class:`BondList`.
charges : ndarray, shape=(n,), dtype=float, optional
Partial charges for each atom in `atoms`.
By default, the charges are calculated using the PEOE method
(:func:`partial_charges()`).
atom_types : ndarray, shape=(n,), dtype="U1", optional
Custom *AutoDock* atom types for each atom in `atoms`.
rotatable_bonds : None or 'rigid' or 'all' or BondList, optional
This parameter describes, how rotatable bonds are handled,
with respect to ``ROOT``, ``BRANCH`` and ``ENDBRANCH``
lines.
- ``None`` - The molecule is handled as rigid receptor:
No ``ROOT``, ``BRANCH`` and ``ENDBRANCH`` lines will
be written.
- ``'rigid'`` - The molecule is handled as rigid ligand:
Only a ``ROOT`` line will be written.
- ``'all'`` - The molecule is handled as flexible
ligand:
A ``ROOT`` line will be written and all rotatable
bonds are included using ``BRANCH`` and ``ENDBRANCH``
lines.
- :class:`BondList` - The molecule is handled as
flexible ligand:
A ``ROOT`` line will be written and all bonds in the
given :class:`BondList` are considered flexible via
``BRANCH`` and ``ENDBRANCH`` lines.
root : int, optional
Specifies the index of the atom following the ``ROOT`` line.
Setting the root atom is useful for specifying the *anchor*
in flexible side chains.
This parameter has no effect, if `rotatable_bonds` is
``None``.
By default, the first atom is also the root atom.
include_torsdof : bool, optional
By default, a ``TORSDOF`` (torsional degrees of freedom)
record is written at the end of the file.
By setting this parameter to false, the record is omitted.
Returns
-------
mask : ndarray, shape=(n,), dtype=bool
A boolean mask, that is ``False`` for each atom of the input
``atoms``, that was removed due to being a nonpolar
hydrogen.
"""
# Check if AtomArray is suitable for PDBQT
max_atoms, max_residues = 99999, 9999
if atoms.array_length() > max_atoms:
warnings.warn(f"More then {max_atoms:,} atoms per model")
if (atoms.res_id > max_residues).any():
warnings.warn(f"Residue IDs exceed {max_residues:,}")
if np.isnan(atoms.coord).any():
raise BadStructureError("Coordinates contain 'NaN' values")
if any([len(name) > 1 for name in atoms.chain_id]):
raise BadStructureError("Some chain IDs exceed 1 character")
if any([len(name) > 3 for name in atoms.res_name]):
raise BadStructureError("Some residue names exceed 3 characters")
if any([len(name) > 4 for name in atoms.atom_name]):
raise BadStructureError("Some atom names exceed 4 characters")
if charges is None:
charges = partial_charges(atoms)
charges[np.isnan(charges)] = 0
else:
if np.isnan(charges).any():
raise ValueError("Input charges contain NaN values")
# Get AutoDock atom types and remove nonpolar hydrogen atoms
atoms, charges, types, mask = convert_atoms(atoms, charges)
# Overwrite calculated atom types with input atom types
if atom_types is not None:
types = atom_types[mask]
if rotatable_bonds is None:
# No rotatable bonds -> the BondList contains no bonds
rotatable_bonds = BondList(atoms.bonds.get_atom_count())
use_root = False
elif rotatable_bonds == "rigid":
rotatable_bonds = BondList(atoms.bonds.get_atom_count())
use_root = True
elif rotatable_bonds == "all":
rotatable_bonds = find_rotatable_bonds(atoms.bonds)
use_root = True
else:
if rotatable_bonds.ndim != 2 or rotatable_bonds.shape[1] != 2:
raise ValueError(
"An (nx2) array is expected for rotatable bonds"
)
rotatable_bonds = BondList(
len(mask), np.asarray(rotatable_bonds)
)[mask]
use_root = True
if root is None:
root_index = 0
else:
# Find new index of root atom, since the index might have
# been shifted due to removed atoms
original_indices = np.arange(len(mask))
new_indices = original_indices[mask]
try:
root_index = np.where(new_indices == root)[0][0]
except IndexError:
raise ValueError(
"The given root atom index points to an nonpolar hydrogen "
"atom, that has been removed"
)
# Add bonds of the rigid root to rotatable bonds,
# as they probably have been filtered out,
# as the root is probably a terminal atom
for atom, bond_type in zip(*atoms.bonds.get_bonds(root_index)):
rotatable_bonds.add_bond(root_index, atom, bond_type)
# Break rotatable bonds
# for simple branch determination in '_write_atoms()'
atoms.bonds.remove_bonds(rotatable_bonds)
hetero = ["ATOM" if e == False else "HETATM" for e in atoms.hetero]
if "atom_id" in atoms.get_annotation_categories():
atom_id = atoms.atom_id
else:
atom_id = np.arange(1, atoms.array_length()+1)
occupancy = np.ones(atoms.array_length())
b_factor = np.zeros(atoms.array_length())
# Convert rotatable bonds into array for easier handling
# The bond type is irrelevant from this point on
rotatable_bonds = rotatable_bonds.as_array()[:,:2]
self.lines = []
self._write_atoms(
atoms, charges, types,
atom_id, hetero, occupancy, b_factor,
root_index, rotatable_bonds,
np.zeros(len(rotatable_bonds), dtype=bool), use_root
)
if include_torsdof:
self.lines.append(f"TORSDOF {len(rotatable_bonds)}")
return mask
def _write_atoms(self, atoms, charges, types,
atom_id, hetero, occupancy, b_factor,
root_atom, rotatable_bonds, visited_rotatable_bonds,
is_root):
if len(rotatable_bonds) != 0:
# Get the indices to atoms of this branch, i.e. a group of
# atoms that are connected by non-rotatable bonds
# Use 'find_connected()', since rotatable bonds were removed
# from the BondList before
this_branch_indices = find_connected(atoms.bonds, root_atom)
# The root atom of the branch, i.e. the atom connected by
# the rotatable bond should always be listed first
# -> Remove root atom and insert it at the beginning
this_branch_indices = np.insert(
this_branch_indices[this_branch_indices != root_atom],
0,
root_atom
)
else:
# No rotatable bonds
# -> all atom are in root i.e. this branch
this_branch_indices = np.arange(atoms.array_length())
if is_root:
self.lines.append("ROOT")
for i in this_branch_indices:
self.lines.append(
f"{hetero[i]:6}"
f"{atom_id[i]:>5d} "
f"{atoms.atom_name[i]:4} "
f"{atoms.res_name[i]:3} "
f"{atoms.chain_id[i]:1}"
f"{atoms.res_id[i]:>4d}"
f"{atoms.ins_code[i]:1} "
f"{atoms.coord[i,0]:>8.3f}"
f"{atoms.coord[i,1]:>8.3f}"
f"{atoms.coord[i,2]:>8.3f}"
f"{occupancy[i]:>6.2f}"
f"{b_factor[i]:>6.2f} "
f"{charges[i]:>6.3f} "
f"{types[i]:2}"
)
if is_root:
self.lines.append("ENDROOT")
if len(rotatable_bonds) == 0:
# No rotatable bonds -> no branching
return
for k, (i, j) in enumerate(rotatable_bonds):
if visited_rotatable_bonds[k]:
continue
# Create a new branch for each rotatable bond,
# that connects to an atom of this branch
if i in this_branch_indices:
this_br_i = i
new_br_i = j
elif j in this_branch_indices:
this_br_i = j
new_br_i = i
else:
# Rotatable bond does not start from this branch
continue
# Mark rotatable bond as visited as otherwise branches would
# be created back and forth over the same rotatable bond and
# this method would never terminate
visited_rotatable_bonds[k] = True
self.lines.append(
f"BRANCH {atom_id[this_br_i]:>3d} {atom_id[new_br_i]:>3d}"
)
self._write_atoms(
atoms, charges, types,
atom_id, hetero, occupancy, b_factor,
# The root atom of the branch
#is the other atom of the rotatable bond
new_br_i, rotatable_bonds, visited_rotatable_bonds,
False
)
self.lines.append(
f"ENDBRANCH {atom_id[this_br_i]:>3d} {atom_id[new_br_i]:>3d}"
)
def _get_model_length(self, model_start_i, atom_line_i):
"""
Determine length of models and check that all models
have equal length.
"""
n_models = len(model_start_i)
length = None
for model_i in range(len(model_start_i)):
model_start = model_start_i[model_i]
model_stop = model_start_i[model_i+1] if model_i+1 < n_models \
else len(self.lines)
model_length = np.count_nonzero(
(atom_line_i >= model_start) & (atom_line_i < model_stop)
)
if length is None:
length = model_length
if model_length != length:
raise InvalidFileError(
f"Model {model_i+1} has {model_length} atoms, "
f"but model 1 has {length} atoms, must be equal"
)
return length
def convert_atoms(atoms, charges):
"""
Convert atoms into *AutoDock* compatible atoms.
Parameters
----------
atoms : AtomArray
The atoms to be converted.
charges : ndarray, dtype=float
Partial charges for the atoms.
Returns
-------
converted_atoms : AtomArray
The input `atoms`, but with deleted nonpolar hydrogen atoms.
charges : ndarray, dtype=float
The input `charges`, but with deleted entries for nonpolar
hydrogen atoms.
atom_types : ndarray, dtype="U1"
The *AutoDock* atom types.
mask : ndarray, shape=(n,), dtype=bool
A boolean mask, that is ``False`` for each atom of the input
``atoms``, that was removed due to being a nonpolar hydrogen.
"""
charges = charges.copy()
all_bonds, all_bond_types = atoms.bonds.get_all_bonds()
atom_types = np.zeros(atoms.array_length(), dtype="U2")
hydrogen_removal_mask = np.zeros(atoms.array_length(), dtype=bool)
for i in range(atoms.array_length()):
element = atoms.element[i]
bonded_atoms = all_bonds[i][all_bonds[i] != -1]
if element == "H":
if len(bonded_atoms) == 0:
# Free proton
atom_types[i] = "H"
elif len(bonded_atoms) == 1:
j = bonded_atoms[0]
bonded_element = atoms.element[j]
if bonded_element == "C":
# Remove hydrogen and add its charge
# to charge of bonded carbon
charges[j] += charges[i]
hydrogen_removal_mask[i] = True
else:
atom_types[i] = "HD"
else:
raise BadStructureError(
"Structure contains hydrogen with multiple bonds"
)
elif element == "C":
if np.isin(
all_bond_types[i],
[BondType.AROMATIC_SINGLE, BondType.AROMATIC_DOUBLE]
).any():
# Aromatic carbon
atom_types[i] = "A"
else:
# Alphatic carbon
atom_types[i] = "C"
elif element == "N":
atom_types[i] = "NA"
elif element == "O":
atom_types[i] = "OA"
elif element == "S":
atom_types[i] = "SA"
elif element.capitalize() in PARAMETRIZED_ELEMENTS:
atom_types[i] = element.capitalize()
else:
warnings.warn(
f"Element {element} is not paramtrized, "
f"using parameters for hydrogen instead"
)
atom_types[i] = "H"
mask = ~hydrogen_removal_mask
return atoms[mask], charges[mask], atom_types[mask], mask