/
pdb2ins.py
3209 lines (2995 loc) · 150 KB
/
pdb2ins.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
__author__ = 'anna'
"""
first project pdb2ins
by Anna Vera Luebben
start February 2015
version 2017/2 (August)
Read pdb file and generate .ins file for SHELXL.
The pdb file is assumed to conform to the Protein Data Bank notes
'Atomic Coordinate and Bibliographic Entry Format Description Version 3.30'.
"""
import os
import sys
import time
from sys import exit
import numpy as np
import pdb2hkl
import transformations
from LigandsInstructions import ligandRestraints
from ResiInstructions import instructions
from cmd import CommandlineParser
from head import head
from spagsydata import getSymmCards, testSpaceGroup
# Option dictionary read by the classes and functions below. It stays None until setSlaveMode() assigns it
# (presumably the command line parser fills it elsewhere in this file -- TODO confirm).
options = None
# Padding fills up the residue number to four digits (from the left) with zeros.
# NOTE(review): that sentence describes the commented-out spec '0>4.0'; the active spec '<4.0' left-aligns
# in a field of width four without zero filling.
# padding = '0>4.0'
padding = '<4.0'
# Keep a reference to the built-in raw_input, because the module-level raw_input() defined below shadows it.
buildin_raw_Input = raw_input
def dummy(_):
    """
    No-op placeholder that can be used wherever a callback taking one argument is required.
    :param _: ignored
    :return: None
    """
    return None
class OutputProxy(object):
    """
    Object replacing sys.stdout to reroute output from stdout to a callback.
    """

    def __init__(self, cb):
        """
        :param cb: callable that receives the argument tuple of every write() call
        """
        self.cb = cb

    def write(self, *args):
        """
        Interface to the 'print' statement.
        :param args: strings to be written; they are handed to the callback as one tuple
        :return: None
        """
        self.cb(args)

    def flush(self):
        """
        Part of the file interface expected from sys.stdout. Nothing is buffered by the proxy, so there is
        nothing to do; the method only keeps callers of sys.stdout.flush() from failing.
        :return: None
        """
        pass
def setSlaveMode(opt, cb=None):
    """
    Switches pdb2ins to slave mode: the option dictionary is supplied by the caller instead of being built
    from sys.argv, and everything written to sys.stdout is rerouted to the callback 'cb'.
    :param opt: Dictionary type defining all options as output by cmd.CommandlineParser.__call__().
    :param cb: Callable that gets called with the string arguments that are usually written to sys.stdout.
               When omitted (or falsy) the output is discarded.
    :return: None
    """
    global options
    options = opt
    # Fall back to the no-op callback when the caller did not provide one.
    sys.stdout = OutputProxy(cb or dummy)
def raw_input(*args, **kwargs):
    """
    Wrapper around the built-in raw_input that lets the user terminate the program by answering
    'q' or 'exit' (in any capitalisation) to any prompt.
    :param args: positional arguments forwarded to the built-in raw_input
    :param kwargs: keyword arguments forwarded to the built-in raw_input
    :return: the string entered by the user
    """
    answer = buildin_raw_Input(*args, **kwargs)
    if answer.lower() in ('q', 'exit'):
        print('*** PDB2INS has been terminated ***')
        exit()
    return answer
class Data(object):
def __init__(self):
"""
Runs the whole conversion while the object is constructed: asks about and optionally creates the .hkl file,
reads the PDB file, evaluates its content, assembles the .ins text and writes it to disk.
The calls below build on each other's results, so their order matters.
"""
self.strings = None
self.IO = IO(options)
self.hklf = None
self.hklfile = None
self.atomContainer = AtomContainer()
self.header = Header()
# The .hkl handling comes first: makeHKLfile() may replace options['filename'] before the PDB file is read.
self.askHKL()
self.makeHKLfile()
self.IO.read()
# Both flags are initialised before readContent(), which can set neutronData through isCrystData().
self.hasHAtoms = False
self.neutronData = False
self.readContent()
self.printWarnings()
self.header.extractWavelength()
self.atomContainer.extractAllElements()
self.dealWithHAtoms()
self.atomContainer.getResidueList()
# self.header.extractCell()
self.header.abbreviateSpaceGroup()
self.header.extractResiSequence()
# self.header.extractScale()
self.header.makeGeneralRefinementInstructions()
# The occupancy question is only relevant when water residues are present.
if 'HOH' in self.atomContainer.getOtherResiSet():
self.askWaterOccupancy()
# joinstrings() fills self.strings, which writeFile() then reads from this object.
self.joinstrings()
self.IO.writeFile(self)
def askHKL(self):
"""
The first question the user is asked after starting pdb2ins. (pdb2ins will need the format of the hkl file
therefore it is necessary to run pdb2hkl subroutine first.)
'yes' will lead down the pdb2hkl path. Default answer is 'no'
The question is only asked when neither options['i'] nor options['b'] is set; answering 'y' sets
options['b']. Any answer other than empty, 'y'/'Y' or 'n'/'N' repeats the question.
:return: None
"""
if not options['i'] and not options['b']:
while True:
doHKL = raw_input('\nCreate .hkl file from structure factor file (cif) or PDB code? (y or n) '
'[N]: ')
# An empty answer selects the default 'no'.
if not doHKL:
break
if doHKL == 'Y' or doHKL == 'y':
options['b'] = True
break
if doHKL == 'N' or doHKL == 'n':
break
# NOTE(review): the author's own remark below questions the nesting level of this test -- confirm whether it
# should also run when options['b'] was already set before this method was called.
if options['b'] and not options['filename']: # check if indentation correct!
self.askHKLfilename()
def askHKLfilename(self):
"""
Choose a file or enter a pdbcode prefixed with '@'. The prefix will signal the program to download the -sf.cif
file from the RCSB PDB.
If the entered name is not an existing file, variants of it are tried: the name with everything but the last
four characters in upper case, the name in lower case, and the name with '.pdb' appended.
An entry starting with '@' is only accepted when it is exactly five characters long; it is then also stored
in options['filename'].
The accepted name is stored in self.hklfile and in options['d'].
:return: None
"""
while True:
self.hklfile = raw_input("\nEnter name of structure factor file to read (To download a PDB "
"file enter \'@<PDBCODE>\'): ") # .upper()
if not os.path.isfile(self.hklfile) and not self.hklfile.startswith('@'):
# Variant with the stem in upper case; the last four characters (e.g. the extension) are kept as typed.
newstring = str(self.hklfile[:-4].upper())+str(self.hklfile[-4:])
if not os.path.isfile(newstring) and not os.path.isfile(self.hklfile.lower()):
# print self.hklfile.upper(), newstring, self.hklfile.lower()
print 'INFO: File \'{}\' not found.'.format(self.hklfile)
if os.path.isfile(newstring):
self.hklfile = newstring
break
if os.path.isfile(self.hklfile.lower()):
self.hklfile = self.hklfile.lower()
break
# NOTE(review): '.pdb' is appended although a structure factor file is asked for -- this looks copied from
# IO.askFilename; confirm that it is intended.
if not self.hklfile.endswith('.pdb'):
self.hklfile += '.pdb'
if os.path.isfile(self.hklfile):
print 'INFO: Using file \'{}\' instead.'.format(self.hklfile)
break
if self.hklfile.startswith('@'):
# '@' plus a four character PDB code.
if len(self.hklfile) == 5:
options['filename'] = self.hklfile
break
else:
print 'ERROR: Given pdb code is not correct! Please check.'
pass
else:
break
options['d'] = self.hklfile
def makeHKLfile(self):
    """
    Starts the subroutine pdb2hkl when an .hkl file was requested.
    options 'b': tells us a hkl file should be created.
    options 'd': a input file in .cif format containing structure factors is given to create an .hkl
    options 'o': an output filename for the .ins file is specified and will be used for the .hkl file also.
    options 'filename': if starting with an '@', this code is used to download the *-sf.cif file from the RCSB PDB.
    This function generates an input and an output filename for the structure factor files and hands them,
    together with the 'i' flag, to pdb2hkl.run(). The HKLF value returned by pdb2hkl is stored in the header.
    If pdb2hkl reports a different filename than options['filename'], options['filename'] is updated.
    The .hkl name is derived with os.path.splitext, i.e. the same way the default .ins name is derived, so
    names without an extension or with several dots give matching .ins/.hkl base names.
    :return: None
    """
    if not options['b']:
        return
    filename = options['d'] if options['d'] else options['filename']
    outfile = None
    if options['o']:
        outfile = os.path.splitext(str(options['o']))[0] + '.hkl'
    elif str(filename).startswith('@'):
        insfilename = options['filename']
        if not str(insfilename).startswith('@'):
            outfile = os.path.splitext(str(insfilename))[0] + '.hkl'
        else:
            # '@<PDBCODE>': the code without the prefix becomes the base name.
            outfile = str(insfilename[1:]) + '.hkl'
    if not outfile:
        outfile = os.path.splitext(str(filename))[0] + '.hkl'
    optionsForPdb2hkl = {'filename': filename, 'i': bool(options['i']), 'o': outfile}
    try:
        print('INFO: Starting pdb2hkl.')
        try:
            self.header.hklf, options2 = pdb2hkl.run(optionsForPdb2hkl)
        except SystemExit:
            # pdb2hkl terminated itself: there is no result to evaluate, carry on without it.
            return
        hklFileName = options2['filename']
        if hklFileName != options['filename']:
            options['filename'] = hklFileName
    except SystemError:
        pass
def getHKLF(self):
    """
    :return: the hklf value currently stored in the header object
    """
    header = self.header
    return header.hklf
def buildInstructions(self):
    """
    Assembles the residue specific refinement instructions.
    The blocks are collected in this order: water instructions (ISOR/CONN when HOH is present, a REM hint
    otherwise), restraints for the terminal residues, then one block per entry of 'instructions' (labelled
    RTAB, HFIX and Restraints) containing the entries found for 'All' and every residue name in the file.
    Residues without an entry in the third block (except HOH) are looked up in 'ligandRestraints'; whatever
    is still missing afterwards is listed in a REM line and reported to the user.
    :return: string with all instruction lines joined by newlines
    """
    import itertools

    container = self.atomContainer
    container.makeRestraintsForTermini(self.header.getResiDict())
    if 'HOH' in container.getOtherResiSet():
        waterLines = ['ISOR_HOH 0.1 $O !water atoms are restrained to near isotropic behavior',
                      'CONN_HOH 0 O !generation of connectivity table fine-tunning']
    else:
        waterLines = ["REM ISOR and CONN 0 recommended on adding water"]
    blocks = [waterLines,
              ['\nREM Restraints and HFIX for terminal residues as follows:\n'],
              container.getRestraintsForTermini()]
    headings = ['RTAB', 'HFIX', 'Restraints']
    unresolved = set()
    for blockIndex, table in enumerate(instructions):
        blocks.append(['\nREM {} instruction block\n'.format(headings[blockIndex])])
        residueNames = ['All'] + container.getResidueList() + container.getOtherResiSet()
        for name in residueNames:
            try:
                entry = table[name]
            except KeyError:
                # Only a missing entry in the restraints block (index 2) counts as missing restraints.
                if blockIndex == 2 and name not in ['HOH', 'All']:
                    unresolved.add(name)
            else:
                blocks.append(entry)
    resolved = set()
    for name in unresolved:
        try:
            ligandBlock = ligandRestraints[name]
        except KeyError:
            continue
        blocks.append(['\n'])
        blocks.append(['REM Restraints for ligand {}:\n'.format(name)])
        blocks.append(ligandBlock)
        blocks.append(['\n'])
        resolved.add(name)
    unresolved -= resolved
    if unresolved:
        listing = ', '.join([str(name) for name in unresolved])
        if len(unresolved) <= 4:
            blocks.append(['\nREM Restraints missing for the following residues: ' + listing])
        else:
            blocks.append(['\nREM Restraints missing for the following residues: \n',
                           'REM ' + listing])
        blocks.append(['\n'])
        print('\nINFO: Following ligands/residues have no restraints:\n ' + listing +
              '\n Please remember to manually add restraints for this residues.\n')
    return '\n'.join(itertools.chain.from_iterable(blocks))
def readContent(self):
    """
    Looks at the record name (the first six columns) of every line of the pdb file and hands the line to the
    appropriate object for further use. Lines starting with '#' are skipped.
    Record names are compared including their blank padding ('ATOM  ', 'HET   '), because the PDB format
    stores them left-justified in a six column field; 'HET   ' must not match HETNAM/HETSYN records.
    ANISOU lines are only given to the atom container if the user (or the options) decided to keep
    anisotropic data; the question is asked once, at the first ANISOU line.
    Every line that is not consumed as ANISOU record is additionally offered to the header.
    A TypeError raised while processing is reported as 'not a PDB file' and terminates the program.
    :return: None
    """
    useAnisou = True
    alreadyAsked = False
    try:
        for line in self.IO.dataf:
            if line[0] == "#":
                continue
            record = line[:6]
            if record == 'EXPDTA':
                self.isCrystData(line)
            if record in ('ATOM  ', 'HETATM'):
                self.atomContainer.extractAtom(line)
            if record == 'HET   ':
                self.atomContainer.extractHetRecord(line)
            if record == 'ANISOU' and useAnisou:
                if not alreadyAsked:
                    useAnisou = self.askAnisou()
                    alreadyAsked = True
                if useAnisou:
                    self.atomContainer.extractAtomAnisou(line)
            else:
                self.header.interpretLine(line)
    except TypeError:
        print('\nERROR: File is not a PDB file.\n *** PDB2INS is terminated without writing an .ins file. ***')
        exit()
def printWarnings(self):
    """
    Warnings that could be triggered by many ATOM lines are only flagged in the atom container while the
    file is read. This function is called after all lines were read and prints each flagged message once.
    :return: None
    """
    # if self.atomContainer.negResiNumber:
    # print '\n*** WARNING: Negative residue numbers found in file. SHELXL might not be able to handle.***\n'
    messages = (
        ('overlongResiNum',
         '\n*** WARNING: One or more residues have a residue number larger than 10 000. Please check!***\n'),
        ('wrongResiName',
         '\n*** WARNING: The files contain residue names starting with a number. \n'
         ' Older SHELXL versions can only handle residues names starting with a letter.\n'),
        ('waterOffsetWarning',
         '\nINFO: Water residue numbers were changed to handle residues with insertion codes.\n'),
        ('resiNUmberCollision',
         '*** Warning: The residues with a number larger than 1000 could collide with applied offset \n'
         ' for insertion code residues. Please check!\n'),
    )
    for flagName, message in messages:
        if getattr(self.atomContainer, flagName):
            print(message)
def dealWithHAtoms(self):
    """
    Checks whether the file contains H atoms although it does not hold neutron data (e.g. X-ray structures
    from PHENIX pdb files). In that case hasHAtoms is set and askAboutHAtoms is called.
    :return: None
    """
    hydrogenCount = self.atomContainer.elementDict['H']
    if hydrogenCount > 0 and not self.neutronData:
        self.hasHAtoms = True
    if not self.hasHAtoms:
        return
    self.askAboutHAtoms()
def askAboutHAtoms(self):
    """
    Called when H atoms were found in a pdb file that does not contain neutron data.
    The user is informed that shelxl can regenerate H atoms with HFIX and that deleting them is recommended.
    options 'e' keeps the H atoms without asking; options 'i' (no questions) drops them; otherwise the user
    decides, and only the answer 'n'/'N' keeps them.
    :return: None
    """
    print('\nINFO: This pdb file contains Hydrogen atoms from X-ray diffraction data. \n'
          'It is recommended to delete all Hydrogen atoms now and use HFIX in shelxl \n'
          'to place them again. This program automatically creates the necessary \n'
          'HFIX instructions for natural amino acids.')
    if options['e']:
        self.atomContainer.keepHAtoms = True
        return
    if options['i']:
        print('INFO: Hydrogen atoms will not be transferred to .ins file.')
        self.atomContainer.keepHAtoms = False
        return
    reply = raw_input('Delete all Hydrogen atoms in .ins file? (y or n) [Y]: ')
    # Every answer except an explicit 'no' deletes the H atoms.
    self.atomContainer.keepHAtoms = reply in ('N', 'n')
def isCrystData(self, line):
    """
    Evaluates the EXPDTA line of the pdb file, which names the experiment.
    A line containing 'NEUTRON' switches the neutron flags on. A line containing neither 'NEUTRON' nor
    'X-RAY DIFFRACTION' is shown to the user, who is asked whether the program should continue.
    Default answer is 'no', which will terminate the program.
    :param line: string, the EXPDTA line
    :return: None
    """
    self.neutronData = False
    if 'NEUTRON' in line:
        self.neutronData = True
        self.atomContainer.neut = True
        print('\nINFO: This file contains NEUTRON diffraction data. '
              'The .ins file will now contain the NEUT instruction.\n'
              'Please be aware that the restraints for ligands may not be suitable for neutron data.')
        return
    if 'X-RAY DIFFRACTION' in line:
        return
    print('This pdb file contains the following experimental data:')
    print(line[6:])
    reply = raw_input('\nThis pdb file might not contain X-RAY diffraction data.\n'
                      'Important information necessary to create an .ins file might be missing.\n'
                      'Missing data can cause the program to terminate inadvertently.\n'
                      'Continue anyway? (y or n) [N]: ')
    if not reply or reply in ('N', 'n'):
        print(' *** PDB2INS is terminated without writing an .ins file. ***')
        exit()
def askAnisou(self):
    """
    Only called when anisotropic data is present.
    If options 'a' is set, its value is returned directly. Without it, the user is asked (interactive mode)
    whether the anisotropic data should be converted to isotropic data; the default answer 'yes' as well as
    any invalid answer returns False, 'no' returns True. In non-interactive mode (options 'i') the default
    is reported and options['a'] is returned.
    :return: useAnisou (boolean) or options['a']
    """
    if options['a']:
        return options['a']
    if options['i']:
        answer = 'No' if options['a'] else 'Yes'
        print('INFO: The pdb file contains anisotropic data. Convert to isotropic? (y or n) [Y]: '
              '\n PDB2INS used default answer "{}".'.format(answer))
        return options['a']
    reply = raw_input('\nThe pdb file contains anisotropic atom data. \nConvert anisotropic atoms to '
                      'isotropic? (y or n) [Y]: ')
    if not reply or reply in ('Y', 'y'):
        return False
    if reply in ('N', 'n'):
        return True
    print(" *** ERROR: Invalid response \'{}\'. "
          "Anisotropic atoms will be converted to isotropic as default. *** ".format(reply))
    return False
def askWaterOccupancy(self):
    """
    Decides whether the occupancy of all water residues is reset to unity and, if so, performs the reset
    through Atom.resetOccupancy. In interactive mode the user is asked (default and invalid answers mean
    'yes'); with options 'i' the reset is always done.
    :return: None
    """
    if options['i']:
        print('INFO: PDB2INS reset water occupancy to unity as default.')
        resetOccupancy = True
    else:
        reply = raw_input("\nReset water occupancy to unity? (y or n) [Y]: ")
        if reply in ('n', 'N'):
            resetOccupancy = False
        else:
            if reply and reply not in ('y', 'Y'):
                print('Invalid response. Water occupancy reset to unity as default.')
            resetOccupancy = True
    if resetOccupancy:
        Atom.resetOccupancy('HOH', 1)
def joinstrings(self):
"""
Builds the complete content of the .ins file as a list of strings in self.strings:
TITL, CELL, ZERR, the space group remark, LATT and symmetry cards, optionally NEUT, SFAC, optionally DISP,
UNIT, the general refinement instructions, RIGU/DELU/SIMU, the residue specific instructions,
instructions for insertion codes and disulfide bridges, the atom list and finally HKLF and END.
Nothing is joined here; IO.writeFile() joins self.strings when writing.
:return: None
"""
self.strings = []
self.strings.append("TITL converted from file {}\n".format(self.IO.workfile))
self.strings.append("CELL {:7.5f} {} \n".format(self.header.getWavelength(),
'{:7.3f} {:7.3f} {:7.3f} {:7.2f} {:7.2f} {:7.2f}'
.format(*self.header.getCell())))
self.strings.append("ZERR {} {} \n\n".format(self.header.getZvalue(),
'{:7.3f} {:7.3f} {:7.3f} {:7.2f} {:7.2f} {:7.2f}'
.format(*self.header.getCellError())))
self.strings.append("REM Space group {} \n\n".format(self.header.getSpaceGroup()))
self.strings.append("LATT {} \n".format(self.header.getLattice()))
#self.header.validateSpaceGroup(self.header.getAbbrSpaceGroup())
self.strings += getSymmCards(self.header.getAbbrSpaceGroup())
# try:
# self.strings += getSymmCards(self.header.getAbbrSpaceGroup())
# except KeyError:
# print 'Space group {} is not valid.'.format(self.header.getAbbrSpaceGroup())
# exit()
if self.neutronData:
self.strings.append("NEUT \n")
# SFAC lists the element symbols; UNIT below lists, in the same order, the atom count of each element
# multiplied by the Z value.
self.strings.append("SFAC {} \n".format(('{} '*len(self.atomContainer.getElementList()))
.format(*self.atomContainer.getElementList())))
if self.header.makeDISP:
self.header.makeDISPinstruction(self.atomContainer.getElementList())
self.strings += self.header.getDISPinstructions()
self.strings.append("UNIT {} \n\n".format(('{} '*len(self.atomContainer.getElementList()))
.format(*[self.atomContainer.elementDict[key] * self.header.getZvalue()
for key in self.atomContainer.getElementList()])))
# The string returned here is not used; the atom list is generated again further down.
# NOTE(review): this call is presumably needed for its side effects (incompleteResiString and
# insertionCodeString are read below) -- confirm before removing it.
atomContainerString = self.atomContainer.asShelxString(self.header.getCell())
self.strings.append(self.header.getGeneralRefinementInstructions())
self.strings.append("\n")
self.strings.append(self.header.getRIGUInstructions(self.atomContainer.getElementList2()))
self.strings.append(self.header.getDELUInstruction(self.atomContainer.getElementList2()))
self.strings.append(self.header.getSIMUInstruction(self.atomContainer.getElementList2()))
self.strings.append(self.header.getGeneralRefinementInstructions3())
if self.atomContainer.incompleteResiString:
self.strings.append("\n")
self.strings.append("REM HFIX 0 instructions were added for incomplete residues.\n")
self.strings += self.atomContainer.incompleteResiString
self.strings.append("\n")
self.strings.append(self.buildInstructions())
self.strings.append("\n")
if self.atomContainer.insertionCodeString:
self.strings.append('REM Instructions for residues with insertion code.\n')
self.strings.append("\n")
for i in self.atomContainer.insertionCodeString:
self.strings.append(i + '\n')
self.strings.append("\n")
# Disulfide bridges are searched before the atom list is written.
self.atomContainer.findSSBonds()
if self.atomContainer.ssBonds:
self.strings.append("REM Instructions for disulfide-bridges:\n")
self.strings += self.atomContainer.getSSBonds()
self.strings.append("\n\n")
self.strings.append(self.atomContainer.asShelxString(self.header.getCell()))
# self.strings.append(self.atomContainer.asShelxString(self.header.getCell()))
self.strings.append("\n\n")
self.strings.append(self.header.getHklf()+"\nEND")
class IO(object):
def __init__(self, options, *args, **kwargs):
self.workfile = None
self.dataf = None
self.outputFilename = None
self.usePDBredo = False
self.options = options
def askPDBredo(self):
    """
    Decides whether the .pdb file is downloaded from the RCSB (1) or from the PDB-REDO server (2).
    Nothing is changed when options 'r' is set. With options 'i' the RCSB is used without asking; otherwise
    the user is prompted until the answer is empty (default 1), '1' or '2'.
    :return: None
    """
    # if not self.options['GUI']:
    noQuestions = self.options['i']
    redoRequested = self.options['r']
    if redoRequested:
        return
    if noQuestions:
        self.usePDBredo = False
        return
    prompt = ('\nDownload PDB file from RCSB Protein Data Base (1) or PDB_REDO database '
              '(2)? [1]: ')
    choice = raw_input(prompt)
    while choice not in ('', '1', '2'):
        choice = raw_input(prompt)
    self.usePDBredo = (choice == '2')
def askFilename(self):
"""
Determines the pdb file to read and stores its name in self.workfile.
The name is taken from options['filename'] if given; otherwise the user is asked for it in the form
'name.format'. It is also possible to give the PDB code of the desired file in the form '@PDBCODE'.
In this case fetchPDB or, when PDB-REDO was chosen, fetchPDBredo from getPDBFiles is called to retrieve
the file, and the returned value becomes self.workfile.
For names that are not existing files, variants are tried: the name with everything but the last four
characters in upper case, the name in lower case, and the name with '.pdb' appended.
Some PDB entries (presumably used for testing):
3LOH (big file!)
1AZI (relatively small)
3YMI, 1CTJ (space group R 3)
2QXX (space group R 3 2)
:return: None; the result is stored in self.workfile
"""
self.workfile = self.options['filename']
while True:
if self.workfile: # when cmd parser is used, the filename should already be specified, skipping user input
try:
if not os.path.isfile(self.workfile) and '@' not in self.workfile:
# Variant with the stem in upper case; the last four characters are kept as given.
newstring = str(self.workfile[:-4].upper())+str(self.workfile[-4:])
if not os.path.isfile(newstring) and not os.path.isfile(self.workfile.lower()):
print 'INFO: File {} not found.'.format(self.workfile)
if not self.workfile.endswith('.pdb'):
self.workfile += '.pdb'
if os.path.isfile(self.workfile):
print 'INFO: Using file \'{}\' instead.'.format(self.workfile)
else:
print ' *** Error: Given file name not valid. *** '
self.workfile = None
if os.path.isfile(newstring):
self.workfile = newstring
# NOTE(review): self.workfile may have been set to None above, in which case .lower() raises an
# AttributeError that the 'except TypeError' below does not catch -- verify.
if os.path.isfile(self.workfile.lower()):
self.workfile = self.workfile.lower()
# if self.workfile.startswith('@'):
# trystring = str(self.workfile[-4:]) + '_a.pdb' # when the file was already loaded in the GUI
# if os.path.isfile(trystring):
# self.workfile = trystring
except TypeError:
print 'type error'
self.workfile = None
if not self.workfile: # in interactive mode without cmd options, the user is asked for the filename
if not self.options['d'] and not self.options['i']:
while True:
self.workfile = raw_input("\nEnter name of PDB file to read (To download a PDB "
"file enter \'@<PDBCODE>\'): ") # .upper()
if not os.path.isfile(self.workfile) and not self.workfile.startswith('@'):
newstring = str(self.workfile[:-4].upper())+str(self.workfile[-4:])
if not os.path.isfile(newstring) and not os.path.isfile(self.workfile.lower()):
# print self.workfile.upper(), newstring, self.workfile.lower()
print 'INFO: File \'{}\' not found.'.format(self.workfile)
if os.path.isfile(newstring):
self.workfile = newstring
break
if os.path.isfile(self.workfile.lower()):
self.workfile = self.workfile.lower()
break
if not self.workfile.endswith('.pdb'):
self.workfile += '.pdb'
if os.path.isfile(self.workfile):
print 'INFO: Using file \'{}\' instead.'.format(self.workfile)
break
else:
if os.path.isfile(self.workfile):
self.options['filename'] = self.workfile
break
else: # here the possibility is handled, that the user is in interactive mode and created a .hkl already
if self.options['d']:
# NOTE(review): this reads the module level 'options' while the rest of the method uses self.options.
hklfilename = options['d'] # the filename of the sf file is taken and an input filename suggested
# NOTE(review): possiblePdbFilename is only assigned when the name starts with '@'; with the
# alternative below commented out, the prompt that follows would raise a NameError otherwise.
if hklfilename.startswith('@'):
possiblePdbFilename = hklfilename
# else:
# possiblePdbFilename = ''.join(str(hklfilename).split('.')[:-1]) + '.pdb'
while True:
self.workfile = raw_input("\nEnter name of PDB file to read (To download a PDB "
"file enter \'@<PDBCODE>\')[{}]: ".format(possiblePdbFilename)) #.upper()
# An empty answer accepts the suggested name.
if not self.workfile:
self.workfile = possiblePdbFilename
# if os.path.isfile(self.workfile):
# print self.workfile, 'exists'
# self.options['filename'] = self.workfile
if not os.path.isfile(self.workfile) and not self.workfile.startswith('@'):
newstring = str(self.workfile[:-4].upper())+str(self.workfile[-4:])
if not os.path.isfile(newstring) and not os.path.isfile(self.workfile.lower()):
# print self.workfile.upper(), newstring, self.workfile.lower()
print 'INFO: File \'{}\' not found.'.format(self.workfile)
if os.path.isfile(newstring):
self.workfile = newstring
break
if os.path.isfile(self.workfile.lower()):
self.workfile = self.workfile.lower()
break
if not self.workfile.endswith('.pdb'):
self.workfile += '.pdb'
if os.path.isfile(self.workfile):
print 'INFO: Using file \'{}\' instead.'.format(self.workfile)
break
else:
break
else:
self.workfile = self.workfile
if self.workfile.startswith('@'): # if the user was asked for a filename, it is transferred to options
if not self.options['filename']:
self.options['filename'] = self.workfile # now a correct output filename can be created
# Choose the download server: options 'r' forces PDB-REDO, otherwise askPDBredo() decides.
if self.options['r']:
self.usePDBredo = True
else:
# if not self.options['i']:
self.askPDBredo()
if self.usePDBredo:
self.options['r'] = True
# The leading '@' is stripped before the code is passed to the download functions.
if self.workfile.startswith('@') and self.options['r']:
from getPDBFiles import fetchPDBredo
self.workfile = fetchPDBredo(self.workfile[1:], self.options)
elif self.workfile.startswith('@'): # here elif when the if statement before is used!
from getPDBFiles import fetchPDB
# print self.workfile[1:]
self.workfile = fetchPDB(self.workfile[1:], self.options)
# print "INFO: Fetching PDB file for entry {}.".format(self.workfile)
if self.workfile:
break
else:
# self.askPDBredo()
# if self.usePDBredo:
# self.options['r'] = True
break
# self.workfile = '3LOH.pdb' # for testing purposes only
def read(self):
    """
    Takes the pdb file determined by askFilename and reads all of its lines into self.dataf.
    The file is closed again as soon as it has been read. If the file cannot be opened, the filename is
    determined once more and reading is attempted a second time; if that fails as well, self.dataf is
    left unchanged.
    :return: None
    """
    self.askFilename()
    for attempt in range(2):
        try:
            with open(self.workfile, 'r') as workf:
                self.dataf = workf.readlines()
        except IOError:
            # Only the first failure triggers a new filename; a second failure ends the attempts.
            if attempt == 0:
                self.askFilename()
            continue
        print('INFO: File {} successfully opened.'.format(self.workfile))
        return
def askOutputFilename(self):
    """
    Determines the name of the .ins file and stores it in self.outputFilename.
    The default name is derived from the pdb file name (cut at the first '_' if there is one) or from the
    '@<PDBCODE>' given as filename. In interactive mode the user is asked for a name; with options 'i' the
    name is taken from options 'o'. An empty name selects the default and '.ins' is appended when missing.
    :return: None
    """
    # defaultName = os.path.splitext(self.workfile)[0] + '.ins'
    if '_' in self.workfile:
        base = os.path.splitext(self.workfile)[0].split('_')[0]
    elif self.options['filename'].startswith('@'):
        base = self.options['filename'].split('@')[1]
    else:
        base = os.path.splitext(self.workfile)[0]
    defaultName = base + '.ins'
    if self.options['i']:
        chosen = self.options['o']
    else:
        chosen = raw_input("\nEnter .ins filename to be created [{}]: ".format(defaultName))
    if not chosen:
        chosen = defaultName
    elif not chosen.endswith('.ins'):
        chosen += '.ins'
    self.outputFilename = chosen
def writeFile(self, data):
    """
    Writes the strings collected in 'data' to the output file. The file name is determined by
    askOutputFilename; 'newfile.ins' is used should no name be available.
    The file is closed even if writing fails, and the 'created' message is printed after the file was
    written. Finally the run time since the module level 'start_time' is reported.
    :param data: object whose attribute 'strings' holds all strings needed for the .ins file
    :return: None
    """
    global start_time
    self.askOutputFilename()
    targetName = self.outputFilename if self.outputFilename else "newfile.ins"
    with open(targetName, 'w') as f:
        f.write(''.join(data.strings))
    print("INFO: File {} created.".format(targetName))
    t = (time.time() - start_time)
    print('INFO: +++ PDB2INS finished running after {:3.4f} seconds. +++ '.format(t))
class Header(object):
def __init__(self):
self.cell = None
self.crystLine = None
self.cellError = []
self.remark200Lines = []
self.scaleLine = []
self.wavelength = None
self.lattice = None
self.sequenceLines = []
self.ssBondLines = []
self.resiDict = {}
self.generalRefinement = []
self.generalRefinement2 = []
self.generalRefinement3 = []
self.generalRefinement2Order = None
self.ssBondList = []
self.userCell = None
self.hklf = None
self.zValue = None
self.spaceGroup = None
self.shortenedSpaceGroup = None
self.kisselfile = None
self.kisselContent = None
self.dispInstructions = []
self.makeDISP = False
def interpretLine(self, line):
    """
    Sorts a line of the input file according to the record name it starts with.
    CRYST1 triggers the extraction of all crystallographic data, REMARK 200, SEQRES and SCALE lines are
    collected for later evaluation, and NUMMDL is checked for multiple models. Other lines are ignored.
    :param line: From the input PDB file.
    :return: None
    """
    if line.startswith("CRYST1"):
        # print line
        self.crystLine = line
        self.askHklf()
        self.extractCell()
        self.extractSpaceGroup()
        self.extractZvalue()
        self.estimateCellError()
    elif line.startswith("REMARK 200"):
        self.remark200Lines.append(line)
    elif line.startswith("SEQRES"):
        self.sequenceLines.append(line)
    elif line.startswith("SCALE"):
        self.scaleLine.append(line)
    elif line.startswith('NUMMDL'):
        self.extractNumberOfModels(line)
def extractNumberOfModels(self, line):
    """
    Stop the program if a NUMMDL line announces more than one model.

    The text after the record name is read as the number of models. Neither PDB2INS nor SHELXL
    can handle multiple models, so a count above one prints an error and exits.
    :param line: A line of the PDB file starting with NUMMDL.
    :return: None if the file holds at most one model; otherwise the program exits.
    """
    countField = line[6:].strip()
    try:
        modelCount = int(countField)
    except ValueError:
        # A count that cannot be read is treated like a multi-model file.
        modelCount = 2
    if modelCount <= 1:
        return
    print('ERROR: This PDB file contains more than one model. PDB2INS cannot handle multiple models.\n'
          '*** PDB2INS is terminated without writing an .ins file. ***')
    exit()
def extractCell(self):
    """
    Determine the unit cell and store it in self.cell as [a, b, c, alpha, beta, gamma] (floats).

    Sources are tried in this order: the comma separated cell given with option 'c', the CRYST1
    line stored in self.crystLine, and finally an interactive query through askCell(). A cell
    that looks implausible (a <= 15 or alpha outside 20-160 degrees) is shown to the user, who
    may type in a corrected cell or press return to keep the one that was found.

    If the entry describes a structure determined by a technique other than X-ray crystallography,
    CRYST1 contains a = b = c = 1.0, alpha = beta = gamma = 90 degrees, space group = P 1, and Z = 1.
    :return: None. The result is stored in self.cell.
    """
    if options['c']:
        self.cell = self._parseCellValues(options['c'], ',')
        if self.cell:
            print('INFO: Cell is set to {}.'.format(self.cell))
    if not self.cell:
        # Column ranges of a, b, c, alpha, beta, gamma within the CRYST1 line.
        columns = ((6, 15), (15, 24), (24, 33), (33, 40), (40, 47), (47, 54))
        try:
            self.cell = [float(self.crystLine[first:last]) for first, last in columns]
        except ValueError:
            self.cell = None
        else:
            if options['c']:
                print('INFO: Cell is set to {}.'.format(self.cell))
    if self.cell:
        # Read the values back from self.cell so they are defined no matter which source was used.
        cell_a, cell_b, cell_c, alpha, beta, gamma = self.cell
        # NOTE(review): a dummy cell of 1 1 1 sums to 3, so it is only caught by the a <= 15 test.
        if (cell_a + cell_b + cell_c) < 1:
            self.cell = None
        if cell_a <= 15.00 or not 20 < alpha < 160:
            print("\nINFO: Warning: Cell may not be correct! Please check.")
            reply = raw_input("Cell found: ({a} {b} {c} {alpha} {beta} {gamma}). This may not be correct. \n"
                              "Please enter correct cell [{a} {b} {c} "
                              "{alpha} {beta} {gamma}]:".format(a=float(cell_a), b=float(cell_b), c=float(cell_c),
                                                                alpha=float(alpha), beta=float(beta),
                                                                gamma=float(gamma)))
            if reply:
                # Non-numeric or incomplete input no longer raises; it is reported and asked again below.
                self.cell = self._parseCellValues(reply, ' ')
                if not self.cell:
                    print('Cell given is not valid:  {}'.format(reply))
    # Whatever happened above, do not leave without a cell: later steps index into self.cell.
    while not self.cell:
        self.askCell()
def _parseCellValues(self, text, separator):
    """
    Convert a separated list of numbers into the six cell parameters.
    :param text: String holding at least six numbers.
    :param separator: String that separates the numbers in text.
    :return: List with the first six numbers as floats, or None if text does not hold six numbers.
    """
    try:
        values = [float(item) for item in text.split(separator) if item]
    except (TypeError, ValueError):
        return None
    if len(values) < 6:
        return None
    return values[:6]
def askCell(self):
    """
    Set self.cell from the cell given with option 'c' or, failing that, from a cell typed in by the user.

    The option value is expected as comma separated numbers, the typed cell as space separated
    numbers. In both cases self.cell becomes a list of six floats [a, b, c, alpha, beta, gamma].
    :return: False if the typed cell could not be read (self.cell is left untouched), otherwise None.
    """
    def toCell(text, separator):
        # Returns the first six numbers in text as floats, or None if that is not possible.
        try:
            numbers = [float(item) for item in text.split(separator) if item]
        except (TypeError, ValueError):
            return None
        if len(numbers) < 6:
            return None
        return numbers[:6]

    # Store the option as numbers rather than as the raw option string.
    cell = toCell(options['c'], ',') if options['c'] else None
    if cell is None:
        reply = raw_input('Enter cell (a b c alpha beta gamma):')
        cell = toCell(reply, ' ')
        if cell is None:
            return False
        if cell[0] <= 2.00 or not 20 < cell[3] < 160:
            print("INFO: Warning: Cell may not be correct! Please check.")
    self.cell = cell
def askHklf(self):
"""
Sets the HKLF code. Default value is 4.
:return:
"""
# print options['i'], options['h']
# if options['b']:
# self.hklf = Data.getHKLF()
if not self.hklf:
if not options['i'] and not options['h']:
reply = raw_input('\nEnter HKLF code (3 for F, 4 for F-squared) [4]:')
if not reply:
self.hklf = '4'
elif reply == '4' or reply == '3':
self.hklf = reply
elif options['h']:
self.hklf = options['h']
else:
self.hklf = 3
print 'INFO: HKLF is set to default: HKLF{}. Please check if HKLF is correct.'.format(self.hklf)
# else:
# self.hklf = '4'
# print " ** ERROR: Input HKLF code {} not valid. " \
# "HKLF code was set to value '4' for F-squared. ** ".format(reply)
def getHklf(self):
return 'HKLF {}\n'.format(self.hklf)
def getCell(self):
# self.userCell = options['c']
# if self.userCell:
# return self.userCell
# else:
return self.cell
def estimateCellError(self):
"""
The cell error is calculated.
The sides of the cell get an error of 0.1% and
the cell angles get a deviation of 0.05 if the angles are not within an range of 0.01 degrees to 90 degrees.
:return:
"""
for i in self.cell[:3]:
self.cellError.append(i * 0.001)
for i in self.cell[3:]:
if 89.99 < i < 90.01:
self.cellError.append(0.00)
else:
self.cellError.append(0.05)
def getCellError(self):
return self.cellError
def extractSpaceGroup(self):
"""
The full International Tables Hermann-Mauguin symbol is used, e.g., P 1 21 1 instead of P 21.
The Hermann-Mauguin space group symbol is given without parenthesis, e.g., P 43 21 2.
Please note that the screw axis is described as a two digit number.
Even when no PDB entry today is fulfilling this criteria,
the main whether the space group is starting with an 'R" even if it is rhombohedral obverse on hexagonal axes
remains in the program.
"""
doNotReplace1 = ['P 1', 'A 1', 'B 1', 'C 1', 'I 1', 'F 1', 'P 3 1 2', 'P 3 2 1', 'P 31 1 2', 'P 31 2 1',
'P 32 1 2', 'P 32 2 1']
if not options['s']:
try:
self.spaceGroup = self.crystLine[55:66].strip('\n')
if self.spaceGroup.strip() not in doNotReplace1:
self.spaceGroup = self.spaceGroup.replace(' 1', '').lstrip()
if self.spaceGroup[1] == "R":
x = self.cell[6] - self.cell[5]
if x >= 20:
self.spaceGroup[1] = "H"
self.abbreviateSpaceGroup()
if not self.validateSpaceGroup(self.getAbbrSpaceGroup()):