-
Notifications
You must be signed in to change notification settings - Fork 50
/
sxwindow.py
1347 lines (1196 loc) · 76 KB
/
sxwindow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
from __future__ import print_function
#
# Author: Toshio Moriya 10/20/2016 (toshio.moriya@mpi-dortmund.mpg.de)
# Author: T. Durmaz 08/29/2014 (tunay.durmaz@uth.tmc.edu)
#
# This software is issued under a joint BSD/GNU license. You may use the
# source code in this file under either license. However, note that the
# complete EMAN2 and SPHIRE software packages have some GPL dependencies,
# so you are responsible for compliance with the licenses of these packages
# if you opt to use BSD licensing. The warranty disclaimer below holds
# in either instance.
#
# This complete copyright notice must be included in any revised version of the
# source code. Additional authorship citations may be added, but existing
# author citations must be preserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import EMAN2_cppwrap
import EMAN2db
import EMAN2jsondb
import applications
import filter
import fundamentals
import glob
import global_def
import inspect
import morphology
import mpi
import optparse
import os
import shutil
import statistics
import sys
import time
import utilities
pass#IMPORTIMPORTIMPORT import EMAN2
pass#IMPORTIMPORTIMPORT import EMAN2_cppwrap
pass#IMPORTIMPORTIMPORT import EMAN2db
pass#IMPORTIMPORTIMPORT import EMAN2jsondb
pass#IMPORTIMPORTIMPORT import applications
pass#IMPORTIMPORTIMPORT import filter
pass#IMPORTIMPORTIMPORT import fundamentals
pass#IMPORTIMPORTIMPORT import glob
pass#IMPORTIMPORTIMPORT import global_def
pass#IMPORTIMPORTIMPORT import inspect
pass#IMPORTIMPORTIMPORT import json
pass#IMPORTIMPORTIMPORT import morphology
pass#IMPORTIMPORTIMPORT import mpi
pass#IMPORTIMPORTIMPORT import optparse
pass#IMPORTIMPORTIMPORT import os
pass#IMPORTIMPORTIMPORT import shutil
pass#IMPORTIMPORTIMPORT import sparx
pass#IMPORTIMPORTIMPORT import statistics
pass#IMPORTIMPORTIMPORT import sys
pass#IMPORTIMPORTIMPORT import time
pass#IMPORTIMPORTIMPORT import utilities
from builtins import range
pass#IMPORTIMPORTIMPORT import os, sys
pass#IMPORTIMPORTIMPORT from optparse import OptionParser, SUPPRESS_HELP
pass#IMPORTIMPORTIMPORT import glob
pass#IMPORTIMPORTIMPORT import json
pass#IMPORTIMPORTIMPORT import shutil
pass#IMPORTIMPORTIMPORT from EMAN2 import *
pass#IMPORTIMPORTIMPORT from EMAN2db import *
pass#IMPORTIMPORTIMPORT from EMAN2jsondb import *
pass#IMPORTIMPORTIMPORT from sparx import *
pass#IMPORTIMPORTIMPORT from applications import MPI_start_end
pass#IMPORTIMPORTIMPORT from morphology import ampcont2angle
pass#IMPORTIMPORTIMPORT from inspect import currentframe, getframeinfo
pass#IMPORTIMPORTIMPORT from utilities import generate_ctf
pass#IMPORTIMPORTIMPORT import global_def
pass#IMPORTIMPORTIMPORT from global_def import *
# ========================================================================================
# Define functions for reading coordinates files of different formats.
# One of these will be used in main() through a first-class data type variable of Python
# (like function pointer in C/C++)
# This way, switch statement is unnecessary inside of the coordinates loop.
# ========================================================================================
def read_sphire_coords_file(coords_path):
# coords_list = read_text_row(coords_path)
coords_list = utilities.read_text_row(coords_path, skip="#")
return coords_list
def read_eman1_coords_file(coords_path):
coords_list = utilities.read_text_row(coords_path)
for i in range(len(coords_list)):
coords_list[i] = [(coords_list[i][0] + coords_list[i][2] // 2), (coords_list[i][1] + coords_list[i][3] // 2)]
return coords_list
def read_eman2_coords_file(coords_path):
coords_list = EMAN2jsondb.js_open_dict(coords_path)["boxes"]
for i in range(len(coords_list)):
coords_list[i] = [coords_list[i][0], coords_list[i][1]]
return coords_list
def read_spider_coords_file(coords_path):
coords_list = utilities.read_text_row(coords_path)
for i in range(len(coords_list)):
coords_list[i] = [coords_list[i][2], coords_list[i][3]]
return coords_list
# ========================================================================================
# Helper functions
# ========================================================================================
# ----------------------------------------------------------------------------------------
# Generate command line
# ----------------------------------------------------------------------------------------
def get_cmd_line():
cmd_line = ""
for arg in sys.argv:
cmd_line += arg + " "
cmd_line = "Shell line command: " + cmd_line
return cmd_line
# ----------------------------------------------------------------------------------------
# Get suffix of current time stamp
# ----------------------------------------------------------------------------------------
def get_time_stamp_suffix():
pass#IMPORTIMPORTIMPORT from time import strftime, localtime
time_stamp_suffix = time.strftime("%Y%m%d_%H%M%S", time.localtime())
return time_stamp_suffix
# ----------------------------------------------------------------------------------------
# Data type checker
# ----------------------------------------------------------------------------------------
def is_float(value):
try:
float(value)
return True
except ValueError:
return False
# ========================================================================================
# Main function
# ========================================================================================
def main():
program_name = os.path.basename(sys.argv[0])
usage = program_name + """ input_micrograph_pattern input_coordinates_pattern input_ctf_params_source output_directory --selection_list=selection_list --coordinates_format --box_size=box_size --skip_invert --limit_ctf --astigmatism_error=astigmatism_error --resample_ratio=resample_ratio --check_consistency
Window particles from micrographs using the particles coordinates.
All Micrographs Mode - Process all micrographs in a directory:
Specify path pattern of input micrographs and coordinates files with a wild card (*).
Use the wild card to indicate the place of variable part of the file names (e.g. serial number, time stamp, and etc).
The path pattern must be enclosed by single quotes (') or double quotes ("). (Note: sxgui.py automatically adds single quotes (')).
The substring at the variable part must be same between a associated pair of input micrograph and coordinates file.
BDB files can not be selected as input micrographs.
Next, specify the source of CTF paramters.
For cryo data, this should be the file produced by sxcter and normally called partres.txt.
For negative staining data, it should be the pixel size [A/Pixels] of input micrographs.
Finally, specify output directory where all outputs should be saved.
In this mode, all micrographs matching the path pattern will be processed.
mpirun -np 32 sxwindow.py './mic*.hdf' 'info/mic*_info.json' outdir_cter/partres/partres.txt particles --coordinates_format=eman2 --box_size=64
Selected Micrographs Mode - Process all micrographs in a selection list file:
In addition to input micrographs path pattern, coordinates files path pattern, CTF paramters source, and output directry arguments,
specify a name of micrograph selection list text file using --selection_list option.
In this mode, only micrographs in the selection list which matches the file name part of the pattern (ignoring the directory paths) will be processed.
If a micrograph name in the selection list does not exists in the directory specified by the micrograph path pattern, processing of the micrograph will be skipped.
mpirun -np 32 sxwindow.py './mic*.hdf' 'info/mic*_info.json' outdir_cter/partres/partres.txt particles --selection_list=mic_list.txt --coordinates_format=eman2 --box_size=64
Single Micrograph Mode - Process a single micrograph:
In addition to input micrographs path pattern, coordinates files path pattern, CTF paramters source, and output directry arguments,
specify a single micrograph name using --selection_list option.
In this mode, only the specified single micrograph will be processed.
If this micrograph name does not matches the file name part of the pattern (ignoring the directory paths), the process will exit without processing it.
If this micrograph name matches the file name part of the pattern but does not exists in the directory which specified by the micrograph path pattern, again the process will exit without processing it.
Use single processor for this mode.
sxwindow.py './mic*.hdf' 'info/mic*_info.json' outdir_cter/partres/partres.txt particles --selection_list=mic0.hdf --coordinates_format=eman2 --box_size=64
For negative staining data, set the pixel size [A/Pixels] as the source of CTF paramters and use --skip_invert.
mpirun -np 32 sxwindow.py './mic*.hdf' 'info/mic*_info.json' 5.2 particles --coordinates_format=eman2 --box_size=64 --skip_invert
"""
parser = optparse.OptionParser(usage, version=global_def.SPARXVERSION)
parser.add_option("--selection_list", type="string", default=None, help="Micrograph selecting list: Specify a name of micrograph selection list text file for Selected Micrographs Mode. The file extension must be \'.txt\'. Alternatively, the file name of a single micrograph can be specified for Single Micrograph Mode. (default none)")
parser.add_option("--coordinates_format", type="string", default="eman1", help="Coordinate file format: Allowed values are \'sphire\', \'eman1\', \'eman2\', or \'spider\'. The sphire, eman2, and spider formats use the particle center as coordinates. The eman1 format uses the lower left corner of the box as coordinates. (default eman1)")
parser.add_option("--box_size", type="int", default=256, help="Particle box size [Pixels]: The x and y dimensions of square area to be windowed. The box size after resampling is assumed when resample_ratio < 1.0. (default 256)")
parser.add_option("--skip_invert", action="store_true", default=False, help="Skip invert image contrast: Use this option for negative staining data. By default, the image contrast is inverted for cryo data. (default False)")
parser.add_option("--limit_ctf", action="store_true", default=False, help="Use CTF limit filter: Frequencies where CTF oscillations can not be properly modeled with the resampled pixel size will be discarded in the images with the appropriate low-pass filter. This has no effects when the CTER partres file is not specified by the CTF paramters source argument. (default False)")
parser.add_option("--astigmatism_error", type="float", default=360.0, help="Astigmatism error limit [Degrees]: Set astigmatism to zero for all micrographs where the angular error computed by sxcter is larger than the desired value. This has no effects when the CTER partres file is not specified by the CTF paramters source argument. (default 360.0)")
parser.add_option("--resample_ratio", type="float", default=1.0, help="Ratio between new and original pixel size: Use a value between 0.0 and 1.0 (excluding 0.0). The new pixel size will be automatically recalculated and stored in CTF paramers when resample_ratio < 1.0 is used. (default 1.0)")
parser.add_option("--check_consistency", action="store_true", default=False, help="Check consistency of dataset: Create a text file containing the list of Micrograph ID entries might have inconsitency among the provided dataset. (i.e. mic_consistency_check_info_TIMESTAMP.txt). (default False)")
(options, args) = parser.parse_args(sys.argv[1:])
# ====================================================================================
# Prepare processing
# ====================================================================================
# ------------------------------------------------------------------------------------
# Set up MPI related variables
# ------------------------------------------------------------------------------------
# Detect if program is running under MPI
RUNNING_UNDER_MPI = "OMPI_COMM_WORLD_SIZE" in os.environ
main_mpi_proc = 0
if RUNNING_UNDER_MPI:
pass#IMPORTIMPORTIMPORT from mpi import mpi_init
pass#IMPORTIMPORTIMPORT from mpi import MPI_COMM_WORLD, mpi_comm_rank, mpi_comm_size, mpi_barrier, mpi_reduce, MPI_INT, MPI_SUM
mpi.mpi_init(0, [])
my_mpi_proc_id = mpi.mpi_comm_rank(mpi.MPI_COMM_WORLD)
n_mpi_procs = mpi.mpi_comm_size(mpi.MPI_COMM_WORLD)
else:
my_mpi_proc_id = 0
n_mpi_procs = 1
# ------------------------------------------------------------------------------------
# Set up SPHIRE global definitions
# ------------------------------------------------------------------------------------
if global_def.CACHE_DISABLE:
pass#IMPORTIMPORTIMPORT from utilities import disable_bdb_cache
utilities.disable_bdb_cache()
# Change the name log file for error message
original_logfilename = global_def.LOGFILE
global_def.LOGFILE = os.path.splitext(program_name)[0] + '_' + original_logfilename + '.txt'
# ------------------------------------------------------------------------------------
# Print command line
# ------------------------------------------------------------------------------------
if my_mpi_proc_id == main_mpi_proc:
print(" ")
print("%s" % get_cmd_line())
# print(" ")
# ------------------------------------------------------------------------------------
# Check error conditions of arguments and options, then prepare variables for arguments
# ------------------------------------------------------------------------------------
mic_pattern = None
coords_pattern = None
ctf_params_src = None
root_out_dir = None
# Not a real while, each "if" statement has the opportunity to use break when errors need to be reported
error_status = None
while True:
# --------------------------------------------------------------------------------
# Check the number of arguments. If OK, then prepare variables for them
# --------------------------------------------------------------------------------
if error_status is None and len(args) != 4:
error_status = ("Please check usage for number of arguments.\n Usage: " + usage + "\n" + "Please run %s -h for help." % (program_name), inspect.getframeinfo(inspect.currentframe()))
break
assert (len(args) == 4)
mic_pattern = args[0]
coords_pattern = args[1]
ctf_params_src = args[2]
root_out_dir = args[3]
# --------------------------------------------------------------------------------
# Check error conditions of arguments
# --------------------------------------------------------------------------------
if error_status is None and mic_pattern[:len("bdb:")].lower() == "bdb":
error_status = ("BDB file can not be selected as input micrographs. Please convert the format, and restart the program. Run %s -h for help." % (program_name), inspect.getframeinfo(inspect.currentframe()))
break
if error_status is None and mic_pattern.find("*") == -1:
error_status = ("Input micrograph file name pattern must contain wild card (*). Please check input_micrograph_pattern argument. Run %s -h for help." % (program_name), inspect.getframeinfo(inspect.currentframe()))
break
if error_status is None and coords_pattern.find("*") == -1:
error_status = ("Input coordinates file name pattern must contain wild card (*). Please check input_coordinates_pattern argument. Run %s -h for help." % (program_name), inspect.getframeinfo(inspect.currentframe()))
break
if not is_float(ctf_params_src):
assert (type(ctf_params_src) is str)
# This should be string for CTER partres (CTF parameter) file
if error_status is None and os.path.exists(ctf_params_src) == False:
error_status = ("Specified CTER partres file is not found. Please check input_ctf_params_source argument. Run %s -h for help." % (program_name), inspect.getframeinfo(inspect.currentframe()))
break
else:
assert (is_float(ctf_params_src))
if error_status is None and float(ctf_params_src) <= 0.0:
error_status = ("Specified pixel size is not larger than 0.0. Please check input_ctf_params_source argument. Run %s -h for help." % (program_name), inspect.getframeinfo(inspect.currentframe()))
break
if error_status is None and os.path.exists(root_out_dir):
error_status = ("Output directory exists. Please change the name and restart the program.", inspect.getframeinfo(inspect.currentframe()))
break
# --------------------------------------------------------------------------------
# Check error conditions of options
# --------------------------------------------------------------------------------
if options.selection_list != None:
if error_status is None and not os.path.exists(options.selection_list):
error_status = ("File specified by selection_list option does not exists. Please check selection_list option. Run %s -h for help." % (program_name), inspect.getframeinfo(inspect.currentframe()))
break
if error_status is None and options.coordinates_format.lower() not in ["sphire", "eman1", "eman2", "spider"]:
error_status = ("Invalid option value: --coordinates_format=%s. Please run %s -h for help." % (options.coordinates_format, program_name), inspect.getframeinfo(inspect.currentframe()))
break
if error_status is None and (options.box_size <= 0):
error_status = ("Invalid option value: --box_size=%s. The box size must be an interger larger than zero. Please run %s -h for help." % (options.box_size, program_name), inspect.getframeinfo(inspect.currentframe()))
break
if error_status is None and (options.resample_ratio <= 0.0 or options.resample_ratio > 1.0):
error_status = ("Invalid option value: --resample_ratio=%s. Please run %s -h for help." % (options.resample_ratio, program_name), inspect.getframeinfo(inspect.currentframe()))
break
break
utilities.if_error_then_all_processes_exit_program(error_status)
assert (mic_pattern != None)
assert (coords_pattern != None)
assert (ctf_params_src != None)
assert (root_out_dir != None)
# ------------------------------------------------------------------------------------
# Check warning conditions of options
# ------------------------------------------------------------------------------------
if my_mpi_proc_id == main_mpi_proc:
# This should be string specifying pixel size [A/Pixels]
assert (type(ctf_params_src) is str)
if is_float(ctf_params_src):
if options.limit_ctf:
print("WARNING!!! --limit_ctf option has no effects since the CTER partres file is not specified with input_ctf_params_source argument...")
if options.astigmatism_error != 360.0: # WARN User only when it is obvious that user is trying to use astigmatism_error option
print("WARNING!!! --astigmatism_error option has no effects since the CTER partres file is not specified with input_ctf_params_source argument...")
# ------------------------------------------------------------------------------------
# Check the source of CTF parameteres and select the CTF mode
# (1) Real CTF parameters mode : Use real CTF parameters stored in a CTER partres (CTF parameter) file for cryo data (use_real_ctf_params is True)
# (2) Dummy CTF parameters mode : Create dummy CTF parameters for negative staining data (use_real_ctf_params is False)
# ------------------------------------------------------------------------------------
use_real_ctf_params = not is_float(ctf_params_src)
# NOTE: 2017/12/05 Toshio Moriya
# The following code is to support the old format of CTER partres file. It should be removed near future
#
# Define enumerators for indices of parameters of each CTER partres entry in the old format(BEFIRE 2017/12/05).
# All mpi processes must have access to these indices
i_enum = -1
i_enum += 1; idx_old_cter_def = i_enum # defocus [um]; index must be same as ctf object format
i_enum += 1; idx_old_cter_cs = i_enum # Cs [mm]; index must be same as ctf object format
i_enum += 1; idx_old_cter_vol = i_enum # voltage[kV]; index must be same as ctf object format
i_enum += 1; idx_old_cter_apix = i_enum # pixel size [A]; index must be same as ctf object format
i_enum += 1; idx_old_cter_bfactor = i_enum # B-factor [A^2]; index must be same as ctf object format
i_enum += 1; idx_old_cter_total_ac = i_enum # total amplitude contrast [%]; index must be same as ctf object format
i_enum += 1; idx_old_cter_astig_amp = i_enum # astigmatism amplitude [um]; index must be same as ctf object format
i_enum += 1; idx_old_cter_astig_ang = i_enum # astigmatism angle [degree]; index must be same as ctf object format
i_enum += 1; idx_old_cter_sd_def = i_enum # std dev of defocus [um]
i_enum += 1; idx_old_cter_sd_total_ac = i_enum # std dev of total amplitude contrast [%]
i_enum += 1; idx_old_cter_sd_astig_amp = i_enum # std dev of ast amp [A]
i_enum += 1; idx_old_cter_sd_astig_ang = i_enum # std dev of ast angle [degree]
i_enum += 1; idx_old_cter_cv_def = i_enum # coefficient of variation of defocus [%]
i_enum += 1; idx_old_cter_cv_astig_amp = i_enum # coefficient of variation of ast amp [%]
i_enum += 1; idx_old_cter_spectra_diff = i_enum # average of differences between with- and without-astig. experimental 1D spectra at extrema
i_enum += 1; idx_old_cter_error_def = i_enum # frequency at which signal drops by 50% due to estimated error of defocus alone [1/A]
i_enum += 1; idx_old_cter_error_astig = i_enum # frequency at which signal drops by 50% due to estimated error of defocus and astigmatism [1/A]
i_enum += 1; idx_old_cter_error_ctf = i_enum # limit frequency by CTF error [1/A]
i_enum += 1; idx_old_cter_mic_name = i_enum # micrograph name
i_enum += 1; n_idx_old_cter = i_enum
# Define enumerators for indices of parameters of each CTER partres entry in the new format (AFTER 2017/12/05).
# All mpi processes must have access to these indices
i_enum = -1
i_enum += 1; idx_cter_def = i_enum # defocus [um]; index must be same as ctf object format
i_enum += 1; idx_cter_cs = i_enum # Cs [mm]; index must be same as ctf object format
i_enum += 1; idx_cter_vol = i_enum # voltage[kV]; index must be same as ctf object format
i_enum += 1; idx_cter_apix = i_enum # pixel size [A]; index must be same as ctf object format
i_enum += 1; idx_cter_bfactor = i_enum # B-factor [A^2]; index must be same as ctf object format
i_enum += 1; idx_cter_total_ac = i_enum # total amplitude contrast [%]; index must be same as ctf object format
i_enum += 1; idx_cter_astig_amp = i_enum # astigmatism amplitude [um]; index must be same as ctf object format
i_enum += 1; idx_cter_astig_ang = i_enum # astigmatism angle [degree]; index must be same as ctf object format
i_enum += 1; idx_cter_sd_def = i_enum # std dev of defocus [um]
i_enum += 1; idx_cter_sd_total_ac = i_enum # std dev of total amplitude contrast [%]
i_enum += 1; idx_cter_sd_astig_amp = i_enum # std dev of astigmatism amp [A]
i_enum += 1; idx_cter_sd_astig_ang = i_enum # std dev of astigmatism angle [degree]
i_enum += 1; idx_cter_cv_def = i_enum # coefficient of variation of defocus [%]
i_enum += 1; idx_cter_cv_astig_amp = i_enum # coefficient of variation of astigmatism amp [%]
i_enum += 1; idx_cter_error_def = i_enum # frequency at which signal drops by 50% due to estimated error of defocus alone [1/A]
i_enum += 1; idx_cter_error_astig = i_enum # frequency at which signal drops by 50% due to estimated error of defocus and astigmatism [1/A]
i_enum += 1; idx_cter_error_ctf = i_enum # limit frequency by CTF error [1/A]
i_enum += 1; idx_cter_max_freq = i_enum # visual-impression-based maximum frequency limit [A] (e.g. max frequency of relion; CCC between neighbour zero-crossing pair)
i_enum += 1; idx_cter_reserved = i_enum # reserved spot for maximum frequency limit or error criterion. possibly originated from external program (e.g. CTF figure of merit of RELION)
i_enum += 1; idx_cter_const_ac = i_enum # constant amplitude contrast [%]
i_enum += 1; idx_cter_phase_shift = i_enum # phase shift [degrees]
i_enum += 1; idx_cter_mic_name = i_enum # micrograph name
i_enum += 1; n_idx_cter = i_enum
# ------------------------------------------------------------------------------------
# Prepare the variables for all sections
# ------------------------------------------------------------------------------------
# Micrograph basename pattern (directory path is removed from micrograph path pattern)
mic_basename_pattern = os.path.basename(mic_pattern)
# Global entry dictionary (all possible entries from all lists) for all mic id substring
global_entry_dict = {} # mic id substring is the key
subkey_input_mic_path = "Input Micrograph Path"
subkey_selected_mic_basename = "Selected Micrograph Basename"
subkey_coords_path = "Input Coordinates File Path"
subkey_cter_entry = "CTER Partres Entry"
# List keeps only id substrings of micrographs whose all necessary information are available
valid_mic_id_substr_list = []
# ====================================================================================
# Obtain the list of micrograph id sustrings using a single CPU (i.e. main mpi process)
# ====================================================================================
# NOTE: Toshio Moriya 2016/10/24
# The below is not a real while.
# It gives if-statements an opportunity to use break when errors need to be reported
# However, more elegant way is to use 'raise' statement of exception mechanism...
#
error_status = None
while my_mpi_proc_id == main_mpi_proc:
# --------------------------------------------------------------------------------
# Prepare variables for this section
# --------------------------------------------------------------------------------
# Prefix and suffix of micrograph basename pattern
# to find the head/tail indices of micrograph id substring
mic_basename_tokens = mic_basename_pattern.split('*')
assert (len(mic_basename_tokens) == 2)
# Find head index of micrograph id substring
mic_id_substr_head_idx = len(mic_basename_tokens[0])
# Prefix and suffix of coordinates file path pattern
# to find the head/tail indices of coordinates file id substring
coords_pattern_tokens = coords_pattern.split('*')
assert (len(coords_pattern_tokens) == 2)
# Find head index of coordinates id substring
coords_id_substr_head_idx = len(coords_pattern_tokens[0])
# --------------------------------------------------------------------------------
# Register micrograph id substrings found in the input directory (specified by micrograph path pattern)
# to the global entry dictionary
# --------------------------------------------------------------------------------
# Generate the list of micrograph paths in the input directory
print(" ")
print("Checking the input directory...")
input_mic_path_list = glob.glob(mic_pattern)
# Check error condition of input micrograph file path list
print("Found %d microgarphs in %s." % (len(input_mic_path_list), os.path.dirname(mic_pattern)))
if error_status is None and len(input_mic_path_list) == 0:
error_status = ("No micrograph files are found in the directory specified by micrograph path pattern (%s). Please check input_micrograph_pattern argument. Run %s -h for help." % (os.path.dirname(mic_pattern), program_name), inspect.getframeinfo(inspect.currentframe()))
break
assert (len(input_mic_path_list) > 0)
# Register micrograph id substrings to the global entry dictionary
for input_mic_path in input_mic_path_list:
# Find tail index of micrograph id substring and extract the substring from the micrograph name
input_mic_basename = os.path.basename(input_mic_path)
mic_id_substr_tail_idx = input_mic_basename.index(mic_basename_tokens[1])
mic_id_substr = input_mic_basename[mic_id_substr_head_idx:mic_id_substr_tail_idx]
assert (input_mic_path == mic_pattern.replace("*", mic_id_substr))
if not mic_id_substr in global_entry_dict:
# print("MRK_DEBUG: Added new mic_id_substr (%s) to global_entry_dict from input_mic_path_list " % (mic_id_substr))
global_entry_dict[mic_id_substr] = {}
assert (mic_id_substr in global_entry_dict)
global_entry_dict[mic_id_substr][subkey_input_mic_path] = input_mic_path
assert (len(global_entry_dict) > 0)
# --------------------------------------------------------------------------------
# Register micrograph id substrings found in the selection list
# to the global entry dictionary
# --------------------------------------------------------------------------------
# Generate the list of selected micrograph paths in the selection file
selected_mic_path_list = []
# Generate micrograph lists according to the execution mode
if options.selection_list == None:
print(" ")
print("----- Running with All Micrographs Mode -----")
# Treat all micrographs in the input directory as selected ones
selected_mic_path_list = input_mic_path_list
else:
assert (options.selection_list != None)
if os.path.splitext(options.selection_list)[1] == ".txt":
print(" ")
print("----- Running with Selected Micrographs Mode -----")
print(" ")
print("Checking the selection list...")
assert (os.path.exists(options.selection_list))
selected_mic_path_list = utilities.read_text_file(options.selection_list)
# Check error condition of micrograph entry lists
print("Found %d microgarph entries in %s." % (len(selected_mic_path_list), options.selection_list))
if error_status is None and len(selected_mic_path_list) == 0:
error_status = ("No micrograph entries are found in the selection list file. Please check selection_list option. Run %s -h for help." % (program_name), inspect.getframeinfo(inspect.currentframe()))
break
assert (len(selected_mic_path_list) > 1)
if error_status is None and not isinstance(selected_mic_path_list[0], str):
error_status = ("Invalid format of the selection list file. The first column must contain micrograph paths in string type. Please check selection_list option. Run %s -h for help." % (program_name), inspect.getframeinfo(inspect.currentframe()))
break
else:
print(" ")
print("----- Running with Single Micrograph Mode -----")
print(" ")
print("Processing a single micorgprah: %s..." % (options.selection_list))
selected_mic_path_list = [options.selection_list]
assert (len(selected_mic_path_list) > 0)
selected_mic_directory = os.path.dirname(selected_mic_path_list[0])
if selected_mic_directory != "":
print(" NOTE: Program disregards the directory paths in the selection list (%s)." % (selected_mic_directory))
assert (len(selected_mic_path_list) > 0)
# Register micrograph id substrings to the global entry dictionary
for selected_mic_path in selected_mic_path_list:
# Find tail index of micrograph id substring and extract the substring from the micrograph name
selected_mic_basename = os.path.basename(selected_mic_path)
mic_id_substr_tail_idx = selected_mic_basename.index(mic_basename_tokens[1])
mic_id_substr = selected_mic_basename[mic_id_substr_head_idx:mic_id_substr_tail_idx]
if error_status is None and selected_mic_basename != mic_basename_pattern.replace("*", mic_id_substr):
error_status = ("A micrograph name (%s) in the input directory (%s) does not match with input micrograph basename pattern (%s) (The wild card replacement with \'%s\' resulted in \'%s\'). Please correct input micrograph path pattern. Run %s -h for help." % (selected_mic_basename, os.path.dirname(mic_pattern), mic_basename_pattern, mic_id_substr, mic_basename_pattern.replace("*", mic_id_substr), program_name), inspect.getframeinfo(inspect.currentframe()))
break
if not mic_id_substr in global_entry_dict:
# print("MRK_DEBUG: Added new mic_id_substr (%s) to global_entry_dict from selected_mic_path_list " % (mic_id_substr))
global_entry_dict[mic_id_substr] = {}
assert (mic_id_substr in global_entry_dict)
global_entry_dict[mic_id_substr][subkey_selected_mic_basename] = selected_mic_basename
assert (len(global_entry_dict) > 0)
del selected_mic_path_list # Do not need this anymore
del input_mic_path_list # Do not need this anymore
# --------------------------------------------------------------------------------
# Register coordinates id substrings in coordinate path list to the global entry dictionary.
# coordinates id substring (coords_id_substr) and micrograph id substring (mic_id_substr)
# should be the same for the associated pair of micrograph and coordnates file.
# --------------------------------------------------------------------------------
print(" ")
print("Checking the coordinates files...")
coords_path_list = glob.glob(coords_pattern)
# Check error condition of coordinates file path list
print("Found %d coordinates files in %s directory." % (len(coords_path_list), os.path.dirname(coords_pattern)))
if error_status is None and len(coords_path_list) == 0:
error_status = ("No coordinates files are found in the directory specified by coordinates file path pattern (%s). Please check input_coordinates_pattern argument. Run %s -h for help." % (os.path.dirname(coords_pattern), program_name), inspect.getframeinfo(inspect.currentframe()))
break
assert (len(coords_path_list) > 0)
for coords_path in coords_path_list:
# Find tail index of coordinates id substring and extract the substring from the coordinates file path
coords_id_substr_tail_idx = coords_path.index(coords_pattern_tokens[1])
coords_id_substr = coords_path[coords_id_substr_head_idx:coords_id_substr_tail_idx]
assert (coords_path == coords_pattern.replace("*", coords_id_substr))
if not coords_id_substr in global_entry_dict:
# print("MRK_DEBUG: Added new coords_id_substr (%s) to global_entry_dict from coords_path_list " % (coords_id_substr))
global_entry_dict[coords_id_substr] = {}
assert (coords_id_substr in global_entry_dict)
global_entry_dict[coords_id_substr][subkey_coords_path] = coords_path
assert (len(global_entry_dict) > 0)
del coords_path_list # Do not need this anymore
# --------------------------------------------------------------------------------
# If necessary, register micrograph id substrings of CTER partres entries to the global entry dictionary
# --------------------------------------------------------------------------------
if use_real_ctf_params:
# This should be string for CTER partres (CTF parameter) file
print(" ")
print("Checking the CTER partres file...")
assert (os.path.exists(ctf_params_src))
cter_entry_list = utilities.read_text_row(ctf_params_src)
# Check error condition of CTER partres entry list
print("Found %d CTER partres entries in %s." % (len(cter_entry_list), ctf_params_src))
if error_status is None and len(cter_entry_list) == 0:
error_status = ("No CTER partres entries are found in %s. Please check input_ctf_params_source argument. Run %s -h for help." % (ctf_params_src, program_name), inspect.getframeinfo(inspect.currentframe()))
break
assert (len(cter_entry_list) > 0)
#
# NOTE: 2017/12/05 Toshio Moriya
# The following code is to support the old format of CTER partres file. It should be removed near future
#
if error_status is None and len(cter_entry_list[0]) != n_idx_cter and len(cter_entry_list[0]) != n_idx_old_cter:
error_status = ("The number of columns (%d) has to be %d in %s." % (len(cter_entry_list[0]), n_idx_cter, ctf_params_src), inspect.getframeinfo(inspect.currentframe()))
break
assert len(cter_entry_list[0]) == n_idx_cter or len(cter_entry_list[0]) == n_idx_old_cter
# For NEW CTER partres format (AFTER 2017/12/05)
if len(cter_entry_list[0]) == n_idx_cter:
cter_mic_directory = os.path.dirname(cter_entry_list[0][idx_cter_mic_name])
if cter_mic_directory != "":
print(" NOTE: Program disregards the directory paths in the CTER partres file (%s)." % (cter_mic_directory))
for cter_entry in cter_entry_list:
# Find tail index of micrograph id substring and extract the substring from the micrograph path of CTER partres entry
cter_mic_path = cter_entry[idx_cter_mic_name]
cter_mic_basename = os.path.basename(cter_mic_path)
mic_id_substr_tail_idx = cter_mic_basename.index(mic_basename_tokens[1])
mic_id_substr = cter_mic_basename[mic_id_substr_head_idx:mic_id_substr_tail_idx]
# Between cter_mic_path and mic_path, directory paths might be different but the basenames should be same!
if error_status is None and cter_mic_basename != mic_basename_pattern.replace("*", mic_id_substr):
error_status = ("A micrograph name (%s) in the CTER partres file (%s) does not match with input micrograph basename pattern (%s) (The wild card replacement with \'%s\' resulted in \'%s\'). Please check the CTER partres file and correct input micrograph path pattern. Run %s -h for help." % (cter_mic_basename, ctf_params_src, mic_basename_pattern, mic_id_substr, mic_basename_pattern.replace("*", mic_id_substr), program_name), inspect.getframeinfo(inspect.currentframe()))
break
if(cter_entry[idx_cter_sd_astig_ang] > options.astigmatism_error):
print(" NOTE: Astigmatism angular SD of %s (%f degree) exceeds specified limit (%f degree). Resetting astigmatism parameters to zeros..." % (cter_mic_basename, cter_entry[idx_cter_sd_astig_ang], options.astigmatism_error))
cter_entry[idx_cter_astig_amp] = 0.0
cter_entry[idx_cter_astig_ang] = 0.0
if not mic_id_substr in global_entry_dict:
# print("MRK_DEBUG: Added new mic_id_substr (%s) to global_entry_dict from cter_entry_list " % (mic_id_substr))
global_entry_dict[mic_id_substr] = {}
assert (mic_id_substr in global_entry_dict)
global_entry_dict[mic_id_substr][subkey_cter_entry] = cter_entry
assert (len(global_entry_dict) > 0)
# For OLD CTER partres format (BEFORE 2017/12/05)
else:
assert len(cter_entry_list[0]) == n_idx_old_cter, "MKR_DEBUG"
print("WARNING!!!: Number of columns is %d in the specified CTER partres file %s. The format might be old because the new one should contain %d colums." % (len(cter_entry_list[0]), ctf_params_src, n_idx_cter))
print(" We will stop supporting the old format in near future. Please consider rerun CTER.")
cter_mic_directory = os.path.dirname(cter_entry_list[0][idx_old_cter_mic_name])
if cter_mic_directory != "":
print(" NOTE: Program disregards the directory paths in the CTER partres file (%s)." % (cter_mic_directory))
for old_cter_entry in cter_entry_list:
# Convert each entry to new format
#
# assume amplitude amplitude contrast is total amplitude constrast in [%], estimated as variable Volta phase shift. Conver it to [deg].
# Also, assuming constant amplitude contrast is zero since there is no information available in the old format.
#
cter_entry = [None] * n_idx_cter
cter_entry[idx_cter_def] = old_cter_entry[idx_old_cter_def]
cter_entry[idx_cter_cs] = old_cter_entry[idx_old_cter_cs]
cter_entry[idx_cter_vol] = old_cter_entry[idx_old_cter_vol]
cter_entry[idx_cter_apix] = old_cter_entry[idx_old_cter_apix]
cter_entry[idx_cter_bfactor] = old_cter_entry[idx_old_cter_bfactor]
cter_entry[idx_cter_total_ac] = old_cter_entry[idx_old_cter_total_ac]
cter_entry[idx_cter_astig_amp] = old_cter_entry[idx_old_cter_astig_amp]
cter_entry[idx_cter_astig_ang] = old_cter_entry[idx_old_cter_astig_ang]
cter_entry[idx_cter_sd_def] = old_cter_entry[idx_old_cter_sd_def]
cter_entry[idx_cter_sd_total_ac] = old_cter_entry[idx_old_cter_sd_total_ac]
cter_entry[idx_cter_sd_astig_amp] = old_cter_entry[idx_old_cter_sd_astig_amp]
cter_entry[idx_cter_sd_astig_ang] = old_cter_entry[idx_old_cter_sd_astig_ang]
cter_entry[idx_cter_cv_def] = old_cter_entry[idx_old_cter_cv_def]
cter_entry[idx_cter_cv_astig_amp] = old_cter_entry[idx_old_cter_cv_astig_amp]
cter_entry[idx_cter_error_def] = old_cter_entry[idx_old_cter_error_def]
cter_entry[idx_cter_error_astig] = old_cter_entry[idx_old_cter_error_astig]
cter_entry[idx_cter_error_ctf] = old_cter_entry[idx_old_cter_error_ctf]
cter_entry[idx_cter_max_freq] = 0.5/old_cter_entry[idx_old_cter_apix] # Set to Nyquist frequency
cter_entry[idx_cter_reserved] = 0.0
cter_entry[idx_cter_const_ac] = 0.0
cter_entry[idx_cter_phase_shift] = morphology.ampcont2angle(old_cter_entry[idx_old_cter_total_ac])
cter_entry[idx_cter_mic_name] = old_cter_entry[idx_old_cter_mic_name]
assert (len(cter_entry) == n_idx_cter)
# Find tail index of micrograph id substring and extract the substring from the micrograph path of CTER partres entry
cter_mic_path = cter_entry[idx_cter_mic_name]
cter_mic_basename = os.path.basename(cter_mic_path)
mic_id_substr_tail_idx = cter_mic_basename.index(mic_basename_tokens[1])
mic_id_substr = cter_mic_basename[mic_id_substr_head_idx:mic_id_substr_tail_idx]
# Between cter_mic_path and mic_path, directory paths might be different but the basenames should be same!
if error_status is None and cter_mic_basename != mic_basename_pattern.replace("*", mic_id_substr):
error_status = ("A micrograph name (%s) in the CTER partres file (%s) does not match with input micrograph basename pattern (%s) (The wild card replacement with \'%s\' resulted in \'%s\'). Please check the CTER partres file and correct input micrograph path pattern. Run %s -h for help." % (cter_mic_basename, ctf_params_src, mic_basename_pattern, mic_id_substr, mic_basename_pattern.replace("*", mic_id_substr), program_name), inspect.getframeinfo(inspect.currentframe()))
break
if(cter_entry[idx_cter_sd_astig_ang] > options.astigmatism_error):
print(" NOTE: Astigmatism angular SD of %s (%f degree) exceeds specified limit (%f degree). Resetting astigmatism parameters to zeros..." % (cter_mic_basename, cter_entry[idx_cter_sd_astig_ang], options.astigmatism_error))
cter_entry[idx_cter_astig_amp] = 0.0
cter_entry[idx_cter_astig_ang] = 0.0
if not mic_id_substr in global_entry_dict:
# print("MRK_DEBUG: Added new mic_id_substr (%s) to global_entry_dict from cter_entry_list " % (mic_id_substr))
global_entry_dict[mic_id_substr] = {}
assert (mic_id_substr in global_entry_dict)
global_entry_dict[mic_id_substr][subkey_cter_entry] = cter_entry
assert (len(global_entry_dict) > 0)
del cter_entry_list # Do not need this anymore
# --------------------------------------------------------------------------------
# Clean up variables related to registration to the global entry dictionary
# --------------------------------------------------------------------------------
del mic_basename_tokens
del mic_id_substr_head_idx
del coords_pattern_tokens
del coords_id_substr_head_idx
# --------------------------------------------------------------------------------
# Create the list containing only valid micrograph id substrings
# --------------------------------------------------------------------------------
# Prepare lists to keep track of invalid (rejected) micrographs
no_input_mic_id_substr_list = []
no_coords_mic_id_substr_list = []
no_cter_entry_mic_id_substr_list = []
print(" ")
print("Checking consistency of the provided dataset ...")
# Loop over substring id list
for mic_id_substr in global_entry_dict:
mic_id_entry = global_entry_dict[mic_id_substr]
warinnig_messages = []
# selected micrograph basename must have been registed always .
if subkey_selected_mic_basename in mic_id_entry:
# Check if associated input micrograph exists
if not subkey_input_mic_path in mic_id_entry:
input_mic_path = mic_pattern.replace("*", mic_id_substr)
warinnig_messages.append(" associated input micrograph %s." % (input_mic_path))
no_input_mic_id_substr_list.append(mic_id_substr)
# Check if associated coordinate file exists
if not subkey_coords_path in mic_id_entry:
coords_path = coords_pattern.replace("*", mic_id_substr)
warinnig_messages.append(" associated coordinates file %s." % (coords_path))
no_coords_mic_id_substr_list.append(mic_id_substr)
if use_real_ctf_params:
# Check if associated CTER partres entry exists
if not subkey_cter_entry in mic_id_entry:
mic_basename = mic_basename_pattern.replace("*", mic_id_substr)
warinnig_messages.append(" associated entry with %s in the CTER partres file %s." % (mic_basename, ctf_params_src))
no_cter_entry_mic_id_substr_list.append(mic_id_substr)
else:
assert(not use_real_ctf_params)
# Register dummy CTF parameters
# Create dummy CTER partres entry
assert (float(ctf_params_src) > 0.0)
dummy_cter_entry = [None] * n_idx_cter
assert (len(dummy_cter_entry) == n_idx_cter)
dummy_cter_entry[idx_cter_def] = 0.0
dummy_cter_entry[idx_cter_cs] = 0.0
dummy_cter_entry[idx_cter_vol] = 300.0
dummy_cter_entry[idx_cter_apix] = float(ctf_params_src)
dummy_cter_entry[idx_cter_bfactor] = 0.0
dummy_cter_entry[idx_cter_total_ac] = 100.0
dummy_cter_entry[idx_cter_astig_amp] = 0.0
dummy_cter_entry[idx_cter_astig_ang] = 0.0
dummy_cter_entry[idx_cter_sd_def] = 0.0
dummy_cter_entry[idx_cter_sd_total_ac] = 0.0
dummy_cter_entry[idx_cter_sd_astig_amp] = 0.0
dummy_cter_entry[idx_cter_sd_astig_ang] = 0.0
dummy_cter_entry[idx_cter_cv_def] = 0.0
dummy_cter_entry[idx_cter_cv_astig_amp] = 0.0
dummy_cter_entry[idx_cter_error_def] = 0.5/dummy_cter_entry[idx_cter_apix] # Set to Nyquist frequency
dummy_cter_entry[idx_cter_error_astig] = 0.5/dummy_cter_entry[idx_cter_apix] # Set to Nyquist frequency
dummy_cter_entry[idx_cter_error_ctf] = 0.5/dummy_cter_entry[idx_cter_apix] # Set to Nyquist frequency
dummy_cter_entry[idx_cter_max_freq] = 0.5/dummy_cter_entry[idx_cter_apix] # Set to Nyquist frequency
dummy_cter_entry[idx_cter_reserved] = 0.0
dummy_cter_entry[idx_cter_const_ac] = 0.0
dummy_cter_entry[idx_cter_phase_shift] = 0.0
dummy_cter_entry[idx_cter_mic_name] = ""
assert (not subkey_cter_entry in mic_id_entry)
global_entry_dict[mic_id_substr][subkey_cter_entry] = dummy_cter_entry
if len(warinnig_messages) > 0:
print("WARNING!!! Micrograph ID %s does not have:" % (mic_id_substr))
for warinnig_message in warinnig_messages:
print(warinnig_message)
print(" Ignores this as an invalid entry.")
else:
# print("MRK_DEBUG: adding mic_id_substr := ", mic_id_substr)
valid_mic_id_substr_list.append(mic_id_substr)
# else:
# assert (not subkey_selected_mic_basename in mic_id_entry)
# # This entry is not in the selection list. Do nothing
# Check the input dataset consistency and save the result to a text file, if necessary.
if options.check_consistency:
# Create output directory
assert (not os.path.exists(root_out_dir))
os.mkdir(root_out_dir)
# Open the consistency check file
mic_consistency_check_info_path = os.path.join(root_out_dir, "mic_consistency_check_info_%s.txt"%(get_time_stamp_suffix()))
print(" ")
print("Generating consistency report of the provided dataset in %s..."%(mic_consistency_check_info_path))
mic_consistency_check_info_file = open(mic_consistency_check_info_path, "w")
mic_consistency_check_info_file.write("# The consistency information about micrograph IDs that might have problmes with consistency among the provided dataset.\n")
mic_consistency_check_info_file.write("# \n")
# Loop over substring id list
for mic_id_substr in global_entry_dict:
mic_id_entry = global_entry_dict[mic_id_substr]
consistency_messages = []
# Check if associated input micrograph path exists
if not subkey_input_mic_path in mic_id_entry:
input_mic_path = mic_pattern.replace("*", mic_id_substr)
consistency_messages.append(" associated input micrograph %s." % (input_mic_path))
# Check if associated selected micrograph basename exists
if not subkey_selected_mic_basename in mic_id_entry:
input_mic_path = mic_pattern.replace("*", mic_id_substr)
consistency_messages.append(" associated selected micrograph %s." % (input_mic_path))
# Check if associated coordinate file exists
if not subkey_coords_path in mic_id_entry:
coords_path = coords_pattern.replace("*", mic_id_substr)
consistency_messages.append(" associated coordinates file %s." % (coords_path))
if use_real_ctf_params:
# Check if associated CTER partres entry exists
if not subkey_cter_entry in mic_id_entry:
mic_basename = mic_basename_pattern.replace("*", mic_id_substr)
consistency_messages.append(" associated entry with %s in the CTER partres file %s." % (mic_basename, ctf_params_src))
else:
assert (not use_real_ctf_params)
# All entry must have dummy cter entry
assert (subkey_cter_entry in mic_id_entry)
if len(consistency_messages) > 0:
mic_consistency_check_info_file.write("Micrograph ID %s might have problems with consistency among the provided dataset:\n"%(mic_id_substr))
for consistency_message in consistency_messages:
mic_consistency_check_info_file.write(consistency_message)
mic_consistency_check_info_file.write("\n")
# Close the consistency check file, if necessary
mic_consistency_check_info_file.flush()
mic_consistency_check_info_file.close()
# Since mic_id_substr is once stored as the key of global_entry_dict and extracted with the key order
# we need sort the valid_mic_id_substr_list here
# print("MRK_DEBUG: before sort, valid_mic_id_substr_list := ", valid_mic_id_substr_list)
valid_mic_id_substr_list.sort()
# print("MRK_DEBUG: after sort, valid_mic_id_substr_list := ", valid_mic_id_substr_list)
# --------------------------------------------------------------------------------
# Print out the summary of input consistency
# --------------------------------------------------------------------------------
print(" ")
print("Summary of dataset consistency check...")
print("Detected : %6d" % (len(global_entry_dict)))
print("Valid : %6d" % (len(valid_mic_id_substr_list)))
print("Rejected by no coordinates file : %6d" % (len(no_coords_mic_id_substr_list)))
if use_real_ctf_params:
print("Rejected by no CTER partres entry : %6d" % (len(no_cter_entry_mic_id_substr_list)))
# --------------------------------------------------------------------------------
# Clean up variables related to tracking of invalid (rejected) micrographs
# --------------------------------------------------------------------------------
del no_input_mic_id_substr_list
del no_coords_mic_id_substr_list
del no_cter_entry_mic_id_substr_list
# --------------------------------------------------------------------------------
# Check MPI error condition
# --------------------------------------------------------------------------------
if error_status is None and len(valid_mic_id_substr_list) < n_mpi_procs:
error_status = ("Number of MPI processes (%d) supplied by --np in mpirun cannot be greater than %d (number of valid micrographs that satisfy all criteria to be processed)." % (n_mpi_procs, len(valid_mic_id_substr_list)), inspect.getframeinfo(inspect.currentframe()))
break
break
#
# NOTE: Toshio Moriya 2016/10/24
# The following function takes care of the case when an if-statement uses break for occurence of an error.
# However, more elegant way is to use 'exception' statement of exception mechanism...
#
utilities.if_error_then_all_processes_exit_program(error_status)
# ====================================================================================
# Obtain the list of micrograph id sustrings
# ====================================================================================
# --------------------------------------------------------------------------------
# Prepare variables for this section
# --------------------------------------------------------------------------------
# Prepare variables related to options
box_size = options.box_size
box_half = box_size // 2
mask2d = utilities.model_circle(box_size//2, box_size, box_size) # Create circular 2D mask to Util.infomask of particle images
resample_ratio = options.resample_ratio
# Prepare the function for reading coordinates files with the specified format.
# This way, the following switch statement is unnecessary inside of the coordinates loop.
coords_format = options.coordinates_format.lower()
read_coords_file = None
if coords_format == "sphire":
read_coords_file = read_sphire_coords_file
elif coords_format == "eman1":
read_coords_file = read_eman1_coords_file
elif coords_format == "eman2":
read_coords_file = read_eman2_coords_file
elif coords_format == "spider":
read_coords_file = read_spider_coords_file
else:
assert (False) # Unreachable code
assert (read_coords_file != None)
# Preapre variables related to CTF limit option
abs_ctf_limit_histogram = [] # compute the histogram for micrographs cut of by cter_entry limit.
# Micrograph baseroot pattern (extension are removed from micrograph basename pattern)
# for substack file names
mic_baseroot_pattern = os.path.splitext(mic_basename_pattern)[0]
# Prepare the counters for the global summary of micrographs
n_mic_process = 0
n_mic_reject_no_coords_entry = 0
n_global_coords_detect = 0
n_global_coords_process = 0
n_global_coords_reject_out_of_boundary = 0
# keep a copy of the root output directory where the final bdb will be created
unsliced_valid_serial_id_list = valid_mic_id_substr_list
mpi_proc_dir = root_out_dir
if RUNNING_UNDER_MPI:
mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
# All mpi processes should know global entry directory and valid micrograph id substring list
global_entry_dict = utilities.wrap_mpi_bcast(global_entry_dict, main_mpi_proc)
valid_mic_id_substr_list = utilities.wrap_mpi_bcast(valid_mic_id_substr_list, main_mpi_proc)
# Slice the list of valid micrograph id substrings for this mpi process
mic_start, mic_end = applications.MPI_start_end(len(valid_mic_id_substr_list), n_mpi_procs, my_mpi_proc_id)
valid_mic_id_substr_list = valid_mic_id_substr_list[mic_start:mic_end]
# generate subdirectories user root_out_dir, one for each process
mpi_proc_dir = os.path.join(root_out_dir, "mpi_proc_%03d" % my_mpi_proc_id)
# Set up progress message and all necessary output directories
reject_out_of_boundary_dir = os.path.join(root_out_dir, "reject_out_of_boundary")
if my_mpi_proc_id == main_mpi_proc:
print(" ")
print("Micrographs processed by main process (including percent of progress):")
progress_percent_step = len(valid_mic_id_substr_list)/100.0 # the number of micrograms for main node divided by 100
# Create output directory
#
# NOTE: Toshio Moriya 2017/12/12
# This might not be necessary since particle_img.write_image() will automatically create all directory tree necessary to save the file.
# However, it is side-effect of the function, so we will explicitly make root output directory here.
#
if not os.path.exists(root_out_dir):
os.mkdir(root_out_dir)
assert not os.path.exists(reject_out_of_boundary_dir), "MRK_DEBUG"
os.mkdir(reject_out_of_boundary_dir)
if RUNNING_UNDER_MPI:
mpi.mpi_barrier(mpi.MPI_COMM_WORLD) # all MPI processes should wait until the directory is created by main process
#
# NOTE: 2017/12/12 Toshio Moriya
# To walk-around synchronisation problem between all MPI nodes and a file server,
# we use exception to assert the existence of directory.
#
try:
os.mkdir(reject_out_of_boundary_dir)
assert False, "MRK_DEBUG: Unreachable code..."
except OSError as err:
pass
else:
assert os.path.exists(reject_out_of_boundary_dir), "MRK_DEBUG"
# ------------------------------------------------------------------------------------
# Starting main parallel execution
# ------------------------------------------------------------------------------------
for mic_id_substr_idx, mic_id_substr in enumerate(valid_mic_id_substr_list):
# --------------------------------------------------------------------------------
# Print out progress if necessary
# --------------------------------------------------------------------------------
mic_basename = global_entry_dict[mic_id_substr][subkey_selected_mic_basename]
assert (mic_basename == mic_basename_pattern.replace("*", mic_id_substr))
if my_mpi_proc_id == main_mpi_proc:
print("%s ---> % 2.2f%%" % (mic_basename, mic_id_substr_idx / progress_percent_step))
# --------------------------------------------------------------------------------
# Read the associated coordinates according to the specified format and
# make the coordinates the center of particle image if necessary
# Do this first because error might happen
# --------------------------------------------------------------------------------
coords_path = global_entry_dict[mic_id_substr][subkey_coords_path]
assert (os.path.exists(coords_path))
assert (read_coords_file != None)
coords_list = read_coords_file(coords_path)
if (len(coords_list) == 0):
print("For %s, the associate coordinates file %s does not contain any entries. Skipping..." % (mic_basename, coords_path))
n_mic_reject_no_coords_entry += 1
continue
# --------------------------------------------------------------------------------
# Get CTF parameter if necessary
# Calculate the resampled pixel size and store it to the cter_entry if necessary
# Do before expensive micrograph processing
# --------------------------------------------------------------------------------
cter_entry = global_entry_dict[mic_id_substr][subkey_cter_entry]
src_pixel_size = cter_entry[idx_cter_apix]
if resample_ratio < 1.0:
assert (resample_ratio > 0.0)
# store the resampled pixel size to the cter_entry to generate CTF object of this micrograph
cter_entry[idx_cter_apix] = src_pixel_size / resample_ratio
if my_mpi_proc_id == main_mpi_proc:
print("Resample micrograph to pixel size %6.4f [A/Pixels] from %6.4f [A/Pixels] and window segments from resampled micrograph." % (cter_entry[idx_cter_apix], src_pixel_size))
# else: