Skip to content

Commit

Permalink
Towards passing more regression tests
Browse files: browse the repository at this point in the history
  • Loading branch information
Oeffner authored and bkpoon committed May 10, 2021
1 parent 27d533e commit f733c70
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 47 deletions.
56 changes: 35 additions & 21 deletions iotbx/cif/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,8 +647,8 @@ def init_new(self, cif_block, base_array_info=None, wavelengths=None):
remaininglabls = alllabels[:]
phaseamplabls, remaininglabls = self.get_phase_amplitude_labels(remaininglabls)
mapcoefflabls, remaininglabls = self.get_mapcoefficient_labels(remaininglabls)
data_sig_obstype_labls, remaininglabls = self.get_FSigF_ISigI_labels(remaininglabls)
HLcoefflabls, remaininglabls = self.get_HL_labels(remaininglabls)
data_sig_obstype_labls, remaininglabls = self.get_FSigF_ISigI_labels(remaininglabls)

for w_id in wavelength_ids:
for crys_id in crystal_ids:
Expand All @@ -659,10 +659,13 @@ def init_new(self, cif_block, base_array_info=None, wavelengths=None):
if (len(wavelength_ids) > 1 or len(wavelengths) > 1) and w_id is not None:
wavelbl = ["wavelength_id=%i" %w_id]
if len(crystal_ids) > 1 and crys_id is not None:
cryslbl = ["crys_id=%i" %crys_id]
cryslbl = ["crystal_id=%i" %crys_id]
if len(scale_groups) > 1 and scale_group is not None:
scalegrplbl = ["scale_group_code=%i" %scale_group]
labelsuffix = wavelbl + cryslbl + scalegrplbl
labelsuffix = scalegrplbl + cryslbl + wavelbl
jlablsufx = ""
if len(labelsuffix):
jlablsufx = "," + ",".join(labelsuffix)
for mapcoefflabl in mapcoefflabls:
A_array = refln_loop[ mapcoefflabl[0] ]
B_array = refln_loop[ mapcoefflabl[1] ]
Expand All @@ -683,7 +686,7 @@ def init_new(self, cif_block, base_array_info=None, wavelengths=None):
millarr.set_info(base_array_info.customized_copy(labels= labl ,
wavelength=wavelengths.get(w_id, None)))
#self._arrays[millarr.info().label_string() ] = millarr
self._arrays[millarr.info().labels[0] ] = millarr
self._arrays[millarr.info().labels[0] + jlablsufx ] = millarr
for phaseamplabl in phaseamplabls:
amplitudestrarray = refln_loop[ phaseamplabl[0] ]
phasestrarray = refln_loop[ phaseamplabl[1] ]
Expand All @@ -702,13 +705,14 @@ def init_new(self, cif_block, base_array_info=None, wavelengths=None):
millarr.set_info(base_array_info.customized_copy(labels= labl ,
wavelength=wavelengths.get(w_id, None)))
#self._arrays[millarr.info().label_string() ] = millarr
self._arrays[millarr.info().labels[0] ] = millarr
self._arrays[millarr.info().labels[0] +jlablsufx ] = millarr
for datlabl,siglabl,otype in data_sig_obstype_labls:
datastrarray = refln_loop[datlabl]
millarr = self.flex_std_string_as_miller_array(
datastrarray, wavelength_id=w_id, crystal_id=crys_id,
scale_group_code=scale_group)
if millarr is None: continue
millarr = as_flex_double(millarr, datlabl)
datsiglabl = [datlabl]
if siglabl:
sigmasstrarray = refln_loop[siglabl]
Expand All @@ -729,7 +733,7 @@ def init_new(self, cif_block, base_array_info=None, wavelengths=None):
if otype is not None:
millarr.set_observation_type(otype)
#self._arrays[millarr.info().label_string() ] = millarr
self._arrays[ datlabl ] = millarr
self._arrays[ datlabl +jlablsufx ] = millarr
for hl_labels in HLcoefflabls:
hl_values = [ cif_block.get(hl_key) for hl_key in hl_labels ]
if hl_values.count(None) == 0:
Expand All @@ -749,7 +753,7 @@ def init_new(self, cif_block, base_array_info=None, wavelengths=None):
millarr.set_info(base_array_info.customized_copy(labels= hlabels,
wavelength=wavelengths.get(w_id, None)))
#self._arrays[millarr.info().label_string() ] = millarr
self._arrays[millarr.info().labels[0] ] = millarr
self._arrays[millarr.info().labels[0] +jlablsufx ] = millarr
# pick up remaining columns if any that weren't identified above
for label in alllabels:
#if (label.endswith('wavelength_id') or
Expand All @@ -774,18 +778,23 @@ def init_new(self, cif_block, base_array_info=None, wavelengths=None):
millarr.set_info(base_array_info.customized_copy(labels= labels,
wavelength=wavelengths.get(w_id, None)))
#self._arrays[millarr.info().label_string() ] = millarr
self._arrays[millarr.info().labels[0] ] = millarr
try:
origarr = flex.double(datastrarray)
except ValueError as e:
origarr = datastrarray
self._arrays[millarr.info().labels[0] +jlablsufx ] = millarr
#try:
# origarr = flex.double(datastrarray)
#except ValueError as e:
# origarr = datastrarray
origarr = self.flex_std_string_as_miller_array(
datastrarray, wavelength_id=w_id, crystal_id=crys_id,
scale_group_code=scale_group)
newlabel = label.replace("_refln.", "")
newlabel2 = newlabel.replace("_refln_", "")
self._origarrays[newlabel2 + ",".join(labelsuffix)] = origarr
if origarr: # want only genuine miller arrays
self._origarrays[newlabel2 + jlablsufx ] = origarr.data()

for key, array in six.iteritems(self._arrays.copy()):
if ( key.endswith('_minus') or '_minus_' in key or '-' in key
or key.endswith('_plus') or '_plus_' in key or '+' in key):
#if ( key.endswith('_minus') or '_minus_' in key or '-' in key
# or key.endswith('_plus') or '_plus_' in key or '+' in key):
plus_key = ""
if '_minus' in key:
minus_key = key
plus_key = key.replace('_minus', '_plus')
Expand Down Expand Up @@ -886,17 +895,19 @@ def get_phase_amplitude_labels(self, keys):
PHmatches = re.findall("((\S*PH)([^I]\S*))", alllabels ) # [('_refln.PHWT', '_refln.PH', 'WT'), ('_refln.PHDELWT', '_refln.PH', 'DELWT')]
for label in lstkeys:
for m in PHmatches:
Flabel = m[1].replace("PH","F") + m[2]
if Flabel == label:
PFlabel = m[1].replace("PH","F") + m[2]
Flabel = m[1].replace("PH","") + m[2]
if Flabel == label or PFlabel == label:
phase_amplitudelabels.append([ label, m[0]])
remainingkeys.remove(label)
remainingkeys.remove(m[0])
alllabels = " ".join(remainingkeys)
PHImatches = re.findall("((\S*PHI)(\S*))", alllabels ) # [('_refln.PHIC', '_refln.PHI', 'C'), ('_refln.PHIC_ALL', '_refln.PHI', 'C_ALL')]
for label in lstkeys:
for m in PHImatches:
Flabel = m[1].replace("PHI","F") + m[2]
if Flabel == label:
PFlabel = m[1].replace("PHI","F") + m[2]
Flabel = m[1].replace("PHI","") + m[2]
if Flabel == label or PFlabel == label:
phase_amplitudelabels.append([ label, m[0]])
remainingkeys.remove(label)
remainingkeys.remove(m[0])
Expand All @@ -910,12 +921,14 @@ def get_phase_amplitude_labels(self, keys):
remainingkeys.remove(label)
remainingkeys.remove(m[0])
alllabels = " ".join(remainingkeys)
phase_matches = re.findall("((\S*[\._])phase(_\S*))", alllabels ) # [('_refln.phase_calc', '_refln.', '')]
phase_matches = re.findall("((\S*[\._])phase(\S*))", alllabels ) # [('_refln.phase_calc', '_refln.', '')]
for label in lstkeys:
for m in phase_matches:
phaselabel = m[0]
Flabl = m[1] + m[2]
Flabel = m[1] + "F" + m[2]
if Flabel in label: # in case of _refln.F_calc_au and _refln.phase_calc
Faulabel = m[1] + "F" + m[2] + "_au"
if Flabl in label or Flabel in label or Faulabel in label: # in case of _refln.F_calc_au and _refln.phase_calc
phase_amplitudelabels.append([ label, m[0]])
remainingkeys.remove(label)
remainingkeys.remove(m[0])
Expand Down Expand Up @@ -1005,6 +1018,7 @@ def flex_std_string_as_miller_array(self, value,
value, wavelength_id=wavelength_id,
crystal_id=crystal_id, scale_group_code=scale_group_code)
data = value.select(selection)
#if not isinstance(data, flex.double):
try:
data = flex.int(data)
indices = self.indices.select(selection)
Expand Down
53 changes: 27 additions & 26 deletions iotbx/cif/tests/tst_lex_parse_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,24 +121,24 @@ def exercise_miller_arrays_as_cif_block():
_refln_index_k
_refln_index_l
_refln.crystal_id
_refln.scale_group_code
_refln.wavelength_id
_refln.scale_group_code
_refln.pdbx_I_plus
_refln.pdbx_I_plus_sigma
_refln.pdbx_I_minus
_refln.pdbx_I_minus_sigma
-87 5 46 1 1 3 40.2 40.4 6.7 63.9
-87 5 45 1 1 3 47.8 29.7 35.1 30.5
-87 5 44 1 1 3 18.1 33.2 0.5 34.6
-87 5 43 1 1 3 6.1 45.4 12.9 51.6
-87 5 42 1 1 3 -6.6 45.6 -15.5 55.8
-87 7 37 1 1 3 6.3 43.4 ? ?
-87 7 36 1 1 3 -67.2 55.4 ? ?
-88 2 44 1 1 3 0 -1 35 38.5
-88 2 43 1 1 3 0 -1 57.4 41.5
-88 4 45 1 1 3 -1 46.1 -9.1 45.6
-88 4 44 1 1 3 -19.8 49.2 0.3 34.7
-88 6 44 1 1 3 -1.8 34.8 ? ?
-87 5 46 1 3 1 40.2 40.4 6.7 63.9
-87 5 45 1 3 1 47.8 29.7 35.1 30.5
-87 5 44 1 3 1 18.1 33.2 0.5 34.6
-87 5 43 1 3 1 6.1 45.4 12.9 51.6
-87 5 42 1 3 1 -6.6 45.6 -15.5 55.8
-87 7 37 1 3 1 6.3 43.4 ? ?
-87 7 36 1 3 1 -67.2 55.4 ? ?
-88 2 44 1 3 1 0 -1 35 38.5
-88 2 43 1 3 1 0 -1 57.4 41.5
-88 4 45 1 3 1 -1 46.1 -9.1 45.6
-88 4 44 1 3 1 -19.8 49.2 0.3 34.7
-88 6 44 1 3 1 -1.8 34.8 ? ?
""")

Expand Down Expand Up @@ -228,7 +228,7 @@ def exercise_lex_parse_build():
cif_miller_array_template %(
'_refln_F_calc', '_refln_phase_calc', '_refln_F_sigma')),
data_block_name='global')
assert sorted(arrays.keys()) == ['_refln_F_calc']
assert sorted(arrays.keys()) == ['_refln_F_calc', '_refln_F_sigma']
assert arrays['_refln_F_calc'].is_complex_array()

for data_block_name in (None, "global"):
Expand Down Expand Up @@ -602,12 +602,13 @@ def exercise_crystal_symmetry():

def exercise_mmcif_structure_factors():
miller_arrays = cif.reader(input_string=r3adrsf).as_miller_arrays()
assert len(miller_arrays) == 16
assert len(miller_arrays) == 27 #16
hl_coeffs = find_miller_array_from_labels(
miller_arrays, ','.join([
'scale_group_code=1', 'crystal_id=2', 'wavelength_id=3',
'_refln.pdbx_HL_A_iso', '_refln.pdbx_HL_B_iso',
'_refln.pdbx_HL_C_iso', '_refln.pdbx_HL_D_iso']))
'_refln.pdbx_HL_C_iso', '_refln.pdbx_HL_D_iso',
'scale_group_code=1', 'crystal_id=2', 'wavelength_id=3'
]))
assert hl_coeffs.is_hendrickson_lattman_array()
assert hl_coeffs.size() == 2
mas_as_cif_block = cif.miller_arrays_as_cif_block(
Expand All @@ -622,8 +623,8 @@ def exercise_mmcif_structure_factors():
assert approx_equal(hl_coeffs.data(), hl_coeffs_from_cif_block)
f_meas_au = find_miller_array_from_labels(
miller_arrays, ','.join([
'scale_group_code=1', 'crystal_id=1', 'wavelength_id=1',
'_refln.F_meas_au', '_refln.F_meas_sigma_au']))
'_refln.F_meas_au', '_refln.F_meas_sigma_au',
'scale_group_code=1', 'crystal_id=1', 'wavelength_id=1']))
assert f_meas_au.is_xray_amplitude_array()
assert f_meas_au.size() == 5
assert f_meas_au.sigmas() is not None
Expand All @@ -641,25 +642,25 @@ def exercise_mmcif_structure_factors():
assert pdbx_I_plus_minus.space_group() is None # this CIF block
#
miller_arrays = cif.reader(input_string=r3ad7sf).as_miller_arrays()
assert len(miller_arrays) == 11
assert len(miller_arrays) == 14 #11
f_calc = find_miller_array_from_labels(
miller_arrays, ','.join([
'crystal_id=2', 'wavelength_id=1', '_refln.F_calc', '_refln.phase_calc']))
'_refln.F_calc', '_refln.phase_calc', 'crystal_id=2'])) #, 'wavelength_id=1']))
assert f_calc.is_complex_array()
assert f_calc.size() == 4
#
miller_arrays = cif.reader(input_string=integer_observations).as_miller_arrays()
assert len(miller_arrays) == 2
assert isinstance(miller_arrays[0].data(), flex.double)
assert isinstance(miller_arrays[0].sigmas(), flex.double)
assert isinstance(miller_arrays[1].data(), flex.double)
assert isinstance(miller_arrays[1].sigmas(), flex.double)
#
miller_arrays = cif.reader(input_string=r3v56sf).as_miller_arrays()
assert len(miller_arrays) == 2
for ma in miller_arrays: assert ma.is_complex_array()
assert miller_arrays[0].info().labels == [
'r3v56sf', '_refln.pdbx_DELFWT', '_refln.pdbx_DELPHWT']
assert miller_arrays[1].info().labels == [
'r3v56sf', '_refln.pdbx_FWT', '_refln.pdbx_PHWT']
'_refln.pdbx_DELFWT', '_refln.pdbx_DELPHWT']
assert miller_arrays[0].info().labels == [
'_refln.pdbx_FWT', '_refln.pdbx_PHWT']


def find_miller_array_from_labels(miller_arrays, labels):
Expand Down

0 comments on commit f733c70

Please sign in to comment.