From 0a5362505475e42b11538c4a1c4cb102e3704efb Mon Sep 17 00:00:00 2001 From: rozyczko Date: Thu, 2 Oct 2025 15:32:19 +0200 Subject: [PATCH 01/10] install pycairo before xhtml2pdf --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index a0c79ff5..597e625f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dependencies = [ "refnx", "refl1d>=1.0.0rc0", "orsopy", + "pycairo<1.6" "xhtml2pdf", "bumps", ] From 117e5798f242bae75dc8424305118f538f1e0a0b Mon Sep 17 00:00:00 2001 From: rozyczko Date: Thu, 2 Oct 2025 15:34:14 +0200 Subject: [PATCH 02/10] typo --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 597e625f..ee41012f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "refnx", "refl1d>=1.0.0rc0", "orsopy", - "pycairo<1.6" + "pycairo<1.6", "xhtml2pdf", "bumps", ] From c51cd1fb8988a9e323a151b6b212198ec457c013 Mon Sep 17 00:00:00 2001 From: rozyczko Date: Thu, 2 Oct 2025 17:20:23 +0200 Subject: [PATCH 03/10] it's 1.16 and only on linux --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ee41012f..a8feafe3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "refnx", "refl1d>=1.0.0rc0", "orsopy", - "pycairo<1.6", + "pycairo<1.16 ; platform_system=='Linux'", "xhtml2pdf", "bumps", ] From 262546c2c1ab6379706937ebcaeb183cb5e3398f Mon Sep 17 00:00:00 2001 From: Piotr Rozyczko Date: Thu, 2 Oct 2025 17:39:26 +0200 Subject: [PATCH 04/10] it is svglib --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a8feafe3..661ee847 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "refnx", "refl1d>=1.0.0rc0", "orsopy", - "pycairo<1.16 ; platform_system=='Linux'", + "svglib<1.6 ; platform_system=='Linux'", "xhtml2pdf", "bumps", ] From 545496f7d28b13667161566cc024d8791dc98fcc Mon Sep 17 00:00:00 2001 From: Piotr Rozyczko Date: Fri, 3 Oct 2025 15:23:47 +0200 Subject: [PATCH 05/10] zero variance being mapped out. --- src/easyreflectometry/data/measurement.py | 109 ++++++++++++++++++++-- tests/test_data.py | 45 +++++++++ 2 files changed, 148 insertions(+), 6 deletions(-) diff --git a/src/easyreflectometry/data/measurement.py b/src/easyreflectometry/data/measurement.py index 1ba4addc..fbc4379d 100644 --- a/src/easyreflectometry/data/measurement.py +++ b/src/easyreflectometry/data/measurement.py @@ -1,6 +1,7 @@ __author__ = 'github.com/arm61' import os +import warnings from typing import TextIO from typing import Union @@ -31,11 +32,38 @@ def load_as_dataset(fname: Union[TextIO, str]) -> DataSet1D: coords_name = 'Qz_' + basename coords_name = list(data_group['coords'].keys())[0] if coords_name not in data_group['coords'] else coords_name data_name = list(data_group['data'].keys())[0] if data_name not in data_group['data'] else data_name + + # Get the data arrays + x_vals = data_group['coords'][coords_name].values + y_vals = data_group['data'][data_name].values + ye_vals = data_group['data'][data_name].variances + xe_vals = data_group['coords'][coords_name].variances + + # Check for zero variances and remove those points + # Only check ye_vals for zero if xe_vals was all zeros (3-column file) + if np.all(xe_vals == 0.0): + zero_variance_mask = (ye_vals == 0.0) + else: + zero_variance_mask = (ye_vals == 0.0) | (xe_vals == 0.0) + num_zero_variance = np.sum(zero_variance_mask) + + if num_zero_variance > 0: + warnings.warn( + f"Removed {num_zero_variance} data point(s) from '{fname}' due to zero variance.", + UserWarning + ) + # Keep only points with non-zero variances + valid_mask = ~zero_variance_mask + x_vals = x_vals[valid_mask] + y_vals = y_vals[valid_mask] + ye_vals = ye_vals[valid_mask] + xe_vals = xe_vals[valid_mask] + return DataSet1D( - x=data_group['coords'][coords_name].values, - y=data_group['data'][data_name].values, - ye=data_group['data'][data_name].variances, - xe=data_group['coords'][coords_name].variances, + x=x_vals, + y=y_vals, + ye=ye_vals, + xe=xe_vals, ) @@ -72,6 +100,50 @@ def _load_orso(fname: Union[TextIO, str]) -> sc.DataGroup: variances=np.square(o.data[:, 2]), ) attrs[f'R_{name}'] = {'orso_header': sc.scalar(Header.asdict(o.info))} + + # Check for zero variances and remove those points + total_removed = 0 + for key in list(data.keys()): + data_variances = data[key].variances + # Find the corresponding coordinate key for this data + coord_key = None + for coord_name in coords.keys(): + if data[key].dims == coords[coord_name].dims: + coord_key = coord_name + break + + if coord_key is None: + continue # Skip if no matching coordinate found + + coord_variances = coords[coord_key].variances + + # Find points with zero variances + zero_variance_mask = (data_variances == 0.0) | (coord_variances == 0.0) + num_zero_variance = np.sum(zero_variance_mask) + total_removed += num_zero_variance + + if num_zero_variance > 0: + # Keep only points with non-zero variances + valid_mask = ~zero_variance_mask + data[key] = sc.array( + dims=data[key].dims, + values=data[key].values[valid_mask], + variances=data[key].variances[valid_mask], + unit=data[key].unit if hasattr(data[key], 'unit') else None + ) + coords[coord_key] = sc.array( + dims=coords[coord_key].dims, + values=coords[coord_key].values[valid_mask], + variances=coords[coord_key].variances[valid_mask], + unit=coords[coord_key].unit + ) + + if total_removed > 0: + warnings.warn( + f"Removed {total_removed} data point(s) from '{fname}' due to zero variance.", + UserWarning + ) + return sc.DataGroup(data=data, coords=coords, attrs=attrs) @@ -120,12 +192,37 @@ def _load_txt(fname: Union[TextIO, str]) -> sc.DataGroup: data_name = 'R_' + basename coords_name = 'Qz_' + basename - data = {data_name: sc.array(dims=[coords_name], values=y, variances=np.square(e))} + + # Calculate variances + y_variances = np.square(e) + x_variances = np.square(xe) + + # Check for zero variances and remove those points + # Only check y_variances for zero if xe was all zeros (3-column file) + if np.all(xe == 0.0): + zero_variance_mask = (y_variances == 0.0) + else: + zero_variance_mask = (y_variances == 0.0) | (x_variances == 0.0) + num_zero_variance = np.sum(zero_variance_mask) + + if num_zero_variance > 0: + warnings.warn( + f"Removed {num_zero_variance} data point(s) from '{fname}' due to zero variance.", + UserWarning + ) + # Keep only points with non-zero variances + valid_mask = ~zero_variance_mask + x = x[valid_mask] + y = y[valid_mask] + y_variances = y_variances[valid_mask] + x_variances = x_variances[valid_mask] + + data = {data_name: sc.array(dims=[coords_name], values=y, variances=y_variances)} coords = { data[data_name].dims[0]: sc.array( dims=[coords_name], values=x, - variances=np.square(xe), + variances=x_variances, unit=sc.Unit('1/angstrom'), ) } diff --git a/tests/test_data.py b/tests/test_data.py index a974a75a..74cbb312 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -13,6 +13,7 @@ from easyreflectometry.data.measurement import _load_orso from easyreflectometry.data.measurement import _load_txt from easyreflectometry.data.measurement import load +from easyreflectometry.data.measurement import load_as_dataset PATH_STATIC = os.path.join(os.path.dirname(easyreflectometry.__file__), '..', '..', 'tests', '_static') @@ -103,3 +104,47 @@ def test_txt(self): assert_almost_equal(er_data['coords'][coords_name].values, n_data[:, 0]) assert_almost_equal(er_data['data'][data_name].variances, np.square(n_data[:, 2])) assert_almost_equal(er_data['coords'][coords_name].variances, np.square(n_data[:, 3])) + + def test_zero_variance_filtering(self): + """Test that data points with zero variance are properly filtered out.""" + import warnings + + fpath = os.path.join(PATH_STATIC, 'ref_zero_var.txt') + + # Load original data to verify expectations + original_data = np.loadtxt(fpath, delimiter=',', comments='#') + original_count = len(original_data) + zero_variance_count = np.sum(original_data[:, 2] == 0.0) + expected_filtered_count = original_count - zero_variance_count + + # Test with warnings captured + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + dataset = load_as_dataset(fpath) + + # Check that a warning about removed points was issued + warning_messages = [str(warning.message) for warning in w] + zero_variance_warning = None + for msg in warning_messages: + if f"Removed {zero_variance_count} data point(s)" in msg: + zero_variance_warning = msg + break + self.assertIsNotNone(zero_variance_warning, f"Expected warning about removed points not found in: {warning_messages}") + + # Verify the correct number of points were filtered + self.assertEqual(len(dataset.x), expected_filtered_count) + self.assertEqual(len(dataset.y), expected_filtered_count) + self.assertEqual(len(dataset.ye), expected_filtered_count) + + # Verify no zero variances remain in the filtered data + self.assertTrue(np.all(dataset.ye > 0.0)) + + # Verify the filtered data matches expected values (non-zero variance points) + non_zero_mask = original_data[:, 2] != 0.0 + expected_x = original_data[non_zero_mask, 0] + expected_y = original_data[non_zero_mask, 1] + expected_ye = np.square(original_data[non_zero_mask, 2]) + + assert_almost_equal(dataset.x, expected_x) + assert_almost_equal(dataset.y, expected_y) + assert_almost_equal(dataset.ye, expected_ye) From 08494cc7a5c79b05931c48659dd9ca1b161e96b5 Mon Sep 17 00:00:00 2001 From: Piotr Rozyczko Date: Fri, 3 Oct 2025 15:27:00 +0200 Subject: [PATCH 06/10] added test file --- tests/_static/ref_zero_var.txt | 193 +++++++++++++++++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 tests/_static/ref_zero_var.txt diff --git a/tests/_static/ref_zero_var.txt b/tests/_static/ref_zero_var.txt new file mode 100644 index 00000000..b37cc636 --- /dev/null +++ b/tests/_static/ref_zero_var.txt @@ -0,0 +1,193 @@ +# q(A-1), ref, err +7.6135e-03, 9.7447e-01, 2.5462e-02 +7.7658e-03, 1.0139e+00, 2.5015e-02 +7.9211e-03, 1.0429e+00, 2.4555e-02 +8.0796e-03, 9.7839e-01, 2.2475e-02 +8.2411e-03, 9.7563e-01, 2.1450e-02 +8.4060e-03, 9.8295e-01, 2.0815e-02 +8.5741e-03, 1.0347e+00, 2.0846e-02 +8.7456e-03, 9.9588e-01, 1.9555e-02 +8.9205e-03, 9.8956e-01, 1.8711e-02 +9.0989e-03, 1.0045e+00, 1.8276e-02 +9.2809e-03, 9.8197e-01, 1.7210e-02 +9.4665e-03, 9.9510e-01, 1.6929e-02 +9.6558e-03, 9.9095e-01, 1.6384e-02 +9.8489e-03, 9.5620e-01, 1.5578e-02 +1.0046e-02, 7.4635e-01, 1.2730e-02 +1.0247e-02, 5.3653e-01, 1.0084e-02 +1.0452e-02, 3.4076e-01, 7.5298e-03 +1.0661e-02, 2.1115e-01, 5.5973e-03 +1.0874e-02, 1.5915e-01, 4.6622e-03 +1.1091e-02, 1.2474e-01, 3.9928e-03 +1.1313e-02, 1.0077e-01, 3.4676e-03 +1.1540e-02, 6.7939e-02, 2.7482e-03 +1.1770e-02, 6.1736e-02, 2.5467e-03 +1.2006e-02, 4.8561e-02, 2.2045e-03 +1.2246e-02, 4.1062e-02, 1.9733e-03 +1.2491e-02, 3.2515e-02, 1.7031e-03 +1.2741e-02, 2.7232e-02, 1.5269e-03 +1.2995e-02, 2.2746e-02, 1.3521e-03 +1.3255e-02, 1.7792e-02, 1.1778e-03 +1.3520e-02, 1.2697e-02, 9.7116e-04 +1.3791e-02, 1.2313e-02, 9.4167e-04 +1.4067e-02, 1.0335e-02, 8.7240e-04 +1.4348e-02, 7.9747e-03, 7.5829e-04 +1.4635e-02, 7.9688e-03, 7.5098e-04 +1.4928e-02, 5.5187e-03, 6.3000e-04 +1.5226e-02, 4.3436e-03, 5.7090e-04 +1.5531e-02, 3.7503e-03, 5.3663e-04 +1.5841e-02, 3.4079e-03, 5.2009e-04 +1.6158e-02, 1.5589e-03, 3.6834e-04 +1.6481e-02, 1.8615e-03, 3.9704e-04 +1.6811e-02, 8.4691e-04, 2.5540e-04 +1.7147e-02, 1.2625e-03, 3.0629e-04 +1.7490e-02, 1.0137e-03, 2.7099e-04 +1.7840e-02, 6.0481e-04, 2.0163e-04 +1.8197e-02, 7.6578e-04, 2.2110e-04 +1.8561e-02, 9.3992e-04, 2.4274e-04 +1.8932e-02, 4.2138e-04, 1.5928e-04 +1.9311e-02, 4.8892e-04, 3.9837e-05 +1.9697e-02, 4.4982e-04, 3.6981e-05 +2.0091e-02, 4.3653e-04, 3.5081e-05 +2.0493e-02, 3.9129e-04, 3.1923e-05 +2.0902e-02, 4.5988e-04, 3.3398e-05 +2.1320e-02, 4.7029e-04, 3.2732e-05 +2.1747e-02, 4.1789e-04, 2.9339e-05 +2.2182e-02, 5.5580e-04, 3.3223e-05 +2.2625e-02, 5.5040e-04, 3.2084e-05 +2.3078e-02, 5.9884e-04, 3.2446e-05 +2.3539e-02, 5.8326e-04, 3.0862e-05 +2.4010e-02, 6.4411e-04, 3.1161e-05 +2.4490e-02, 6.0736e-04, 2.9565e-05 +2.4980e-02, 6.7219e-04, 3.0222e-05 +2.5480e-02, 7.6819e-04, 3.1418e-05 +2.5989e-02, 7.7232e-04, 3.0737e-05 +2.6509e-02, 7.4941e-04, 2.9341e-05 +2.7039e-02, 7.2715e-04, 2.7959e-05 +2.7580e-02, 8.1394e-04, 2.9100e-05 +2.8132e-02, 7.9111e-04, 2.7939e-05 +2.8694e-02, 7.7717e-04, 2.6943e-05 +2.9268e-02, 7.7420e-04, 2.6248e-05 +2.9854e-02, 7.4343e-04, 2.5134e-05 +3.0451e-02, 7.6124e-04, 2.4730e-05 +3.1060e-02, 7.9219e-04, 2.4832e-05 +3.1681e-02, 7.3167e-04, 2.3434e-05 +3.2315e-02, 6.7479e-04, 2.2047e-05 +3.2961e-02, 6.4508e-04, 2.1919e-05 +3.3620e-02, 6.0626e-04, 2.0950e-05 +3.4293e-02, 6.0048e-04, 2.0702e-05 +3.4978e-02, 5.4601e-04, 1.9966e-05 +3.5678e-02, 5.1839e-04, 1.9762e-05 +3.6392e-02, 4.3221e-04, 1.8272e-05 +3.7119e-02, 4.7017e-04, 1.9439e-05 +3.7862e-02, 4.1933e-04, 1.9311e-05 +3.8619e-02, 3.6578e-04, 1.7446e-05 +3.9391e-02, 2.9619e-04, 1.4991e-05 +4.0179e-02, 2.6859e-04, 1.4041e-05 +4.0983e-02, 2.1978e-04, 1.2514e-05 +4.1802e-02, 1.5957e-04, 6.1469e-06 +4.2638e-02, 1.4973e-04, 5.6957e-06 +4.3491e-02, 1.3650e-04, 5.1658e-06 +4.4361e-02, 1.0894e-04, 4.3978e-06 +4.5248e-02, 8.4313e-05, 3.6870e-06 +4.6153e-02, 6.6209e-05, 3.1442e-06 +4.7076e-02, 4.7721e-05, 2.5218e-06 +4.8018e-02, 3.8550e-05, 2.1951e-06 +4.8978e-02, 2.4256e-05, 1.6738e-06 +4.9958e-02, 1.2668e-05, 1.1440e-06 +5.0957e-02, 9.8042e-06, 9.8695e-07 +5.1976e-02, 5.5653e-06, 7.2030e-07 +5.3016e-02, 4.6001e-06, 6.3347e-07 +5.4076e-02, 4.5340e-06, 6.0180e-07 +5.5157e-02, 6.0764e-06, 6.7701e-07 +5.6261e-02, 1.0837e-05, 8.8046e-07 +5.7386e-02, 1.5080e-05, 1.0136e-06 +5.8534e-02, 1.8215e-05, 1.0789e-06 +5.9704e-02, 2.4590e-05, 1.2253e-06 +6.0898e-02, 2.9010e-05, 1.3040e-06 +6.2116e-02, 3.4848e-05, 1.3756e-06 +6.3359e-02, 3.7815e-05, 1.4164e-06 +6.4626e-02, 3.6734e-05, 1.3531e-06 +6.5918e-02, 3.8991e-05, 1.3649e-06 +6.7237e-02, 3.7503e-05, 1.3006e-06 +6.8581e-02, 3.7938e-05, 1.2808e-06 +6.9953e-02, 3.5174e-05, 1.1989e-06 +7.1352e-02, 3.1424e-05, 1.1148e-06 +7.2779e-02, 2.5972e-05, 9.9229e-07 +7.4235e-02, 2.1881e-05, 8.9970e-07 +7.5719e-02, 1.8255e-05, 8.3013e-07 +7.7234e-02, 1.3071e-05, 6.9096e-07 +7.8778e-02, 9.7935e-06, 5.9432e-07 +8.0354e-02, 6.2634e-06, 4.8657e-07 +8.1961e-02, 2.9788e-06, 3.3668e-07 +8.3600e-02, 2.0870e-06, 2.8795e-07 +8.5272e-02, 1.4029e-06, 2.4111e-07 +8.6978e-02, 1.1454e-06, 2.2760e-07 +8.8717e-02, 2.3790e-06, 3.1299e-07 +9.0492e-02, 3.0687e-06, 3.4127e-07 +9.2301e-02, 5.3517e-06, 4.4876e-07 +9.4147e-02, 6.2487e-06, 4.7367e-07 +9.6030e-02, 7.6304e-06, 5.0400e-07 +9.7951e-02, 9.0528e-06, 5.4178e-07 +9.9910e-02, 9.6743e-06, 5.5215e-07 +1.0191e-01, 9.6967e-06, 5.4148e-07 +1.0395e-01, 8.7491e-06, 5.0450e-07 +1.0603e-01, 7.5989e-06, 4.5372e-07 +1.0815e-01, 5.2862e-06, 3.6156e-07 +1.1031e-01, 3.8920e-06, 2.9952e-07 +1.1251e-01, 2.9870e-06, 2.8059e-07 +1.1477e-01, 1.6669e-06, 1.8473e-07 +1.1706e-01, 8.6049e-07, 1.2721e-07 +1.1940e-01, 8.7034e-07, 1.2368e-07 +1.2179e-01, 1.0986e-06, 1.3499e-07 +1.2423e-01, 1.9180e-06, 1.7343e-07 +1.2671e-01, 2.1959e-06, 1.8012e-07 +1.2924e-01, 3.1488e-06, 2.1032e-07 +1.3183e-01, 3.6948e-06, 2.2170e-07 +1.3447e-01, 3.8395e-06, 2.2005e-07 +1.3716e-01, 3.5445e-06, 2.0548e-07 +1.3990e-01, 2.8194e-06, 1.7880e-07 +1.4270e-01, 2.1205e-06, 1.5114e-07 +1.4555e-01, 1.4023e-06, 1.1952e-07 +1.4846e-01, 1.0245e-06, 9.9819e-08 +1.5143e-01, 6.8465e-07, 7.9165e-08 +1.5446e-01, 9.5744e-07, 9.2363e-08 +1.5755e-01, 1.2631e-06, 1.0379e-07 +1.6070e-01, 1.4797e-06, 1.1061e-07 +1.6391e-01, 2.0772e-06, 1.3256e-07 +1.6719e-01, 1.8731e-06, 1.2463e-07 +1.7054e-01, 1.6925e-06, 1.1733e-07 +1.7395e-01, 1.4556e-06, 1.0970e-07 +1.7742e-01, 1.1049e-06, 9.7703e-08 +1.8097e-01, 6.5186e-07, 7.5546e-08 +1.8459e-01, 5.9841e-07, 7.3744e-08 +1.8828e-01, 8.0107e-07, 8.9328e-08 +1.9205e-01, 1.0381e-06, 1.0071e-07 +1.9589e-01, 1.1199e-06, 9.9764e-08 +1.9981e-01, 1.2928e-06, 1.0516e-07 +2.0381e-01, 8.6691e-07, 8.5084e-08 +2.0788e-01, 7.0954e-07, 7.4143e-08 +2.1204e-01, 5.3766e-07, 6.2839e-08 +2.1628e-01, 5.7456e-07, 6.4332e-08 +2.2061e-01, 7.8023e-07, 0.0 +2.2502e-01, 8.3610e-07, 7.4847e-08 +2.2952e-01, 9.8054e-07, 7.9847e-08 +2.3411e-01, 6.9297e-07, 6.5946e-08 +2.3879e-01, 8.1531e-07, 7.0975e-08 +2.4357e-01, 5.4552e-07, 5.7255e-08 +2.4844e-01, 6.1100e-07, 5.9889e-08 +2.5341e-01, 5.1126e-07, 5.4720e-08 +2.5847e-01, 7.3334e-07, 6.4703e-08 +2.6364e-01, 6.5027e-07, 0.0 +2.6892e-01, 7.0689e-07, 6.6486e-08 +2.7430e-01, 6.6355e-07, 6.3295e-08 +2.7978e-01, 6.1870e-07, 6.0932e-08 +2.8538e-01, 4.6847e-07, 0.0 +2.9108e-01, 5.1032e-07, 5.5877e-08 +2.9691e-01, 4.6251e-07, 5.3567e-08 +3.0284e-01, 3.6602e-07, 4.8650e-08 +3.0890e-01, 4.0211e-07, 5.2733e-08 +3.1508e-01, 3.6333e-07, 5.3747e-08 +3.2138e-01, 3.1715e-07, 0.0 +3.2781e-01, 3.4336e-07, 0.0 +3.3436e-01, 3.2231e-07, 0.0 From 7b72b1be3423dd7b84f1573bc75c6881fbea8e24 Mon Sep 17 00:00:00 2001 From: Piotr Rozyczko Date: Fri, 3 Oct 2025 15:40:32 +0200 Subject: [PATCH 07/10] ruff --- tests/test_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_data.py b/tests/test_data.py index 74cbb312..3655aa60 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -129,7 +129,8 @@ def test_zero_variance_filtering(self): if f"Removed {zero_variance_count} data point(s)" in msg: zero_variance_warning = msg break - self.assertIsNotNone(zero_variance_warning, f"Expected warning about removed points not found in: {warning_messages}") + self.assertIsNotNone(zero_variance_warning, + f"Expected warning about removed points not found in: {warning_messages}") # Verify the correct number of points were filtered self.assertEqual(len(dataset.x), expected_filtered_count) From 55d9f75e35f0a122f87315dad43506e5d206731b Mon Sep 17 00:00:00 2001 From: Piotr Rozyczko Date: Fri, 3 Oct 2025 21:07:29 +0200 Subject: [PATCH 08/10] check/mask zero variances before the fitting. the datastore should contain pristine datasets --- src/easyreflectometry/data/measurement.py | 109 +-------------- src/easyreflectometry/fitting.py | 41 +++++- tests/test_data.py | 45 ------- tests/test_fitting.py | 155 ++++++++++++++++++++++ 4 files changed, 199 insertions(+), 151 deletions(-) diff --git a/src/easyreflectometry/data/measurement.py b/src/easyreflectometry/data/measurement.py index fbc4379d..1ba4addc 100644 --- a/src/easyreflectometry/data/measurement.py +++ b/src/easyreflectometry/data/measurement.py @@ -1,7 +1,6 @@ __author__ = 'github.com/arm61' import os -import warnings from typing import TextIO from typing import Union @@ -32,38 +31,11 @@ def load_as_dataset(fname: Union[TextIO, str]) -> DataSet1D: coords_name = 'Qz_' + basename coords_name = list(data_group['coords'].keys())[0] if coords_name not in data_group['coords'] else coords_name data_name = list(data_group['data'].keys())[0] if data_name not in data_group['data'] else data_name - - # Get the data arrays - x_vals = data_group['coords'][coords_name].values - y_vals = data_group['data'][data_name].values - ye_vals = data_group['data'][data_name].variances - xe_vals = data_group['coords'][coords_name].variances - - # Check for zero variances and remove those points - # Only check ye_vals for zero if xe_vals was all zeros (3-column file) - if np.all(xe_vals == 0.0): - zero_variance_mask = (ye_vals == 0.0) - else: - zero_variance_mask = (ye_vals == 0.0) | (xe_vals == 0.0) - num_zero_variance = np.sum(zero_variance_mask) - - if num_zero_variance > 0: - warnings.warn( - f"Removed {num_zero_variance} data point(s) from '{fname}' due to zero variance.", - UserWarning - ) - # Keep only points with non-zero variances - valid_mask = ~zero_variance_mask - x_vals = x_vals[valid_mask] - y_vals = y_vals[valid_mask] - ye_vals = ye_vals[valid_mask] - xe_vals = xe_vals[valid_mask] - return DataSet1D( - x=x_vals, - y=y_vals, - ye=ye_vals, - xe=xe_vals, + x=data_group['coords'][coords_name].values, + y=data_group['data'][data_name].values, + ye=data_group['data'][data_name].variances, + xe=data_group['coords'][coords_name].variances, ) @@ -100,50 +72,6 @@ def _load_orso(fname: Union[TextIO, str]) -> sc.DataGroup: variances=np.square(o.data[:, 2]), ) attrs[f'R_{name}'] = {'orso_header': sc.scalar(Header.asdict(o.info))} - - # Check for zero variances and remove those points - total_removed = 0 - for key in list(data.keys()): - data_variances = data[key].variances - # Find the corresponding coordinate key for this data - coord_key = None - for coord_name in coords.keys(): - if data[key].dims == coords[coord_name].dims: - coord_key = coord_name - break - - if coord_key is None: - continue # Skip if no matching coordinate found - - coord_variances = coords[coord_key].variances - - # Find points with zero variances - zero_variance_mask = (data_variances == 0.0) | (coord_variances == 0.0) - num_zero_variance = np.sum(zero_variance_mask) - total_removed += num_zero_variance - - if num_zero_variance > 0: - # Keep only points with non-zero variances - valid_mask = ~zero_variance_mask - data[key] = sc.array( - dims=data[key].dims, - values=data[key].values[valid_mask], - variances=data[key].variances[valid_mask], - unit=data[key].unit if hasattr(data[key], 'unit') else None - ) - coords[coord_key] = sc.array( - dims=coords[coord_key].dims, - values=coords[coord_key].values[valid_mask], - variances=coords[coord_key].variances[valid_mask], - unit=coords[coord_key].unit - ) - - if total_removed > 0: - warnings.warn( - f"Removed {total_removed} data point(s) from '{fname}' due to zero variance.", - UserWarning - ) - return sc.DataGroup(data=data, coords=coords, attrs=attrs) @@ -192,37 +120,12 @@ def _load_txt(fname: Union[TextIO, str]) -> sc.DataGroup: data_name = 'R_' + basename coords_name = 'Qz_' + basename - - # Calculate variances - y_variances = np.square(e) - x_variances = np.square(xe) - - # Check for zero variances and remove those points - # Only check y_variances for zero if xe was all zeros (3-column file) - if np.all(xe == 0.0): - zero_variance_mask = (y_variances == 0.0) - else: - zero_variance_mask = (y_variances == 0.0) | (x_variances == 0.0) - num_zero_variance = np.sum(zero_variance_mask) - - if num_zero_variance > 0: - warnings.warn( - f"Removed {num_zero_variance} data point(s) from '{fname}' due to zero variance.", - UserWarning - ) - # Keep only points with non-zero variances - valid_mask = ~zero_variance_mask - x = x[valid_mask] - y = y[valid_mask] - y_variances = y_variances[valid_mask] - x_variances = x_variances[valid_mask] - - data = {data_name: sc.array(dims=[coords_name], values=y, variances=y_variances)} + data = {data_name: sc.array(dims=[coords_name], values=y, variances=np.square(e))} coords = { data[data_name].dims[0]: sc.array( dims=[coords_name], values=x, - variances=x_variances, + variances=np.square(xe), unit=sc.Unit('1/angstrom'), ) } diff --git a/src/easyreflectometry/fitting.py b/src/easyreflectometry/fitting.py index 41cf72ba..16cd275d 100644 --- a/src/easyreflectometry/fitting.py +++ b/src/easyreflectometry/fitting.py @@ -1,5 +1,6 @@ __author__ = 'github.com/arm61' +import warnings import numpy as np import scipp as sc from easyscience.fitting import AvailableMinimizers @@ -36,11 +37,45 @@ def fit(self, data: sc.DataGroup, id: int = 0) -> sc.DataGroup: :param data: DataGroup to be fitted to and populated :param method: Optimisation method + + :note: Points with zero variance in the data will be automatically masked + out during fitting. A warning will be issued if any such points + are found, indicating the number of points masked per reflectivity. """ refl_nums = [k[3:] for k in data['coords'].keys() if 'Qz' == k[:2]] - x = [data['coords'][f'Qz_{i}'].values for i in refl_nums] - y = [data['data'][f'R_{i}'].values for i in refl_nums] - dy = [1 / np.sqrt(data['data'][f'R_{i}'].variances) for i in refl_nums] + x = [] + y = [] + dy = [] + + # Process each reflectivity dataset + for i in refl_nums: + x_vals = data['coords'][f'Qz_{i}'].values + y_vals = data['data'][f'R_{i}'].values + variances = data['data'][f'R_{i}'].variances + + # Find points with non-zero variance + zero_variance_mask = (variances == 0.0) + num_zero_variance = np.sum(zero_variance_mask) + + if num_zero_variance > 0: + warnings.warn( + f"Masked {num_zero_variance} data point(s) in reflectivity {i} due to zero variance during fitting.", + UserWarning + ) + # Keep only points with non-zero variances + valid_mask = ~zero_variance_mask + x_vals_masked = x_vals[valid_mask] + y_vals_masked = y_vals[valid_mask] + variances_masked = variances[valid_mask] + else: + x_vals_masked = x_vals + y_vals_masked = y_vals + variances_masked = variances + + x.append(x_vals_masked) + y.append(y_vals_masked) + dy.append(1 / np.sqrt(variances_masked)) + result = self.easy_science_multi_fitter.fit(x, y, weights=dy) new_data = data.copy() for i, _ in enumerate(result): diff --git a/tests/test_data.py b/tests/test_data.py index 3655aa60..a174ae7f 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -104,48 +104,3 @@ def test_txt(self): assert_almost_equal(er_data['coords'][coords_name].values, n_data[:, 0]) assert_almost_equal(er_data['data'][data_name].variances, np.square(n_data[:, 2])) assert_almost_equal(er_data['coords'][coords_name].variances, np.square(n_data[:, 3])) - - def test_zero_variance_filtering(self): - """Test that data points with zero variance are properly filtered out.""" - import warnings - - fpath = os.path.join(PATH_STATIC, 'ref_zero_var.txt') - - # Load original data to verify expectations - original_data = np.loadtxt(fpath, delimiter=',', comments='#') - original_count = len(original_data) - zero_variance_count = np.sum(original_data[:, 2] == 0.0) - expected_filtered_count = original_count - zero_variance_count - - # Test with warnings captured - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - dataset = load_as_dataset(fpath) - - # Check that a warning about removed points was issued - warning_messages = [str(warning.message) for warning in w] - zero_variance_warning = None - for msg in warning_messages: - if f"Removed {zero_variance_count} data point(s)" in msg: - zero_variance_warning = msg - break - self.assertIsNotNone(zero_variance_warning, - f"Expected warning about removed points not found in: {warning_messages}") - - # Verify the correct number of points were filtered - self.assertEqual(len(dataset.x), expected_filtered_count) - self.assertEqual(len(dataset.y), expected_filtered_count) - self.assertEqual(len(dataset.ye), expected_filtered_count) - - # Verify no zero variances remain in the filtered data - self.assertTrue(np.all(dataset.ye > 0.0)) - - # Verify the filtered data matches expected values (non-zero variance points) - non_zero_mask = original_data[:, 2] != 0.0 - expected_x = original_data[non_zero_mask, 0] - expected_y = original_data[non_zero_mask, 1] - expected_ye = np.square(original_data[non_zero_mask, 2]) - - assert_almost_equal(dataset.x, expected_x) - assert_almost_equal(dataset.y, expected_y) - assert_almost_equal(dataset.ye, expected_ye) diff --git a/tests/test_fitting.py b/tests/test_fitting.py index ff93cb42..c07ed023 100644 --- a/tests/test_fitting.py +++ b/tests/test_fitting.py @@ -70,3 +70,158 @@ def test_fitting(minimizer): assert 'SLD_0' in analysed.keys() assert 'success' in analysed.keys() assert analysed['success'] + + +def test_fitting_with_zero_variance(): + """Test that zero variance points are properly detected and masked during fitting when present in the data.""" + import warnings + import numpy as np + from easyreflectometry.data.measurement import load + + # Load data that contains zero variance points + fpath = os.path.join(PATH_STATIC, 'ref_zero_var.txt') + + # First, load the raw data to count zero variance points + raw_data = np.loadtxt(fpath, delimiter=',', comments='#') + zero_variance_count = np.sum(raw_data[:, 2] == 0.0) # Error column + assert zero_variance_count == 6, f"Expected 6 zero variance points, got {zero_variance_count}" + + # Load data through the measurement module (which already filters zero variance) + data = load(fpath) + + # Create a simple model for fitting + si = Material(2.07, 0, 'Si') + sio2 = Material(3.47, 0, 'SiO2') + film = Material(2.0, 0, 'Film') + d2o = Material(6.36, 0, 'D2O') + si_layer = Layer(si, 0, 0, 'Si layer') + sio2_layer = Layer(sio2, 30, 3, 'SiO2 layer') + film_layer = Layer(film, 250, 3, 'Film Layer') + superphase = Layer(d2o, 0, 3, 'D2O Subphase') + sample = Sample( + Multilayer(si_layer), + Multilayer(sio2_layer), + Multilayer(film_layer), + Multilayer(superphase), + name='Film Structure', + ) + resolution_function = PercentageFwhm(0.02) + model = Model(sample, 1, 1e-6, resolution_function, 'Film Model') + + # Set some parameters as fittable + sio2_layer.thickness.fixed = False + sio2_layer.thickness.bounds = (15, 50) + film_layer.thickness.fixed = False + film_layer.thickness.bounds = (200, 300) + film.sld.fixed = False + film.sld.bounds = (0.1, 3) + model.background.fixed = False + model.background.bounds = (1e-7, 1e-5) + model.scale.fixed = False + model.scale.bounds = (0.5, 1.5) + + interface = CalculatorFactory() + model.interface = interface + fitter = MultiFitter(model) + + # Capture warnings during fitting - check if zero variance points still exist in the data + # and are properly handled by the fitting method + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + analysed = fitter.fit(data) + + # Check if any zero variance warnings were issued during fitting + fitting_warnings = [str(warning.message) for warning in w + if "zero variance during fitting" in str(warning.message)] + + # The fitting method should handle zero variance points gracefully + # If there are any zero variance points remaining in the data, they should be masked + # and a warning should be issued + if len(fitting_warnings) > 0: + # Verify the warning message format and that it mentions masking points + for warning_msg in fitting_warnings: + assert "Masked" in warning_msg and "zero variance during fitting" in warning_msg + print(f"Info: {warning_msg}") # Log for debugging + + # Basic checks that fitting completed + # The keys will be based on the filename, not just '0' + model_keys = [k for k in analysed.keys() if k.endswith('_model')] + sld_keys = [k for k in analysed.keys() if k.startswith('SLD_')] + assert len(model_keys) > 0, f"No model keys found in {list(analysed.keys())}" + assert len(sld_keys) > 0, f"No SLD keys found in {list(analysed.keys())}" + assert 'success' in analysed.keys() + + +def test_fitting_with_manual_zero_variance(): + """Test the fit method with manually created zero variance points.""" + import warnings + import numpy as np + import scipp as sc + + # Create synthetic data with some zero variance points + qz_values = np.linspace(0.01, 0.3, 50) + r_values = np.exp(-qz_values * 100) + 0.01 # Synthetic reflectivity data + + # Create variances with some zero values + variances = np.ones_like(r_values) * 0.01**2 + # Set some variances to zero (simulate bad data points) + variances[10:15] = 0.0 # 5 zero variance points + variances[30:32] = 0.0 # 2 more zero variance points + + # Create scipp DataGroup manually + data = sc.DataGroup({ + 'coords': { + 'Qz_0': sc.array(dims=['Qz_0'], values=qz_values) + }, + 'data': { + 'R_0': sc.array(dims=['Qz_0'], values=r_values, variances=variances) + } + }) + + # Create a simple model for fitting + si = Material(2.07, 0, 'Si') + sio2 = Material(3.47, 0, 'SiO2') + film = Material(2.0, 0, 'Film') + d2o = Material(6.36, 0, 'D2O') + si_layer = Layer(si, 0, 0, 'Si layer') + sio2_layer = Layer(sio2, 30, 3, 'SiO2 layer') + film_layer = Layer(film, 250, 3, 'Film Layer') + superphase = Layer(d2o, 0, 3, 'D2O Subphase') + sample = Sample( + Multilayer(si_layer), + Multilayer(sio2_layer), + Multilayer(film_layer), + Multilayer(superphase), + name='Film Structure', + ) + resolution_function = PercentageFwhm(0.02) + model = Model(sample, 1, 1e-6, resolution_function, 'Film Model') + + # Set some parameters as fittable + sio2_layer.thickness.fixed = False + sio2_layer.thickness.bounds = (15, 50) + film_layer.thickness.fixed = False + film_layer.thickness.bounds = (200, 300) + film.sld.fixed = False + film.sld.bounds = (0.1, 3) + + interface = CalculatorFactory() + model.interface = interface + fitter = MultiFitter(model) + + # Capture warnings during fitting + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + analysed = fitter.fit(data) + + # Check that warnings were issued about zero variance points + fitting_warnings = [str(warning.message) for warning in w + if "zero variance during fitting" in str(warning.message)] + + # Should have one warning about the 7 zero variance points (5 + 2) + assert len(fitting_warnings) == 1, f"Expected 1 warning, got {len(fitting_warnings)}: {fitting_warnings}" + assert "Masked 7 data point(s)" in fitting_warnings[0], f"Unexpected warning content: {fitting_warnings[0]}" + # Basic checks that fitting completed despite zero variance points + assert 'R_0_model' in analysed.keys() + assert 'SLD_0' in analysed.keys() + assert 'success' in analysed.keys() From 19d543dc8078ab559ae31230e9a89a9bd9e7f21c Mon Sep 17 00:00:00 2001 From: Piotr Rozyczko Date: Fri, 3 Oct 2025 21:10:45 +0200 Subject: [PATCH 09/10] ruff --- src/easyreflectometry/fitting.py | 1 + tests/test_data.py | 1 - tests/test_fitting.py | 3 +++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/easyreflectometry/fitting.py b/src/easyreflectometry/fitting.py index 16cd275d..69352186 100644 --- a/src/easyreflectometry/fitting.py +++ b/src/easyreflectometry/fitting.py @@ -1,6 +1,7 @@ __author__ = 'github.com/arm61' import warnings + import numpy as np import scipp as sc from easyscience.fitting import AvailableMinimizers diff --git a/tests/test_data.py b/tests/test_data.py index a174ae7f..a974a75a 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -13,7 +13,6 @@ from easyreflectometry.data.measurement import _load_orso from easyreflectometry.data.measurement import _load_txt from easyreflectometry.data.measurement import load -from easyreflectometry.data.measurement import load_as_dataset PATH_STATIC = os.path.join(os.path.dirname(easyreflectometry.__file__), '..', '..', 'tests', '_static') diff --git a/tests/test_fitting.py b/tests/test_fitting.py index c07ed023..0b02ed82 100644 --- a/tests/test_fitting.py +++ b/tests/test_fitting.py @@ -75,7 +75,9 @@ def test_fitting(minimizer): def test_fitting_with_zero_variance(): """Test that zero variance points are properly detected and masked during fitting when present in the data.""" import warnings + import numpy as np + from easyreflectometry.data.measurement import load # Load data that contains zero variance points @@ -155,6 +157,7 @@ def test_fitting_with_zero_variance(): def test_fitting_with_manual_zero_variance(): """Test the fit method with manually created zero variance points.""" import warnings + import numpy as np import scipp as sc From a47b1bddb38a96cd82752b7f9e0385b8693b4730 Mon Sep 17 00:00:00 2001 From: rozyczko Date: Mon, 6 Oct 2025 11:39:04 +0200 Subject: [PATCH 10/10] simplify logic --- src/easyreflectometry/fitting.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/easyreflectometry/fitting.py b/src/easyreflectometry/fitting.py index 69352186..99b5e614 100644 --- a/src/easyreflectometry/fitting.py +++ b/src/easyreflectometry/fitting.py @@ -63,15 +63,12 @@ def fit(self, data: sc.DataGroup, id: int = 0) -> sc.DataGroup: f"Masked {num_zero_variance} data point(s) in reflectivity {i} due to zero variance during fitting.", UserWarning ) - # Keep only points with non-zero variances - valid_mask = ~zero_variance_mask - x_vals_masked = x_vals[valid_mask] - y_vals_masked = y_vals[valid_mask] - variances_masked = variances[valid_mask] - else: - x_vals_masked = x_vals - y_vals_masked = y_vals - variances_masked = variances + + # Keep only points with non-zero variances + valid_mask = ~zero_variance_mask + x_vals_masked = x_vals[valid_mask] + y_vals_masked = y_vals[valid_mask] + variances_masked = variances[valid_mask] x.append(x_vals_masked) y.append(y_vals_masked)