diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 90967cd9..1d33313f 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -61,7 +61,7 @@ jobs: name: unit-tests-job flags: unittests files: ./coverage-unit.xml - fail_ci_if_error: true + fail_ci_if_error: false verbose: true token: ${{ secrets.CODECOV_TOKEN }} slug: EasyScience/EasyReflectometryLib diff --git a/CHANGELOG.md b/CHANGELOG.md index bb6a1617..69077f1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +# Version 1.6.0 (1 May 2026) + +Add Mighell-based handling of non-positive-variance points in fitting (issue #256). +Non-positive-variance data points are no longer forcibly discarded; instead, a +hybrid objective applies a Mighell substitution for non-positive-variance points +while using standard weighted least squares for the rest. The previous masking +behavior is available via `objective='legacy_mask'`. New `objective` parameter on +`MultiFitter`, `fit()`, and `fit_single_data_set_1d()`. + # Version 1.3.3 (17 June 2025) Added Chi^2 and fit status to fitting results. diff --git a/docs/src/api/api.rst b/docs/src/api/api.rst index ea8e2661..55bd611a 100644 --- a/docs/src/api/api.rst +++ b/docs/src/api/api.rst @@ -28,6 +28,15 @@ Project provides a higher-level interface for managing models, experiments, and project +Fitting +======= +Fitting helpers and objective functions. + +.. toctree:: + :maxdepth: 1 + + fitting + Assemblies ========== Assemblies are collections of layers that are used to represent a specific physical setup. diff --git a/docs/src/api/fitting.rst b/docs/src/api/fitting.rst new file mode 100644 index 00000000..65e1ba15 --- /dev/null +++ b/docs/src/api/fitting.rst @@ -0,0 +1,112 @@ +Fitting +======= + +.. 
currentmodule:: easyreflectometry.fitting + +Objective functions and non-positive variance handling +----------------------------------------------------- + +:class:`MultiFitter` supports several objective modes for handling reflectometry +data during fitting, especially when measured variances are non-positive. + +The default objective is ``hybrid``. This uses ordinary weighted least squares +for points with positive variance and applies a Mighell-style substitution only +to points whose variance is non-positive. The older ``legacy_mask`` mode +drops non-positive-variance points before fitting. The ``mighell`` mode applies the +Mighell transform to every point. + +Mighell objective +~~~~~~~~~~~~~~~~~ + +The full ``mighell`` objective follows the algebraic form of the +``chi^2_gamma`` statistic described by Mighell for Poisson-distributed count +data: + +.. math:: + + \chi^2_\gamma = + \sum_i \frac{[n_i + \min(n_i, 1) - m_i]^2}{n_i + 1} + +where ``n_i`` are observed counts and ``m_i`` are model values. + +In EasyReflectometry this is implemented as a weighted least-squares problem. +For each observed value ``y_i`` the fitted target is shifted to + +.. math:: + + y_{\mathrm{eff},i} = y_i + \min(y_i, 1) + +and the effective uncertainty is + +.. math:: + + \sigma_i = \sqrt{y_i + 1} + +so the minimized objective is + +.. math:: + + \sum_i \left(\frac{y_{\mathrm{eff},i} - f_i}{\sigma_i}\right)^2 = + \sum_i \frac{[y_i + \min(y_i, 1) - f_i]^2}{y_i + 1} + +This is the same algebraic form as Mighell's statistic, with the model value +``f_i`` replacing ``m_i``. + +Scope and interpretation +~~~~~~~~~~~~~~~~~~~~~~~~ + +Mighell's statistic was derived for Poisson-distributed count data. In +reflectometry workflows, the fitted values are usually normalized +reflectivities or intensities rather than raw counts. They may already have +been processed, scaled, background-corrected, or otherwise transformed before +they reach the fitter. 
+ +This distinction matters when interpreting the result. The full ``mighell`` +objective is not only a reweighting of residuals; it also changes the fitted +target from ``y`` to ``y + min(y, 1)``. For values between zero and one, this +can substantially increase the target value. A fit can therefore have a good +Mighell objective value while looking poorer against the originally plotted +reflectivity curve, or while having a worse classical chi-square. + +For reflectometry data, ``hybrid`` is generally the recommended compromise: +it preserves ordinary weighted least-squares behavior where positive variances are +available, while still allowing non-positive-variance points to contribute through the +Mighell-style substitution. + +Objective modes +~~~~~~~~~~~~~~~ + +``hybrid`` + Default. Use standard weighted least squares for points with positive + variance and apply the Mighell substitution only where variance is + non-positive. + +``mighell`` + Apply the Mighell transform to all points. The reported objective chi-square + is evaluated in transformed objective space and should not be interpreted as + a classical chi-square against the original reflectivity values. + +``legacy_mask`` + Remove non-positive-variance points before fitting and use standard weighted least + squares for the remaining points. + +``auto`` + Alias for ``hybrid``. + +Fit metrics +~~~~~~~~~~~ + +The fitter exposes both objective-space and classical fit metrics after fitting. +``objective_chi2`` and ``objective_reduced_chi`` describe the minimized +objective, which may include transformed targets under ``hybrid`` or +``mighell``. ``classical_chi2`` and ``classical_reduced_chi`` are computed +against the original observed reflectivity values using only points with +positive variance. + +API reference +------------- + +.. 
automodule:: easyreflectometry.fitting + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file diff --git a/docs/src/tutorials/fitting/simple_fitting.ipynb b/docs/src/tutorials/fitting/simple_fitting.ipynb index 5dcd5eb8..ea761e75 100644 --- a/docs/src/tutorials/fitting/simple_fitting.ipynb +++ b/docs/src/tutorials/fitting/simple_fitting.ipynb @@ -30,7 +30,9 @@ "source": [ "%matplotlib inline\n", "\n", + "import matplotlib.pyplot as plt\n", "import pooch\n", + "import refl1d\n", "import refnx\n", "\n", "import easyreflectometry\n", @@ -63,7 +65,8 @@ "outputs": [], "source": [ "print(f'easyreflectometry: {easyreflectometry.__version__}')\n", - "print(f'refnx: {refnx.__version__}')" + "print(f'refnx: {refnx.__version__}')\n", + "print(f'refl1d: {refl1d.__version__}')" ] }, { @@ -395,7 +398,7 @@ "## Choosing our calculation engine\n", "\n", "The `easyreflectometry` package enables the calculation of the reflectometry profile using either [*refnx*](https://refnx.readthedocs.io/) or [*Refl1D*](https://refl1d.readthedocs.io/en/latest/).\n", - "For this tutorial, we will stick to the current default, which is *refnx*. \n", + "We will first run the fit with the current default, *refnx*, and then rebuild the same starting model with *Refl1D* to compare the results.\n", "The calculator must be created and associated with the model that we are to fit. " ] }, @@ -464,7 +467,8 @@ "metadata": {}, "outputs": [], "source": [ - "analysed = fitter.fit(data)" + "analysed_refnx = fitter.fit(data)\n", + "analysed = analysed_refnx" ] }, { @@ -513,18 +517,119 @@ "model" ] }, + { + "cell_type": "markdown", + "id": "2b2fe558", + "metadata": {}, + "source": [ + "## Repeating the optimisation with Refl1D\n", + "\n", + "To compare backends fairly, we rebuild the same initial model and then switch the calculator to `refl1d`.\n", + "This ensures that the second optimisation starts from the same parameter guesses rather than from the already-optimised `refnx` result." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "341abc6f", + "metadata": {}, + "outputs": [], + "source": [ + "print(f'refl1d: {refl1d.__version__}')\n", + "\n", + "si_refl1d = Material(sld=2.07, isld=0, name='Si')\n", + "sio2_refl1d = Material(sld=3.47, isld=0, name='SiO2')\n", + "film_refl1d = Material(sld=2.0, isld=0, name='Film')\n", + "d2o_refl1d = Material(sld=6.36, isld=0, name='D2O')\n", + "\n", + "si_layer_refl1d = Layer(material=si_refl1d, thickness=0, roughness=0, name='Si layer')\n", + "sio2_layer_refl1d = Layer(material=sio2_refl1d, thickness=30, roughness=3, name='SiO2 layer')\n", + "film_layer_refl1d = Layer(material=film_refl1d, thickness=250, roughness=3, name='Film Layer')\n", + "subphase_refl1d = Layer(material=d2o_refl1d, thickness=0, roughness=3, name='D2O Subphase')\n", + "\n", + "superphase_refl1d = Multilayer([si_layer_refl1d, sio2_layer_refl1d], name='Si/SiO2 Superphase')\n", + "sample_refl1d = Sample(superphase_refl1d, Multilayer(film_layer_refl1d), Multilayer(subphase_refl1d), name='Film Structure')\n", + "\n", + "resolution_function_refl1d = PercentageFwhm(0.02)\n", + "model_refl1d = Model(\n", + " sample=sample_refl1d,\n", + " scale=1,\n", + " background=1e-6,\n", + " resolution_function=resolution_function_refl1d,\n", + " name='Film Model (Refl1D)'\n", + ")\n", + "\n", + "sio2_layer_refl1d.thickness.bounds = (15, 50)\n", + "film_layer_refl1d.thickness.bounds = (200, 300)\n", + "sio2_layer_refl1d.roughness.bounds = (1, 15)\n", + "film_layer_refl1d.roughness.bounds = (1, 15)\n", + "subphase_refl1d.roughness.bounds = (1, 15)\n", + "film_layer_refl1d.material.sld.bounds = (0.1, 3)\n", + "model_refl1d.background.bounds = (1e-8, 1e-5)\n", + "model_refl1d.scale.bounds = (0.5, 1.5)\n", + "\n", + "interface_refl1d = CalculatorFactory()\n", + "interface_refl1d.switch('refl1d')\n", + "model_refl1d.interface = interface_refl1d\n", + "\n", + "print(interface_refl1d.current_interface.name)" + ] + }, + { + "cell_type": 
"markdown", + "id": "2f764d79", + "metadata": {}, + "source": [ + "We can now fit the same dataset with `refl1d` and compare the fitted curve and reduced chi-squared with the earlier `refnx` result." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70229cf9", + "metadata": {}, + "outputs": [], + "source": [ + "fitter_refl1d = MultiFitter(model_refl1d)\n", + "analysed_refl1d = fitter_refl1d.fit(data)\n", + "\n", + "print(f'refnx reduced chi: {analysed_refnx[\"reduced_chi\"]:.4f}')\n", + "print(f'refl1d reduced chi: {analysed_refl1d[\"reduced_chi\"]:.4f}')\n", + "\n", + "qz = data['coords']['Qz_0'].values\n", + "reflectivity = data['data']['R_0'].values\n", + "uncertainty = data['data']['R_0'].variances**0.5\n", + "\n", + "plt.figure(figsize=(8, 5))\n", + "plt.errorbar(qz, reflectivity, yerr=uncertainty, fmt='o', markersize=3, color='black', alpha=0.6, label='Data')\n", + "plt.plot(qz, analysed_refnx['R_0_model'].values, linewidth=2.5, label='refnx fit')\n", + "plt.plot(qz, analysed_refl1d['R_0_model'].values, linewidth=2.5, label='refl1d fit')\n", + "plt.yscale('log')\n", + "plt.xlabel(r'$Q_z$ (1/Å)')\n", + "plt.ylabel('Reflectivity')\n", + "plt.grid(True, which='both', alpha=0.25)\n", + "plt.legend()\n", + "plt.show()\n", + "\n", + "print('refnx model')\n", + "print(model)\n", + "print()\n", + "print('refl1d model')\n", + "print(model_refl1d)" + ] + }, { "cell_type": "markdown", "id": "df6db8c3-6515-478b-bd2e-aac967579231", "metadata": {}, "source": [ - "We note here that the results obtained are very similar to those from the [*refnx* tutorial](https://refnx.readthedocs.io/en/latest/getting_started.html#Fitting-a-neutron-reflectometry-dataset), which is hardly surprising given that we have used the *refnx* engine in this example." 
+ "We note here that the results obtained with [*refnx*](https://refnx.readthedocs.io/en/latest/) and [*Refl1D*](https://refl1d.readthedocs.io/en/latest/) are very similar for this simple slab model, with only small backend-dependent differences in the fitted curve and optimised parameters." ] } ], "metadata": { "kernelspec": { - "display_name": "easyref", + "display_name": ".venv (3.11.9)", "language": "python", "name": "python3" }, @@ -538,7 +643,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.9" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/notebooks/zero_variance_fitting.ipynb b/notebooks/zero_variance_fitting.ipynb new file mode 100644 index 00000000..ea3ede96 --- /dev/null +++ b/notebooks/zero_variance_fitting.ipynb @@ -0,0 +1,724 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1a8a52ac", + "metadata": {}, + "source": [ + "# Zero-Variance Handling in Reflectometry Fitting\n", + "\n", + "This notebook demonstrates how `easyreflectometry` handles data points with **zero variance**\n", + "during fitting. We compare three objective strategies:\n", + "\n", + "| Objective | Behaviour |\n", + "|---|---|\n", + "| `legacy_mask` | Drops zero-variance points entirely (old default) |\n", + "| `hybrid` | Keeps all points; applies Mighell substitution only to zero-variance entries (**new default**) |\n", + "| `mighell` | Applies the Mighell (1999) transform to every point |\n", + "\n", + "The experimental data comes from `tests/_static/ref_zero_var.txt`, which contains 192 data points,\n", + "6 of which have zero error (= zero variance)." + ] + }, + { + "cell_type": "markdown", + "id": "5c6429ac", + "metadata": {}, + "source": [ + "## 1. 
Import Required Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4983acb8", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import warnings\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "from easyreflectometry.calculators import CalculatorFactory\n", + "from easyreflectometry.data.measurement import load\n", + "from easyreflectometry.fitting import MultiFitter\n", + "from easyreflectometry.model import Model\n", + "from easyreflectometry.model import PercentageFwhm\n", + "from easyreflectometry.sample import Layer\n", + "from easyreflectometry.sample import Material\n", + "from easyreflectometry.sample import Multilayer\n", + "from easyreflectometry.sample import Sample\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "96e174d4", + "metadata": {}, + "source": [ + "## 2. Load Experimental Data\n", + "\n", + "The file `ref_zero_var.txt` is a comma-separated file with columns: `q (Å⁻¹)`, `R`, `error`.\n", + "Several points near the high-Q end have `error = 0.0`, meaning zero variance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47fb3264", + "metadata": {}, + "outputs": [], + "source": [ + "DATA_PATH = os.path.join('..', 'tests', '_static', 'ref_zero_var.txt')\n", + "\n", + "# Load raw data for inspection\n", + "raw = np.loadtxt(DATA_PATH, delimiter=',', comments='#')\n", + "q_raw, r_raw, err_raw = raw[:, 0], raw[:, 1], raw[:, 2]\n", + "\n", + "# Load through easyreflectometry (produces a scipp DataGroup)\n", + "data = load(DATA_PATH)\n", + "data_key = next(iter(data['data'].keys()))\n", + "coord_key = next(iter(data['coords'].keys()))\n", + "result_model_key = f'{data_key}_model'\n", + "\n", + "print(f'Total data points : {len(q_raw)}')\n", + "print(f'Q range : [{q_raw.min():.4e}, {q_raw.max():.4e}] Å⁻¹')\n", + "print(f'R range : [{r_raw.min():.4e}, {r_raw.max():.4e}]')\n", + "print(f'Data key : {data_key}')\n", + "print(f'Coord key : {coord_key}')\n", + "print(f'Model key : {result_model_key}')" + ] + }, + { + "cell_type": "markdown", + "id": "c4495939", + "metadata": {}, + "source": [ + "## 3. 
Inspect Data and Identify Zero-Variance Points" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1be2ca78", + "metadata": {}, + "outputs": [], + "source": [ + "zero_mask = err_raw == 0.0\n", + "zero_indices = np.where(zero_mask)[0]\n", + "print(f'Number of zero-variance points: {zero_mask.sum()}')\n", + "print(f'Indices: {zero_indices}')\n", + "print('Q-values with zero variance:')\n", + "for idx in zero_indices:\n", + " print(f' [{idx:3d}] Q = {q_raw[idx]:.4e} Å⁻¹, R = {r_raw[idx]:.4e}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "373cdc62", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(figsize=(10, 6))\n", + "\n", + "# Plot all data\n", + "valid = ~zero_mask\n", + "ax.errorbar(q_raw[valid], r_raw[valid], yerr=err_raw[valid],\n", + " fmt='o', ms=3, color='C0', alpha=0.7, label='Valid points')\n", + "ax.plot(q_raw[zero_mask], r_raw[zero_mask],\n", + " 'rx', ms=10, mew=2, label=f'Zero-variance points ({zero_mask.sum()})')\n", + "\n", + "ax.set_yscale('log')\n", + "ax.set_xlabel('Q (Å⁻¹)')\n", + "ax.set_ylabel('Reflectivity')\n", + "ax.set_title('Raw experimental data — zero-variance points highlighted')\n", + "ax.legend()\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "428759d3", + "metadata": {}, + "source": [ + "## 4. Define Materials\n", + "\n", + "A simple film-on-substrate structure: **Si** substrate / **SiO₂** native oxide / **Film** / **D₂O** superphase." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "006fb3cf", + "metadata": {}, + "outputs": [], + "source": [ + "si = Material(2.07, 0, 'Si')\n", + "sio2 = Material(3.47, 0, 'SiO2')\n", + "film = Material(2.0, 0, 'Film')\n", + "d2o = Material(6.36, 0, 'D2O')" + ] + }, + { + "cell_type": "markdown", + "id": "6031b9ad", + "metadata": {}, + "source": [ + "## 5. 
Define Layers and Sample Structure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2643141f", + "metadata": {}, + "outputs": [], + "source": [ + "si_layer = Layer(si, 0, 0, 'Si layer')\n", + "sio2_layer = Layer(sio2, 30, 3, 'SiO2 layer')\n", + "film_layer = Layer(film, 250, 3, 'Film layer')\n", + "superphase = Layer(d2o, 0, 3, 'D2O superphase')\n", + "\n", + "sample = Sample(\n", + " Multilayer(si_layer),\n", + " Multilayer(sio2_layer),\n", + " Multilayer(film_layer),\n", + " Multilayer(superphase),\n", + " name='Film Structure',\n", + ")\n", + "print(sample)" + ] + }, + { + "cell_type": "markdown", + "id": "51bcedeb", + "metadata": {}, + "source": [ + "## 6. Create the Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22e6fbd9", + "metadata": {}, + "outputs": [], + "source": [ + "resolution_function = PercentageFwhm(0.02)\n", + "model = Model(sample, 1, 1e-6, resolution_function, 'Film Model')" + ] + }, + { + "cell_type": "markdown", + "id": "7eaf6993", + "metadata": {}, + "source": [ + "## 7. Set Calculator Backend and Compute Initial Curve" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "175afdcc", + "metadata": {}, + "outputs": [], + "source": [ + "interface = CalculatorFactory()\n", + "model.interface = interface\n", + "\n", + "# Compute initial model reflectivity\n", + "r_init = interface.fit_func(q_raw, model.unique_name)" + ] + }, + { + "cell_type": "markdown", + "id": "fff782bb", + "metadata": {}, + "source": [ + "## 8. 
Plot Initial Model vs Experimental Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7656b8b", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(figsize=(10, 6))\n", + "\n", + "ax.errorbar(q_raw[valid], r_raw[valid], yerr=err_raw[valid],\n", + " fmt='o', ms=3, color='C0', alpha=0.6, label='Experiment (valid)')\n", + "ax.plot(q_raw[zero_mask], r_raw[zero_mask],\n", + " 'rx', ms=10, mew=2, label='Experiment (zero variance)')\n", + "ax.plot(q_raw, r_init, '-', color='C1', lw=1.5, label='Initial model')\n", + "\n", + "ax.set_yscale('log')\n", + "ax.set_xlabel('Q (Å⁻¹)')\n", + "ax.set_ylabel('Reflectivity')\n", + "ax.set_title('Experimental data vs initial model (before fitting)')\n", + "ax.legend()\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "6c916d41", + "metadata": {}, + "source": [ + "## 9. Set Fitting Parameters and Constraints" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a85ab7e8", + "metadata": {}, + "outputs": [], + "source": [ + "sio2_layer.thickness.fixed = False\n", + "sio2_layer.thickness.bounds = (15, 50)\n", + "\n", + "film_layer.thickness.fixed = False\n", + "film_layer.thickness.bounds = (200, 300)\n", + "\n", + "film.sld.fixed = False\n", + "film.sld.bounds = (0.1, 3)\n", + "\n", + "model.background.fixed = False\n", + "model.background.bounds = (1e-7, 1e-5)\n", + "\n", + "model.scale.fixed = False\n", + "model.scale.bounds = (0.5, 1.5)\n", + "\n", + "print('Free parameters:')\n", + "for p in model.get_fit_parameters():\n", + " print(f' {p.name:20s} = {float(p.value):.4g} bounds={p.bounds}')" + ] + }, + { + "cell_type": "markdown", + "id": "b83af903", + "metadata": {}, + "source": [ + "## 10. 
Zero-Variance Handling — Three Objective Modes\n", + "\n", + "We now fit the same data with each objective mode and compare the results.\n", + "\n", + "### How each mode handles zero-variance points\n", + "\n", + "- **`legacy_mask`** — zero-variance points are dropped before fitting. The fitter sees fewer\n", + " data points, and those Q-values have no influence on the result.\n", + "- **`hybrid`** (default) — valid points use standard weighted least-squares ($w = 1/\\sigma$).\n", + " Zero-variance points get the **Mighell (1999)** substitution:\n", + " $y_\\text{eff} = y + \\min(y, 1)$, $\\sigma = \\sqrt{\\max(y + 1,\\, \\varepsilon)}$.\n", + " This keeps all data in the fit while giving zero-variance points reasonable weight.\n", + "- **`mighell`** — applies the Mighell transform to *every* point, not just zero-variance ones.\n", + " This changes both the weighting and the fitted target for the entire dataset.\n", + "\n", + "Below we report two metric families:\n", + "\n", + "- **objective chi²** — the quantity actually minimized by the selected objective\n", + "- **classical chi²** — a standard variance-weighted chi² computed only on the original\n", + " positive-variance points, for comparison" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8182f0a9", + "metadata": {}, + "outputs": [], + "source": [ + "def build_fresh_model():\n", + " \"\"\"Build a fresh model+interface so each fit starts from the same initial state.\"\"\"\n", + " _si = Material(2.07, 0, 'Si')\n", + " _sio2 = Material(3.47, 0, 'SiO2')\n", + " _film = Material(2.0, 0, 'Film')\n", + " _d2o = Material(6.36, 0, 'D2O')\n", + "\n", + " _si_layer = Layer(_si, 0, 0, 'Si layer')\n", + " _sio2_layer = Layer(_sio2, 30, 3, 'SiO2 layer')\n", + " _film_layer = Layer(_film, 250, 3, 'Film layer')\n", + " _superphase = Layer(_d2o, 0, 3, 'D2O superphase')\n", + "\n", + " _sample = Sample(\n", + " Multilayer(_si_layer),\n", + " Multilayer(_sio2_layer),\n", + " Multilayer(_film_layer),\n", 
+ " Multilayer(_superphase),\n", + " name='Film Structure',\n", + " )\n", + " _resolution = PercentageFwhm(0.02)\n", + " _model = Model(_sample, 1, 1e-6, _resolution, 'Film Model')\n", + "\n", + " _sio2_layer.thickness.fixed = False\n", + " _sio2_layer.thickness.bounds = (15, 50)\n", + " _film_layer.thickness.fixed = False\n", + " _film_layer.thickness.bounds = (200, 300)\n", + " _film.sld.fixed = False\n", + " _film.sld.bounds = (0.1, 3)\n", + " _model.background.fixed = False\n", + " _model.background.bounds = (1e-7, 1e-5)\n", + " _model.scale.fixed = False\n", + " _model.scale.bounds = (0.5, 1.5)\n", + "\n", + " _model.interface = CalculatorFactory()\n", + " return _model" + ] + }, + { + "cell_type": "markdown", + "id": "fd6f4833", + "metadata": {}, + "source": [ + "### 11. Fit with `legacy_mask`\n", + "\n", + "Zero-variance points are simply dropped." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e856e00f", + "metadata": {}, + "outputs": [], + "source": [ + "model_mask = build_fresh_model()\n", + "fitter_mask = MultiFitter(model_mask, objective='legacy_mask')\n", + "\n", + "with warnings.catch_warnings(record=True) as w_mask:\n", + " warnings.simplefilter('always')\n", + " result_mask = fitter_mask.fit(data)\n", + "\n", + "for w in w_mask:\n", + " print(f'[WARNING] {w.message}')\n", + "print(f'Success : {result_mask[\"success\"]}')\n", + "print(f'Objective reduced chi² : {result_mask[\"objective_reduced_chi\"]:.6f}')\n", + "print(f'Objective total chi² : {fitter_mask.objective_chi2:.6f}')\n", + "print(f'Classical reduced chi² : {result_mask[\"classical_reduced_chi\"]:.6f}')\n", + "print(f'Classical total chi² : {fitter_mask.classical_chi2:.6f}')\n", + "\n", + "r_fit_mask = result_mask[result_model_key].values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79755052", + "metadata": {}, + "outputs": [], + "source": [ + "result_mask\n" + ] + }, + { + "cell_type": "markdown", + "id": "24cb6fe0", + "metadata": {}, + 
"source": [ + "### 12. Fit with `hybrid` (new default)\n", + "\n", + "All points kept; Mighell substitution applied only to zero-variance entries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20c034ba", + "metadata": {}, + "outputs": [], + "source": [ + "model_hybrid = build_fresh_model()\n", + "fitter_hybrid = MultiFitter(model_hybrid, objective='hybrid')\n", + "\n", + "with warnings.catch_warnings(record=True) as w_hybrid:\n", + " warnings.simplefilter('always')\n", + " result_hybrid = fitter_hybrid.fit(data)\n", + "\n", + "for w in w_hybrid:\n", + " print(f'[WARNING] {w.message}')\n", + "print(f'Success : {result_hybrid[\"success\"]}')\n", + "print(f'Objective reduced chi² : {result_hybrid[\"objective_reduced_chi\"]:.6f}')\n", + "print(f'Objective total chi² : {fitter_hybrid.objective_chi2:.6f}')\n", + "print(f'Classical reduced chi² : {result_hybrid[\"classical_reduced_chi\"]:.6f}')\n", + "print(f'Classical total chi² : {fitter_hybrid.classical_chi2:.6f}')\n", + "\n", + "r_fit_hybrid = result_hybrid[result_model_key].values" + ] + }, + { + "cell_type": "markdown", + "id": "79258c5f", + "metadata": {}, + "source": [ + "### 13. Fit with `mighell`\n", + "\n", + "Mighell transform applied to *all* points — the chi² landscape changes entirely." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "90b08fdd", + "metadata": {}, + "outputs": [], + "source": [ + "model_mighell = build_fresh_model()\n", + "fitter_mighell = MultiFitter(model_mighell, objective='mighell')\n", + "\n", + "with warnings.catch_warnings(record=True) as w_mighell:\n", + " warnings.simplefilter('always')\n", + " result_mighell = fitter_mighell.fit(data)\n", + "\n", + "for w in w_mighell:\n", + " print(f'[WARNING] {w.message}')\n", + "print(f'Success : {result_mighell[\"success\"]}')\n", + "print(f'Objective reduced chi² : {result_mighell[\"objective_reduced_chi\"]:.6f}')\n", + "print(f'Objective total chi² : {fitter_mighell.objective_chi2:.6f}')\n", + "print(f'Classical reduced chi² : {result_mighell[\"classical_reduced_chi\"]:.6f}')\n", + "print(f'Classical total chi² : {fitter_mighell.classical_chi2:.6f}')\n", + "\n", + "r_fit_mighell = result_mighell[result_model_key].values" + ] + }, + { + "cell_type": "markdown", + "id": "6136693d", + "metadata": {}, + "source": [ + "## 14. Compare Fit Results\n", + "\n", + "### Parameter comparison table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77c23990", + "metadata": {}, + "outputs": [], + "source": [ + "models = {\n", + " 'legacy_mask': model_mask,\n", + " 'hybrid': model_hybrid,\n", + " 'mighell': model_mighell,\n", + "}\n", + "results = {\n", + " 'legacy_mask': result_mask,\n", + " 'hybrid': result_hybrid,\n", + " 'mighell': result_mighell,\n", + "}\n", + "\n", + "# Build descriptive labels to disambiguate duplicates (e.g. 
two \"thickness\" params)\n", + "ref_params = model_mask.get_fit_parameters()\n", + "seen = {}\n", + "labels = []\n", + "for p in ref_params:\n", + " n = p.name\n", + " seen[n] = seen.get(n, 0) + 1\n", + " labels.append(f'{n} ({seen[n]})')\n", + "# Simplify if name appears only once\n", + "name_counts = {}\n", + "for p in ref_params:\n", + " name_counts[p.name] = name_counts.get(p.name, 0) + 1\n", + "labels_clean = []\n", + "seen2 = {}\n", + "for p in ref_params:\n", + " n = p.name\n", + " seen2[n] = seen2.get(n, 0) + 1\n", + " if name_counts[n] > 1:\n", + " labels_clean.append(f'{n}_{seen2[n]}')\n", + " else:\n", + " labels_clean.append(n)\n", + "\n", + "obj_keys = ['legacy_mask', 'hybrid', 'mighell']\n", + "\n", + "header = f'{\"Parameter\":<24s} {\"legacy_mask\":>14s} {\"hybrid\":>14s} {\"mighell\":>14s}'\n", + "print(header)\n", + "print('-' * len(header))\n", + "\n", + "for i, label in enumerate(labels_clean):\n", + " vals = []\n", + " for obj in obj_keys:\n", + " params = models[obj].get_fit_parameters()\n", + " vals.append(float(params[i].value))\n", + " print(f'{label:<24s} {vals[0]:>14.4g} {vals[1]:>14.4g} {vals[2]:>14.4g}')\n", + "\n", + "print('-' * len(header))\n", + "print(f'{\"objective red. chi²\":<24s} {result_mask[\"objective_reduced_chi\"]:>14.4f} '\n", + " f'{result_hybrid[\"objective_reduced_chi\"]:>14.4f} {result_mighell[\"objective_reduced_chi\"]:>14.6f}')\n", + "print(f'{\"classical red. 
chi²\":<24s} {result_mask[\"classical_reduced_chi\"]:>14.4f} '\n", + " f'{result_hybrid[\"classical_reduced_chi\"]:>14.4f} {result_mighell[\"classical_reduced_chi\"]:>14.4f}')\n", + "print(f'{\"success\":<24s} {str(result_mask[\"success\"]):>14s} '\n", + " f'{str(result_hybrid[\"success\"]):>14s} {str(result_mighell[\"success\"]):>14s}')" + ] + }, + { + "cell_type": "markdown", + "id": "906b9cdb", + "metadata": {}, + "source": [ + "### Fitted reflectivity curves — all three objectives" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4f8f3a7", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(figsize=(10, 6))\n", + "\n", + "# Experimental data\n", + "ax.errorbar(\n", + " q_raw[valid],\n", + " r_raw[valid],\n", + " yerr=err_raw[valid],\n", + " fmt='o',\n", + " ms=2,\n", + " color='grey',\n", + " alpha=0.5,\n", + " label='Experiment',\n", + ")\n", + "ax.plot(q_raw[zero_mask], r_raw[zero_mask], 'rx', ms=10, mew=2, label='Zero-variance points')\n", + "\n", + "# Fitted curves\n", + "ax.plot(\n", + " q_raw,\n", + " r_fit_mask,\n", + " '-',\n", + " lw=1.5,\n", + " label=(\n", + " f'legacy_mask (obj={result_mask[\"objective_reduced_chi\"]:.3g}, '\n", + " f'class={result_mask[\"classical_reduced_chi\"]:.3g})'\n", + " ),\n", + ")\n", + "ax.plot(\n", + " q_raw,\n", + " r_fit_hybrid,\n", + " '--',\n", + " lw=1.5,\n", + " label=(\n", + " f'hybrid (obj={result_hybrid[\"objective_reduced_chi\"]:.3g}, '\n", + " f'class={result_hybrid[\"classical_reduced_chi\"]:.3g})'\n", + " ),\n", + ")\n", + "ax.plot(\n", + " q_raw,\n", + " r_fit_mighell,\n", + " ':',\n", + " lw=1.5,\n", + " label=(\n", + " f'mighell (obj={result_mighell[\"objective_reduced_chi\"]:.3g}, '\n", + " f'class={result_mighell[\"classical_reduced_chi\"]:.3g})'\n", + " ),\n", + ")\n", + "\n", + "ax.set_yscale('log')\n", + "ax.set_xlabel('Q (Å⁻¹)')\n", + "ax.set_ylabel('Reflectivity')\n", + "ax.set_title('Fitted reflectivity — comparison of objective modes')\n", + 
"ax.legend(fontsize=9)\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "52821f2d", + "metadata": {}, + "source": [ + "## 15. Examine Residuals\n", + "\n", + "We compute normalised residuals $(R_\\text{exp} - R_\\text{model}) / \\sigma$ for each objective.\n", + "Zero-variance points are shown separately — for `legacy_mask` they were excluded from\n", + "the fit, while `hybrid` and `mighell` included them with transformed weights." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0292ef11", + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplots(3, 1, figsize=(10, 10), sharex=True)\n", + "\n", + "fits = {\n", + " 'legacy_mask': r_fit_mask,\n", + " 'hybrid': r_fit_hybrid,\n", + " 'mighell': r_fit_mighell,\n", + "}\n", + "\n", + "for ax, (obj_name, r_fit) in zip(axes, fits.items()):\n", + " # Normalised residuals for valid points\n", + " residuals_valid = (r_raw[valid] - r_fit[valid]) / err_raw[valid]\n", + " ax.plot(q_raw[valid], residuals_valid, 'o', ms=2, alpha=0.6, color='C0', label='Valid points')\n", + "\n", + " # Un-normalised residuals at zero-variance points (no sigma to normalise by)\n", + " residuals_zero = r_raw[zero_mask] - r_fit[zero_mask]\n", + " ax.plot(q_raw[zero_mask], np.zeros_like(residuals_zero), 'rx', ms=10, mew=2,\n", + " label='Zero-var Q positions')\n", + "\n", + " ax.axhline(0, color='k', lw=0.5)\n", + " ax.set_ylabel('Normalised residual')\n", + " ax.set_title(f'{obj_name}')\n", + " ax.legend(fontsize=8, loc='upper right')\n", + "\n", + "axes[-1].set_xlabel('Q (Å⁻¹)')\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "a2f39948", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "| Objective | Zero-var handling | Objective chi² interpretation | Classical chi² comparison |\n", + "|---|---|---|---|\n", + "| `legacy_mask` | Dropped from fit | Same as classical chi² on retained points | Directly comparable 
|\n", + "| `hybrid` | Mighell substitution for zero-var only | Slightly modified objective | Usually close to classical when zero-var fraction is small |\n", + "| `mighell` | Mighell transform for **all** points | **Not** a classical chi²; target and weights both change | Can look visibly worse against plotted reflectivity even when the objective is minimized well |\n", + "\n", + "The `hybrid` mode (new default) is recommended: it keeps all data points in the fit while\n", + "preserving a classical chi² comparison on the original positive-variance points.\n", + "\n", + "The full `mighell` mode matches the Mighell paper's objective mathematically, but that paper is\n", + "derived for Poisson count data. Applied to normalized reflectivity, it should be interpreted as a\n", + "Poisson-style objective rather than a visually comparable reduced chi² fit." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "era", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 9833bc40..ec3df724 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ dev = [ docs = [ "myst_parser", "nbsphinx", + "plopp", "sphinx<=8.1.3", "sphinx_autodoc_typehints", "sphinx_book_theme", diff --git a/src/easyreflectometry/__version__.py b/src/easyreflectometry/__version__.py index 77f1c8e6..bcd8d54e 100644 --- a/src/easyreflectometry/__version__.py +++ b/src/easyreflectometry/__version__.py @@ -1 +1 @@ -__version__ = '1.5.0' +__version__ = '1.6.0' diff --git a/src/easyreflectometry/fitting.py b/src/easyreflectometry/fitting.py index 86df41e3..0750beb5 100644 --- a/src/easyreflectometry/fitting.py +++ b/src/easyreflectometry/fitting.py 
@@ -11,14 +11,145 @@
 from easyreflectometry.data import DataSet1D
 from easyreflectometry.model import Model
 
+_VALID_OBJECTIVES = ('legacy_mask', 'mighell', 'hybrid', 'auto')
+_EPS = 1e-30
+
+
+def _validate_objective(objective: str) -> str:
+    """Validate and resolve the objective string.
+
+    :param objective: The objective mode string.
+    :type objective: str
+    :return: Resolved objective string ('auto' becomes 'hybrid').
+    :rtype: str
+    :raises ValueError: If the objective is not one of the valid options.
+    """
+    if objective not in _VALID_OBJECTIVES:
+        raise ValueError(f'Unknown objective {objective!r}. Valid options: {_VALID_OBJECTIVES}')
+    if objective == 'auto':
+        return 'hybrid'
+    return objective
+
+
+def _prepare_fit_arrays(
+    x_vals: np.ndarray,
+    y_vals: np.ndarray,
+    variances: np.ndarray,
+    objective: str,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, dict]:
+    """Prepare x, y_eff, and weights arrays for fitting based on the objective mode.
+
+    For ``legacy_mask``, non-positive-variance points are removed from all arrays.
+    For ``hybrid``, positive-variance points use standard WLS while
+    non-positive-variance points use Mighell-transformed y and weights.
+    For ``mighell``, all points use the Mighell transform.
+
+    Note: ``variances`` here means σ² (the scipp convention), not σ.
+
+    :param x_vals: Independent variable values.
+    :type x_vals: np.ndarray
+    :param y_vals: Observed dependent variable values.
+    :type y_vals: np.ndarray
+    :param variances: Variance (σ²) of each observed point.
+    :type variances: np.ndarray
+    :param objective: One of 'legacy_mask', 'hybrid', 'mighell'.
+    :type objective: str
+    :return: Tuple of (x_out, y_eff, weights, stats) where stats is a dict
+        with keys 'valid', 'mighell_substituted', 'masked', 'transformed_all_points'.
+ :rtype: tuple[np.ndarray, np.ndarray, np.ndarray, dict] + """ + n = len(y_vals) + zero_mask = variances <= 0.0 + n_zero = int(np.sum(zero_mask)) + n_valid = n - n_zero + + if objective == 'legacy_mask': + valid = ~zero_mask + x_out = x_vals[valid] + y_eff = y_vals[valid] + if n_valid > 0: + weights = 1.0 / np.sqrt(variances[valid]) + else: + weights = np.array([]) + stats = {'valid': n_valid, 'mighell_substituted': 0, 'masked': n_zero, 'transformed_all_points': False} + return x_out, y_eff, weights, stats + + # hybrid or mighell + y_eff = np.copy(y_vals) + sigma = np.empty(n) + + if objective == 'mighell': + apply_mighell = np.ones(n, dtype=bool) + else: + # hybrid: apply Mighell only to zero-variance points + apply_mighell = zero_mask + + # Standard WLS for non-Mighell points + standard = ~apply_mighell + if np.any(standard): + sigma[standard] = np.sqrt(variances[standard]) + + # Mighell transform for selected points + if np.any(apply_mighell): + y_m = y_vals[apply_mighell] + delta = np.minimum(y_m, 1.0) + y_eff[apply_mighell] = y_m + delta + sigma[apply_mighell] = np.sqrt(np.maximum(y_m + 1.0, _EPS)) + + weights = 1.0 / sigma + n_mighell = int(np.sum(apply_mighell)) + stats = { + 'valid': n - n_mighell, + 'mighell_substituted': n_mighell, + 'masked': 0, + 'transformed_all_points': bool(objective == 'mighell'), + } + return x_vals, y_eff, weights, stats + + +def _compute_weighted_chi2(y_obs: np.ndarray, y_calc: np.ndarray, sigma: np.ndarray) -> float: + """Return weighted chi-square for finite, strictly positive uncertainties.""" + valid = np.isfinite(y_obs) & np.isfinite(y_calc) & np.isfinite(sigma) & (sigma > 0.0) + if not np.any(valid): + return 0.0 + residual = (y_obs[valid] - y_calc[valid]) / sigma[valid] + return float(np.sum(residual**2)) + + +def _compute_reduced_chi2(chi2: float, n_points: int, n_params: int) -> float | None: + """Return reduced chi-square or None when degrees of freedom are not positive.""" + dof = int(n_points) - int(n_params) + if 
dof <= 0: + return None + return float(chi2 / dof) + + +def _fit_result_reduced_chi(result: FitResults, n_points: int | None = None) -> float: + """Return reduced chi-square from either supported FitResults attribute name.""" + for attribute in ('reduced_chi', 'reduced_chi2'): + value = getattr(result, attribute, None) + if isinstance(value, (int, float, np.number)): + return float(value) + if n_points is not None: + reduced_chi = _compute_reduced_chi2(float(result.chi2), n_points, result.n_pars) + if reduced_chi is not None: + return reduced_chi + raise AttributeError('FitResults object has neither reduced_chi nor reduced_chi2') + class MultiFitter: - def __init__(self, *args: Model): - r"""A convinence class for the :py:class:`easyscience.Fitting.Fitting` + def __init__(self, *args: Model, objective: str = 'hybrid'): + r"""A convenience class for the :py:class:`easyscience.Fitting.Fitting` which will populate the :py:class:`sc.DataGroup` appropriately after the fitting is performed. - :param args: Reflectometry model + :param args: Reflectometry model(s). + :param objective: Zero-variance handling strategy. One of + ``'hybrid'`` (default, Mighell for zero-variance, WLS otherwise), + ``'mighell'`` (Mighell transform for all points), + ``'legacy_mask'`` (drop zero-variance points), + ``'auto'`` (alias for ``'hybrid'``). + :type objective: str """ # This lets the unique_name be passed with the fit_func. @@ -32,22 +163,35 @@ def wrapped(*args, **kwargs): self._models = args self.easy_science_multi_fitter = EasyScienceMultiFitter(args, self._fit_func) self._fit_results: list[FitResults] | None = None - - def fit(self, data: sc.DataGroup, id: int = 0) -> sc.DataGroup: + self._classical_fit_metrics: list[dict] | None = None + self._objective = _validate_objective(objective) + + def fit(self, data: sc.DataGroup, id: int = 0, objective: str | None = None) -> sc.DataGroup: + """Perform the fitting and populate the DataGroups with the result. 
+ + :param data: DataGroup to be fitted to and populated. + :type data: sc.DataGroup + :param id: Unused parameter kept for backward compatibility. + :type id: int + :param objective: Per-call override for the zero-variance objective. + If ``None``, uses the instance default set at construction. + :type objective: str or None + :return: A new DataGroup with fitted model curves, SLD profiles, and fit statistics. + :rtype: sc.DataGroup + + :note: Under the ``mighell`` objective all points are transformed, + so ``reduced_chi`` is not a classical chi-square statistic. + Under ``hybrid``, only zero-variance points are transformed; + when they are a small fraction of the data the chi-square + remains approximately classical. """ - Perform the fitting and populate the DataGroups with the result. - - :param data: DataGroup to be fitted to and populated - :param method: Optimisation method + obj = _validate_objective(objective) if objective is not None else self._objective - :note: Points with zero variance in the data will be automatically masked - out during fitting. A warning will be issued if any such points - are found, indicating the number of points masked per reflectivity. 
- """ refl_nums = [k[3:] for k in data['coords'].keys() if 'Qz' == k[:2]] x = [] y = [] dy = [] + original_arrays = [] # Process each reflectivity dataset for i in refl_nums: @@ -55,34 +199,38 @@ def fit(self, data: sc.DataGroup, id: int = 0) -> sc.DataGroup: y_vals = data['data'][f'R_{i}'].values variances = data['data'][f'R_{i}'].variances - # Find points with non-zero variance - zero_variance_mask = variances == 0.0 - num_zero_variance = np.sum(zero_variance_mask) + x_out, y_eff, weights, stats = _prepare_fit_arrays(x_vals, y_vals, variances, obj) - if num_zero_variance > 0: + if stats['masked'] > 0: warnings.warn( - f'Masked {num_zero_variance} data point(s) in reflectivity {i} due to zero variance during fitting.', + f'Masked {stats["masked"]} data point(s) in reflectivity {i} due to zero variance during fitting.', + UserWarning, + ) + if stats.get('transformed_all_points'): + warnings.warn( + f'Applied Mighell transform to all {len(y_vals)} point(s) in reflectivity {i} during fitting.', + UserWarning, + ) + elif stats['mighell_substituted'] > 0: + warnings.warn( + f'Applied Mighell substitution to {stats["mighell_substituted"]} ' + f'zero-variance point(s) in reflectivity {i} during fitting.', UserWarning, ) - # Keep only points with non-zero variances - valid_mask = ~zero_variance_mask - x_vals_masked = x_vals[valid_mask] - y_vals_masked = y_vals[valid_mask] - variances_masked = variances[valid_mask] - - x.append(x_vals_masked) - y.append(y_vals_masked) - dy.append(1 / np.sqrt(variances_masked)) + x.append(x_out) + y.append(y_eff) + dy.append(weights) + original_arrays.append({'x': x_vals, 'y': y_vals, 'variances': variances}) result = self.easy_science_multi_fitter.fit(x, y, weights=dy) self._fit_results = result + self._classical_fit_metrics = [] new_data = data.copy() for i, _ in enumerate(result): id = refl_nums[i] - new_data[f'R_{id}_model'] = sc.array( - dims=[f'Qz_{id}'], values=self._fit_func[i](data['coords'][f'Qz_{id}'].values) - ) + model_curve = 
self._fit_func[i](data['coords'][f'Qz_{id}'].values) + new_data[f'R_{id}_model'] = sc.array(dims=[f'Qz_{id}'], values=model_curve) sld_profile = self.easy_science_multi_fitter._fit_objects[i].interface.sld_profile(self._models[i].unique_name) new_data[f'SLD_{id}'] = sc.array(dims=[f'z_{id}'], values=sld_profile[1] * 1e-6, unit=sc.Unit('1/angstrom') ** 2) if 'attrs' in new_data: @@ -90,41 +238,88 @@ def fit(self, data: sc.DataGroup, id: int = 0) -> sc.DataGroup: new_data['coords'][f'z_{id}'] = sc.array( dims=[f'z_{id}'], values=sld_profile[0], unit=(1 / new_data['coords'][f'Qz_{id}'].unit).unit ) - new_data['reduced_chi'] = float(result[i].reduced_chi) + original = original_arrays[i] + sigma_classical = np.sqrt(np.clip(original['variances'], 0.0, None)) + n_classical_points = int(np.sum(original['variances'] > 0.0)) + classical_chi2 = _compute_weighted_chi2(original['y'], model_curve, sigma_classical) + classical_reduced_chi = _compute_reduced_chi2(classical_chi2, n_classical_points, result[i].n_pars) + objective_chi2 = float(result[i].chi2) + objective_reduced_chi = _fit_result_reduced_chi(result[i], np.size(result[i].x)) + + self._classical_fit_metrics.append( + { + 'classical_chi2': classical_chi2, + 'classical_reduced_chi': classical_reduced_chi, + 'objective_chi2': objective_chi2, + 'objective_reduced_chi': objective_reduced_chi, + 'n_classical_points': n_classical_points, + } + ) + + new_data['objective_chi2'] = objective_chi2 + new_data['objective_reduced_chi'] = objective_reduced_chi + new_data['classical_chi2'] = classical_chi2 + new_data['classical_reduced_chi'] = classical_reduced_chi + new_data['reduced_chi'] = objective_reduced_chi new_data['success'] = result[i].success return new_data - def fit_single_data_set_1d(self, data: DataSet1D) -> FitResults: + def fit_single_data_set_1d(self, data: DataSet1D, objective: str | None = None) -> FitResults: + """Perform fitting on a single 1D dataset. + + :param data: The 1D dataset to fit. 
Note that ``data.ye`` stores + variances (σ²), not standard deviations. + :type data: DataSet1D + :param objective: Per-call override for the zero-variance objective. + If ``None``, uses the instance default set at construction. + :type objective: str or None + :return: Fit results from the minimizer. + :rtype: FitResults """ - Perform the fitting and populate the DataGroups with the result. + obj = _validate_objective(objective) if objective is not None else self._objective - :param data: DataGroup to be fitted to and populated - :param method: Optimisation method - """ x_vals = np.asarray(data.x) y_vals = np.asarray(data.y) variances = np.asarray(data.ye) - zero_variance_mask = variances == 0.0 - num_zero_variance = int(np.sum(zero_variance_mask)) + x_out, y_eff, weights, stats = _prepare_fit_arrays(x_vals, y_vals, variances, obj) - if num_zero_variance > 0: + if stats['masked'] > 0: + warnings.warn( + f'Masked {stats["masked"]} data point(s) in single-dataset fit due to zero variance during fitting.', + UserWarning, + ) + if stats.get('transformed_all_points'): + warnings.warn( + f'Applied Mighell transform to all {len(y_vals)} point(s) in single-dataset fit during fitting.', + UserWarning, + ) + elif stats['mighell_substituted'] > 0: warnings.warn( - f'Masked {num_zero_variance} data point(s) in single-dataset fit due to zero variance during fitting.', + f'Applied Mighell substitution to {stats["mighell_substituted"]} ' + 'zero-variance point(s) in single-dataset fit during fitting.', UserWarning, ) - valid_mask = ~zero_variance_mask - if not np.any(valid_mask): + if obj == 'legacy_mask' and len(x_out) == 0: raise ValueError('Cannot fit single dataset: all points have zero variance.') - x_vals_masked = x_vals[valid_mask] - y_vals_masked = y_vals[valid_mask] - variances_masked = variances[valid_mask] - - weights = 1.0 / np.sqrt(variances_masked) - result = self.easy_science_multi_fitter.fit(x=[x_vals_masked], y=[y_vals_masked], weights=[weights])[0] + result = 
self.easy_science_multi_fitter.fit(x=[x_out], y=[y_eff], weights=[weights])[0] self._fit_results = [result] + sigma_classical = np.sqrt(np.clip(variances, 0.0, None)) + model_curve = self._fit_func[0](x_vals) + n_classical_points = int(np.sum(variances > 0.0)) + classical_chi2 = _compute_weighted_chi2(y_vals, model_curve, sigma_classical) + classical_reduced_chi = _compute_reduced_chi2(classical_chi2, n_classical_points, result.n_pars) + self._classical_fit_metrics = [ + { + 'classical_chi2': classical_chi2, + 'classical_reduced_chi': classical_reduced_chi, + 'objective_chi2': float(result.chi2), + 'objective_reduced_chi': _fit_result_reduced_chi(result, len(x_out)), + 'n_classical_points': n_classical_points, + } + ] return result @property @@ -149,6 +344,33 @@ def reduced_chi(self) -> float | None: return total_chi2 / total_dof + @property + def classical_chi2(self) -> float | None: + """Classical chi-squared using only points with positive variances.""" + if self._classical_fit_metrics is None: + return None + return float(sum(metric['classical_chi2'] for metric in self._classical_fit_metrics)) + + @property + def classical_reduced_chi(self) -> float | None: + """Reduced classical chi-squared using only points with positive variances.""" + if self._classical_fit_metrics is None or self._fit_results is None: + return None + total_chi2 = self.classical_chi2 + total_points = sum(metric['n_classical_points'] for metric in self._classical_fit_metrics) + n_params = self._fit_results[0].n_pars + return _compute_reduced_chi2(total_chi2, total_points, n_params) + + @property + def objective_chi2(self) -> float | None: + """Objective-space chi-squared returned by the minimizer.""" + return self.chi2 + + @property + def objective_reduced_chi(self) -> float | None: + """Objective-space reduced chi-squared returned by the minimizer.""" + return self.reduced_chi + def switch_minimizer(self, minimizer: AvailableMinimizers) -> None: """ Switch the minimizer for the fitting. 
diff --git a/src/easyreflectometry/limits.py b/src/easyreflectometry/limits.py new file mode 100644 index 00000000..691f86e3 --- /dev/null +++ b/src/easyreflectometry/limits.py @@ -0,0 +1,44 @@ +import numpy as np +from easyscience.variable import Parameter + +# Fixed-range limit definitions +SLD_LIMITS = (-1.0, 10.0) +SCALE_LIMITS = (0.0, 10.0) + + +def apply_default_limits(parameter: Parameter, kind: str) -> None: + """Apply default min/max to a parameter if current bounds are infinite. + + :param parameter: The parameter to adjust. + :type parameter: Parameter + :param kind: One of 'thickness', 'roughness', 'sld', 'isld', 'scale'. + :type kind: str + """ + if not parameter.independent: + return + + if kind in ('thickness', 'roughness'): + _apply_percentage_limits(parameter) + elif kind in ('sld', 'isld'): + _apply_fixed_limits(parameter, *SLD_LIMITS) + elif kind == 'scale': + _apply_fixed_limits(parameter, *SCALE_LIMITS) + + +def _apply_percentage_limits(parameter: Parameter) -> None: + """Set min to 50% and max to 200% of the current value, only if current bounds are inf.""" + value = parameter.value + if value == 0.0: + return + if np.isinf(parameter.min): + parameter.min = 0.5 * value + if np.isinf(parameter.max): + parameter.max = 2.0 * value + + +def _apply_fixed_limits(parameter: Parameter, low: float, high: float) -> None: + """Set fixed min/max, only if current bounds are inf.""" + if np.isinf(parameter.min) and low <= parameter.value: + parameter.min = low + if np.isinf(parameter.max) and high >= parameter.value: + parameter.max = high diff --git a/src/easyreflectometry/model/model.py b/src/easyreflectometry/model/model.py index e4bdaac5..7f651fa2 100644 --- a/src/easyreflectometry/model/model.py +++ b/src/easyreflectometry/model/model.py @@ -12,6 +12,7 @@ from easyscience import global_object from easyscience.variable import Parameter +from easyreflectometry.limits import apply_default_limits from easyreflectometry.sample import BaseAssembly from 
easyreflectometry.sample import Sample from easyreflectometry.utils import get_as_parameter @@ -86,6 +87,7 @@ def __init__( resolution_function = PercentageFwhm(DEFAULTS['resolution']['value']) scale = get_as_parameter('scale', scale, DEFAULTS) + apply_default_limits(scale, 'scale') background = get_as_parameter('background', background, DEFAULTS) self.color = color self._is_default = False diff --git a/src/easyreflectometry/project.py b/src/easyreflectometry/project.py index 6f7096c1..7e1ab3c9 100644 --- a/src/easyreflectometry/project.py +++ b/src/easyreflectometry/project.py @@ -21,6 +21,7 @@ from easyreflectometry.data.measurement import extract_orso_title from easyreflectometry.data.measurement import load_data_from_orso_file from easyreflectometry.fitting import MultiFitter +from easyreflectometry.limits import apply_default_limits from easyreflectometry.model import Model from easyreflectometry.model import ModelCollection from easyreflectometry.model import PercentageFwhm @@ -91,6 +92,52 @@ def parameters(self) -> List[Parameter]: parameters.append(param) return parameters + def _sync_parameter_states(self) -> None: + """Apply project-level parameter enablement and default limits. + + Superphase thickness/roughness and subphase thickness are physically + meaningless and are marked as disabled. Thickness and roughness default + ranges are then applied only to enabled parameters created from project + defaults, leaving explicit user-provided bounds untouched. 
+ """ + if self._models is None: + return + + disabled_ids: set[int] = set() + for model in self._models: + sample = model.sample + if sample is None or len(sample) == 0: + continue + superphase = sample.superphase + if superphase is not None: + disabled_ids.add(id(superphase.thickness)) + disabled_ids.add(id(superphase.roughness)) + subphase = sample.subphase + if subphase is not None: + disabled_ids.add(id(subphase.thickness)) + + for model in self._models: + sample = model.sample + if sample is None or len(sample) == 0: + continue + for assembly in sample: + for layer in assembly.layers: + self._sync_layer_parameter_state(layer.thickness, 'thickness', disabled_ids) + self._sync_layer_parameter_state(layer.roughness, 'roughness', disabled_ids) + + def _sync_layer_parameter_state(self, parameter: Parameter, kind: str, disabled_ids: set[int]) -> None: + """Update a layer parameter's enabled state and pending default limits.""" + if id(parameter) in disabled_ids: + parameter.enabled = False + return + + if getattr(parameter, 'default_limits_pending', False): + delattr(parameter, 'default_limits_pending') + if getattr(parameter, 'enabled', True): + parameter.min = -np.inf + parameter.max = np.inf + apply_default_limits(parameter, kind) + @property def q_min(self): if self._q_min is None: @@ -204,6 +251,7 @@ def models(self, models: ModelCollection) -> None: self._materials.extend(self._get_materials_in_models()) for model in self._models: model.interface = self._calculator + self._sync_parameter_states() @property def fitter(self) -> MultiFitter: @@ -220,7 +268,17 @@ def calculator(self) -> str: @calculator.setter def calculator(self, calculator: str) -> None: + if calculator == self._calculator.current_interface_name: + return + self._calculator.switch(calculator) + self._calculator.reset_storage() + + for model in self._models: + model.generate_bindings() + + self._fitter = None + self._fitter_model_index = None @property def minimizer(self) -> AvailableMinimizers: 
@@ -324,6 +382,7 @@ def add_sample_from_orso(self, sample: Sample) -> None: model.interface = self._calculator # Extract materials from the new model and add to project materials self._materials.extend(self._get_materials_from_model(model)) + self._sync_parameter_states() # Switch to the newly added model so its data is visible in the UI self.current_model_index = len(self._models) - 1 diff --git a/src/easyreflectometry/sample/elements/layers/layer.py b/src/easyreflectometry/sample/elements/layers/layer.py index 28f13a38..513b18f9 100644 --- a/src/easyreflectometry/sample/elements/layers/layer.py +++ b/src/easyreflectometry/sample/elements/layers/layer.py @@ -65,18 +65,23 @@ def __init__( if unique_name is None: unique_name = global_object.generate_unique_name(self.__class__.__name__) + thickness_value = thickness thickness = get_as_parameter( name='thickness', value=thickness, default_dict=DEFAULTS, unique_name_prefix=f'{unique_name}_Thickness', ) + thickness.default_limits_pending = not isinstance(thickness_value, Parameter) + + roughness_value = roughness roughness = get_as_parameter( name='roughness', value=roughness, default_dict=DEFAULTS, unique_name_prefix=f'{unique_name}_Roughness', ) + roughness.default_limits_pending = not isinstance(roughness_value, Parameter) super().__init__( name=name, diff --git a/src/easyreflectometry/sample/elements/materials/material.py b/src/easyreflectometry/sample/elements/materials/material.py index 249cd160..8c030031 100644 --- a/src/easyreflectometry/sample/elements/materials/material.py +++ b/src/easyreflectometry/sample/elements/materials/material.py @@ -7,6 +7,7 @@ from easyscience import global_object from easyscience.variable import Parameter +from easyreflectometry.limits import apply_default_limits from easyreflectometry.utils import get_as_parameter from ...base_core import BaseCore @@ -62,12 +63,15 @@ def __init__( default_dict=DEFAULTS, unique_name_prefix=f'{unique_name}_Sld', ) + apply_default_limits(sld, 'sld') + 
isld = get_as_parameter( name='isld', value=isld, default_dict=DEFAULTS, unique_name_prefix=f'{unique_name}_Isld', ) + apply_default_limits(isld, 'isld') super().__init__( name=name, diff --git a/tests/model/test_model.py b/tests/model/test_model.py index 5745501e..49ce60fc 100644 --- a/tests/model/test_model.py +++ b/tests/model/test_model.py @@ -37,7 +37,7 @@ def test_default(self): assert_equal(str(p.scale.unit), 'dimensionless') assert_equal(p.scale.value, 1.0) assert_equal(p.scale.min, 0.0) - assert_equal(p.scale.max, np.inf) + assert_equal(p.scale.max, 10.0) assert_equal(p.scale.fixed, True) assert_equal(p.background.display_name, 'background') assert_equal(str(p.background.unit), 'dimensionless') @@ -73,7 +73,7 @@ def test_from_pars(self): assert_equal(str(mod.scale.unit), 'dimensionless') assert_equal(mod.scale.value, 2.0) assert_equal(mod.scale.min, 0.0) - assert_equal(mod.scale.max, np.inf) + assert_equal(mod.scale.max, 10.0) assert_equal(mod.scale.fixed, True) assert_equal(mod.background.display_name, 'background') assert_equal(str(mod.background.unit), 'dimensionless') diff --git a/tests/sample/elements/materials/test_material.py b/tests/sample/elements/materials/test_material.py index a9ff1dde..c07a2217 100644 --- a/tests/sample/elements/materials/test_material.py +++ b/tests/sample/elements/materials/test_material.py @@ -5,7 +5,6 @@ __author__ = 'github.com/arm61' __version__ = '0.0.1' -import numpy as np from easyscience import global_object from easyreflectometry.sample.elements.materials.material import DEFAULTS @@ -21,14 +20,14 @@ def test_no_arguments(self): assert p.sld.display_name == 'sld' assert str(p.sld.unit) == '1/Å^2' assert p.sld.value == 4.186 - assert p.sld.min == -np.inf - assert p.sld.max == np.inf + assert p.sld.min == -1.0 + assert p.sld.max == 10.0 assert p.sld.fixed is True assert p.isld.display_name == 'isld' assert str(p.isld.unit) == '1/Å^2' assert p.isld.value == 0.0 - assert p.isld.min == -np.inf - assert p.isld.max == 
np.inf + assert p.isld.min == -1.0 + assert p.isld.max == 10.0 assert p.isld.fixed is True def test_shuffled_arguments(self): @@ -38,14 +37,14 @@ def test_shuffled_arguments(self): assert p.sld.display_name == 'sld' assert str(p.sld.unit) == '1/Å^2' assert p.sld.value == 6.908 - assert p.sld.min == -np.inf - assert p.sld.max == np.inf + assert p.sld.min == -1.0 + assert p.sld.max == 10.0 assert p.sld.fixed is True assert p.isld.display_name == 'isld' assert str(p.isld.unit) == '1/Å^2' assert p.isld.value == -0.278 - assert p.isld.min == -np.inf - assert p.isld.max == np.inf + assert p.isld.min == -1.0 + assert p.isld.max == 10.0 assert p.isld.fixed is True def test_only_sld_key(self): @@ -53,8 +52,8 @@ def test_only_sld_key(self): assert p.sld.display_name == 'sld' assert str(p.sld.unit) == '1/Å^2' assert p.sld.value == 10 - assert p.sld.min == -np.inf - assert p.sld.max == np.inf + assert p.sld.min == -1.0 + assert p.sld.max == 10.0 assert p.sld.fixed is True def test_only_sld_key_parameter(self): @@ -69,8 +68,8 @@ def test_only_isld_key(self): assert p.isld.display_name == 'isld' assert str(p.isld.unit) == '1/Å^2' assert p.isld.value == 10 - assert p.isld.min == -np.inf - assert p.isld.max == np.inf + assert p.isld.min == -1.0 + assert p.isld.max == 10.0 assert p.isld.fixed is True def test_only_isld_key_parameter(self): diff --git a/tests/test_fitting.py b/tests/test_fitting.py index 446f10b9..2a03a98c 100644 --- a/tests/test_fitting.py +++ b/tests/test_fitting.py @@ -5,6 +5,7 @@ import numpy as np import pytest +import scipp as sc from easyscience.fitting.minimizers.factory import AvailableMinimizers import easyreflectometry @@ -12,6 +13,8 @@ from easyreflectometry.data import DataSet1D from easyreflectometry.data.measurement import load from easyreflectometry.fitting import MultiFitter +from easyreflectometry.fitting import _prepare_fit_arrays +from easyreflectometry.fitting import _validate_objective from easyreflectometry.model import Model from 
easyreflectometry.model import PercentageFwhm from easyreflectometry.sample import Layer @@ -76,7 +79,7 @@ def test_fitting(minimizer): def test_fitting_with_zero_variance(): - """Test that zero variance points are properly detected and masked during fitting when present in the data.""" + """Test that zero variance points are handled via Mighell substitution (hybrid default).""" import warnings import numpy as np @@ -129,26 +132,24 @@ def test_fitting_with_zero_variance(): model.interface = interface fitter = MultiFitter(model) - # Capture warnings during fitting - check if zero variance points still exist in the data - # and are properly handled by the fitting method + # Capture warnings during fitting with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') analysed = fitter.fit(data) - # Check if any zero variance warnings were issued during fitting - fitting_warnings = [str(warning.message) for warning in w if 'zero variance during fitting' in str(warning.message)] + # Under hybrid default, zero variance points trigger Mighell substitution warnings + mighell_warnings = [str(warning.message) for warning in w if 'Mighell substitution' in str(warning.message)] + mask_warnings = [str(warning.message) for warning in w if 'Masked' in str(warning.message)] + + # Hybrid mode should NOT produce mask warnings + assert len(mask_warnings) == 0, f'Unexpected mask warnings under hybrid: {mask_warnings}' - # The fitting method should handle zero variance points gracefully - # If there are any zero variance points remaining in the data, they should be masked - # and a warning should be issued - if len(fitting_warnings) > 0: - # Verify the warning message format and that it mentions masking points - for warning_msg in fitting_warnings: - assert 'Masked' in warning_msg and 'zero variance during fitting' in warning_msg - print(f'Info: {warning_msg}') # Log for debugging + # If there are zero-variance points in the loaded data, Mighell warnings should appear 
+ if len(mighell_warnings) > 0: + for warning_msg in mighell_warnings: + assert 'zero-variance point(s)' in warning_msg # Basic checks that fitting completed - # The keys will be based on the filename, not just '0' model_keys = [k for k in analysed.keys() if k.endswith('_model')] sld_keys = [k for k in analysed.keys() if k.startswith('SLD_')] assert len(model_keys) > 0, f'No model keys found in {list(analysed.keys())}' @@ -157,7 +158,7 @@ def test_fitting_with_zero_variance(): def test_fitting_with_manual_zero_variance(): - """Test the fit method with manually created zero variance points.""" + """Test the fit method with manually created zero variance points using hybrid (default).""" import warnings import numpy as np @@ -217,12 +218,12 @@ def test_fitting_with_manual_zero_variance(): warnings.simplefilter('always') analysed = fitter.fit(data) - # Check that warnings were issued about zero variance points - fitting_warnings = [str(warning.message) for warning in w if 'zero variance during fitting' in str(warning.message)] + # Under hybrid default, should get Mighell substitution warning, not masking + mighell_warnings = [str(warning.message) for warning in w if 'Mighell substitution' in str(warning.message)] + + assert len(mighell_warnings) == 1, f'Expected 1 Mighell warning, got {len(mighell_warnings)}: {mighell_warnings}' + assert '7 zero-variance point(s)' in mighell_warnings[0], f'Unexpected warning content: {mighell_warnings[0]}' - # Should have one warning about the 7 zero variance points (5 + 2) - assert len(fitting_warnings) == 1, f'Expected 1 warning, got {len(fitting_warnings)}: {fitting_warnings}' - assert 'Masked 7 data point(s)' in fitting_warnings[0], f'Unexpected warning content: {fitting_warnings[0]}' # Basic checks that fitting completed despite zero variance points assert 'R_0_model' in analysed.keys() assert 'SLD_0' in analysed.keys() @@ -230,9 +231,10 @@ def test_fitting_with_manual_zero_variance(): def 
test_fit_single_data_set_1d_masks_zero_variance_points(): + """Legacy mask mode: zero-variance points are dropped.""" model = Model() model.interface = CalculatorFactory() - fitter = MultiFitter(model) + fitter = MultiFitter(model, objective='legacy_mask') captured = {} mock_result = MagicMock() @@ -255,7 +257,7 @@ def _fake_fit(*, x, y, weights): ye=np.array([0.01, 0.0, 0.04]), ) - with pytest.warns(UserWarning, match='Masked 1 data point\(s\) in single-dataset fit'): + with pytest.warns(UserWarning, match='Masked 1 data point\\(s\\) in single-dataset fit'): result = fitter.fit_single_data_set_1d(data) assert result is mock_result @@ -286,9 +288,10 @@ def test_reduced_chi_uses_global_dof_across_fit_results(): def test_fit_single_data_set_1d_all_zero_variance_raises(): + """Legacy mask mode raises when all points have zero variance.""" model = Model() model.interface = CalculatorFactory() - fitter = MultiFitter(model) + fitter = MultiFitter(model, objective='legacy_mask') data = DataSet1D( name='all_zero', @@ -376,3 +379,408 @@ def _fake_fit(*, x, y, weights): assert result is mock_result assert np.allclose(captured['x'][0], np.array([0.01, 0.02, 0.03])) assert np.allclose(captured['y'][0], np.array([1.0, 0.8, 0.6])) + + +# --- New tests for objective-based zero-variance handling --- + + +def test_objective_validation_rejects_unknown_value(): + with pytest.raises(ValueError, match='Unknown objective'): + _validate_objective('bad_value') + + +def test_objective_validation_resolves_auto(): + assert _validate_objective('auto') == 'hybrid' + assert _validate_objective('hybrid') == 'hybrid' + assert _validate_objective('legacy_mask') == 'legacy_mask' + assert _validate_objective('mighell') == 'mighell' + + +def test_prepare_fit_arrays_weights_always_positive_and_finite(): + """Weights must be strictly positive and finite for all inputs and objectives.""" + test_cases = [ + # (y_vals, variances, description) + (np.array([0.0]), np.array([0.0]), 'y=0, var=0'), + 
(np.array([-0.5]), np.array([0.0]), 'y=-0.5, var=0'), + (np.array([-1.0]), np.array([0.0]), 'y=-1, var=0'), + (np.array([1e6]), np.array([0.0]), 'y=1e6, var=0'), + (np.array([0.5, 0.3, 0.1]), np.array([0.0, 0.0, 0.0]), 'all-zero variances'), + (np.array([0.5, 0.3, 0.1]), np.array([0.01, 0.0, 0.04]), 'mixed variances'), + (np.array([0.0, -0.5, -1.0, 1e6]), np.array([0.0, 0.0, 0.0, 0.0]), 'edge y values'), + ] + + for objective in ('hybrid', 'mighell'): + for y_vals, variances, desc in test_cases: + x = np.arange(len(y_vals), dtype=float) + _, _, weights, _ = _prepare_fit_arrays(x, y_vals, variances, objective) + assert len(weights) == len(y_vals), f'Wrong length for {desc}, {objective}' + assert np.all(weights > 0), f'Non-positive weight for {desc}, {objective}: {weights}' + assert np.all(np.isfinite(weights)), f'Non-finite weight for {desc}, {objective}: {weights}' + + +def test_prepare_fit_arrays_legacy_mask_drops_zero_variance(): + x = np.array([0.01, 0.02, 0.03]) + y = np.array([1.0, 0.8, 0.6]) + var = np.array([0.01, 0.0, 0.04]) + + x_out, y_eff, weights, stats = _prepare_fit_arrays(x, y, var, 'legacy_mask') + + assert np.allclose(x_out, [0.01, 0.03]) + assert np.allclose(y_eff, [1.0, 0.6]) + assert np.allclose(weights, [1.0 / np.sqrt(0.01), 1.0 / np.sqrt(0.04)]) + assert stats == {'valid': 2, 'mighell_substituted': 0, 'masked': 1, 'transformed_all_points': False} + + +def test_prepare_fit_arrays_hybrid_transforms_zero_variance(): + x = np.array([0.01, 0.02, 0.03]) + y = np.array([1.0, 0.8, 0.6]) + var = np.array([0.01, 0.0, 0.04]) + + x_out, y_eff, weights, stats = _prepare_fit_arrays(x, y, var, 'hybrid') + + # x unchanged + assert np.allclose(x_out, x) + # Index 0 and 2: standard WLS (unchanged y) + assert y_eff[0] == pytest.approx(1.0) + assert y_eff[2] == pytest.approx(0.6) + assert weights[0] == pytest.approx(1.0 / np.sqrt(0.01)) + assert weights[2] == pytest.approx(1.0 / np.sqrt(0.04)) + # Index 1: Mighell transform — y_eff = y + min(y, 1) = 0.8 + 0.8 = 
1.6 + assert y_eff[1] == pytest.approx(0.8 + 0.8) + # sigma = sqrt(y + 1) = sqrt(1.8) + assert weights[1] == pytest.approx(1.0 / np.sqrt(1.8)) + assert stats == {'valid': 2, 'mighell_substituted': 1, 'masked': 0, 'transformed_all_points': False} + + +def test_prepare_fit_arrays_mighell_transforms_all(): + x = np.array([0.01, 0.02]) + y = np.array([0.5, 0.3]) + var = np.array([0.01, 0.04]) # All valid, but mighell transforms everything + + x_out, y_eff, weights, stats = _prepare_fit_arrays(x, y, var, 'mighell') + + assert np.allclose(x_out, x) + # y_eff = y + min(y, 1) = y + y (since y < 1) + assert y_eff[0] == pytest.approx(0.5 + 0.5) + assert y_eff[1] == pytest.approx(0.3 + 0.3) + # sigma = sqrt(y + 1) + assert weights[0] == pytest.approx(1.0 / np.sqrt(1.5)) + assert weights[1] == pytest.approx(1.0 / np.sqrt(1.3)) + assert stats == {'valid': 0, 'mighell_substituted': 2, 'masked': 0, 'transformed_all_points': True} + + +def test_fit_single_data_set_1d_hybrid_keeps_zero_variance_points(): + """Hybrid mode keeps all points (transforms zero-variance ones).""" + model = Model() + model.interface = CalculatorFactory() + fitter = MultiFitter(model) # default objective='hybrid' + + captured = {} + mock_result = MagicMock() + mock_result.chi2 = 1.0 + mock_result.n_pars = 1 + + def _fake_fit(*, x, y, weights): + captured['x'] = x + captured['y'] = y + captured['weights'] = weights + return [mock_result] + + fitter.easy_science_multi_fitter = MagicMock() + fitter.easy_science_multi_fitter.fit = MagicMock(side_effect=_fake_fit) + + data = DataSet1D( + name='hybrid_test', + x=np.array([0.01, 0.02, 0.03]), + y=np.array([1.0, 0.8, 0.6]), + ye=np.array([0.01, 0.0, 0.04]), + ) + + with pytest.warns(UserWarning, match='Mighell substitution'): + result = fitter.fit_single_data_set_1d(data) + + assert result is mock_result + # All 3 points should be passed through (not masked) + assert len(captured['x'][0]) == 3 + assert len(captured['y'][0]) == 3 + assert len(captured['weights'][0]) 
== 3 + + +def test_fit_single_data_set_1d_mighell_warning_mentions_all_points(): + model = Model() + model.interface = CalculatorFactory() + fitter = MultiFitter(model, objective='mighell') + + mock_result = MagicMock() + mock_result.chi2 = 1.0 + mock_result.reduced_chi = 0.5 + mock_result.n_pars = 1 + + fitter.easy_science_multi_fitter = MagicMock() + fitter.easy_science_multi_fitter.fit = MagicMock(return_value=[mock_result]) + fitter._fit_func = [lambda x: np.zeros_like(x)] + + data = DataSet1D( + name='mighell_warning', + x=np.array([0.01, 0.02, 0.03]), + y=np.array([1.0, 0.8, 0.6]), + ye=np.array([0.01, 0.02, 0.04]), + ) + + with pytest.warns(UserWarning, match=r'Applied Mighell transform to all 3 point\(s\)'): + fitter.fit_single_data_set_1d(data) + + +def test_classical_and_objective_chi_are_split_for_fit_results(): + model = Model() + model.interface = CalculatorFactory() + fitter = MultiFitter(model, objective='mighell') + + fit_result = MagicMock() + fit_result.chi2 = 0.25 + fit_result.reduced_chi = 0.125 + fit_result.n_pars = 1 + fit_result.x = np.array([0.01, 0.02, 0.03]) + + fitter.easy_science_multi_fitter = MagicMock() + fitter.easy_science_multi_fitter.fit = MagicMock(return_value=[fit_result]) + fitter.easy_science_multi_fitter._fit_objects = [MagicMock(interface=MagicMock())] + fitter.easy_science_multi_fitter._fit_objects[0].interface.sld_profile.return_value = ( + np.array([0.0, 1.0]), + np.array([1.0, 2.0]), + ) + + fitter._models = [MagicMock(unique_name='model_0', as_dict=MagicMock(return_value={'name': 'model_0'}))] + fitter._fit_func = [lambda x: np.array([0.8, 0.75, 0.7])] + + data = sc.DataGroup( + { + 'coords': {'Qz_0': sc.array(dims=['Qz_0'], values=np.array([0.01, 0.02, 0.03]), unit=sc.Unit('1/angstrom'))}, + 'data': {'R_0': sc.array(dims=['Qz_0'], values=np.array([1.0, 0.9, 0.7]), variances=np.array([0.01, 0.0, 0.04]))}, + 'attrs': {}, + } + ) + + analysed = fitter.fit(data) + + expected_classical_chi2 = ((1.0 - 0.8) / 0.1) ** 2 + 
((0.7 - 0.7) / 0.2) ** 2 + expected_classical_reduced = expected_classical_chi2 / (2 - fit_result.n_pars) + + assert analysed['objective_chi2'] == pytest.approx(0.25) + assert analysed['objective_reduced_chi'] == pytest.approx(0.125) + assert analysed['classical_chi2'] == pytest.approx(expected_classical_chi2) + assert analysed['classical_reduced_chi'] == pytest.approx(expected_classical_reduced) + assert fitter.objective_chi2 == pytest.approx(0.25) + assert fitter.objective_reduced_chi == pytest.approx(0.125) + assert fitter.classical_chi2 == pytest.approx(expected_classical_chi2) + assert fitter.classical_reduced_chi == pytest.approx(expected_classical_reduced) + + +def test_fit_single_data_set_1d_all_zero_variance_hybrid_does_not_raise(): + """Hybrid mode handles all-zero-variance data without raising.""" + model = Model() + model.interface = CalculatorFactory() + fitter = MultiFitter(model) # default objective='hybrid' + + captured = {} + mock_result = MagicMock() + mock_result.chi2 = 1.0 + mock_result.n_pars = 1 + + def _fake_fit(*, x, y, weights): + captured['x'] = x + captured['y'] = y + captured['weights'] = weights + return [mock_result] + + fitter.easy_science_multi_fitter = MagicMock() + fitter.easy_science_multi_fitter.fit = MagicMock(side_effect=_fake_fit) + + data = DataSet1D( + name='all_zero_hybrid', + x=np.array([0.01, 0.02, 0.03]), + y=np.array([1.0, 0.8, 0.6]), + ye=np.array([0.0, 0.0, 0.0]), + ) + + with pytest.warns(UserWarning, match='Mighell substitution'): + result = fitter.fit_single_data_set_1d(data) + + assert result is mock_result + assert len(captured['x'][0]) == 3 + + +def test_fit_single_data_set_1d_legacy_mask_preserves_old_behavior(): + """Legacy mask mode drops zero-variance points and warns with old message.""" + model = Model() + model.interface = CalculatorFactory() + fitter = MultiFitter(model, objective='legacy_mask') + + captured = {} + mock_result = MagicMock() + mock_result.chi2 = 1.0 + mock_result.n_pars = 1 + + def 
_fake_fit(*, x, y, weights): + captured['x'] = x + captured['y'] = y + captured['weights'] = weights + return [mock_result] + + fitter.easy_science_multi_fitter = MagicMock() + fitter.easy_science_multi_fitter.fit = MagicMock(side_effect=_fake_fit) + + data = DataSet1D( + name='legacy_test', + x=np.array([0.01, 0.02, 0.03]), + y=np.array([1.0, 0.8, 0.6]), + ye=np.array([0.01, 0.0, 0.04]), + ) + + with pytest.warns(UserWarning, match='Masked 1 data point'): + result = fitter.fit_single_data_set_1d(data) + + assert result is mock_result + assert np.allclose(captured['x'][0], np.array([0.01, 0.03])) + assert np.allclose(captured['y'][0], np.array([1.0, 0.6])) + + +def test_fit_multi_dataset_hybrid_uses_transformed_y_and_weights(): + """Multi-dataset fit with hybrid objective transforms zero-variance points.""" + import scipp as sc + + qz_values = np.linspace(0.01, 0.3, 10) + r_values = np.exp(-qz_values * 50) + variances = np.ones_like(r_values) * 0.01 + variances[3:5] = 0.0 # 2 zero-variance points + + data = sc.DataGroup( + { + 'coords': {'Qz_0': sc.array(dims=['Qz_0'], values=qz_values)}, + 'data': {'R_0': sc.array(dims=['Qz_0'], values=r_values, variances=variances)}, + } + ) + + model = Model() + model.interface = CalculatorFactory() + fitter = MultiFitter(model) + + captured = {} + + def _fake_fit(x, y, weights): + captured['x'] = x + captured['y'] = y + captured['weights'] = weights + mock_r = MagicMock() + mock_r.reduced_chi = 1.0 + mock_r.success = True + mock_r.chi2 = 1.0 + mock_r.n_pars = 1 + mock_r.x = x[0] + return [mock_r] + + fitter.easy_science_multi_fitter = MagicMock() + fitter.easy_science_multi_fitter.fit = MagicMock(side_effect=_fake_fit) + fitter.easy_science_multi_fitter._fit_objects = [MagicMock()] + fitter.easy_science_multi_fitter._fit_objects[0].interface.sld_profile.return_value = ( + np.linspace(0, 100, 5), + np.ones(5), + ) + + import warnings + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + 
fitter.fit(data) + + # All 10 points should be present (not masked) + assert len(captured['x'][0]) == 10 + assert len(captured['y'][0]) == 10 + assert len(captured['weights'][0]) == 10 + + # Zero-variance points should have Mighell-transformed y + for idx in [3, 4]: + y_orig = r_values[idx] + expected_y_eff = y_orig + min(y_orig, 1.0) + assert captured['y'][0][idx] == pytest.approx(expected_y_eff) + + # Check that Mighell warning was emitted + mighell_warnings = [str(ww.message) for ww in w if 'Mighell substitution' in str(ww.message)] + assert len(mighell_warnings) == 1 + assert '2 zero-variance point(s)' in mighell_warnings[0] + + +def test_fit_warnings_objective_specific(): + """Verify that each objective mode produces the correct warning type.""" + import warnings + + model = Model() + model.interface = CalculatorFactory() + + mock_result = MagicMock() + mock_result.chi2 = 1.0 + mock_result.n_pars = 1 + + data = DataSet1D( + name='warn_test', + x=np.array([0.01, 0.02, 0.03]), + y=np.array([1.0, 0.8, 0.6]), + ye=np.array([0.01, 0.0, 0.04]), + ) + + for obj, expected_fragment in [ + ('legacy_mask', 'Masked 1 data point(s)'), + ('hybrid', 'Mighell substitution'), + ('mighell', 'Applied Mighell transform to all 3 point(s)'), + ]: + fitter = MultiFitter(model, objective=obj) + fitter.easy_science_multi_fitter = MagicMock() + fitter.easy_science_multi_fitter.fit = MagicMock(return_value=[mock_result]) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + fitter.fit_single_data_set_1d(data) + + matching = [str(ww.message) for ww in w if expected_fragment in str(ww.message)] + assert len(matching) > 0, f'No warning containing {expected_fragment!r} for objective={obj}' + + +def test_multifitter_constructor_rejects_bad_objective(): + model = Model() + model.interface = CalculatorFactory() + with pytest.raises(ValueError, match='Unknown objective'): + MultiFitter(model, objective='nonsense') + + +def 
test_fit_per_call_objective_override(): + """Per-call objective override in fit_single_data_set_1d works.""" + model = Model() + model.interface = CalculatorFactory() + fitter = MultiFitter(model, objective='hybrid') # default + + captured = {} + mock_result = MagicMock() + mock_result.chi2 = 1.0 + mock_result.n_pars = 1 + + def _fake_fit(*, x, y, weights): + captured['x'] = x + captured['y'] = y + captured['weights'] = weights + return [mock_result] + + fitter.easy_science_multi_fitter = MagicMock() + fitter.easy_science_multi_fitter.fit = MagicMock(side_effect=_fake_fit) + + data = DataSet1D( + name='override_test', + x=np.array([0.01, 0.02, 0.03]), + y=np.array([1.0, 0.8, 0.6]), + ye=np.array([0.01, 0.0, 0.04]), + ) + + # Override to legacy_mask — should drop the zero-variance point + with pytest.warns(UserWarning, match='Masked 1 data point'): + fitter.fit_single_data_set_1d(data, objective='legacy_mask') + + assert len(captured['x'][0]) == 2 # one point dropped diff --git a/tests/test_limits.py b/tests/test_limits.py new file mode 100644 index 00000000..e8320eb7 --- /dev/null +++ b/tests/test_limits.py @@ -0,0 +1,150 @@ +import numpy as np +import pytest +from easyscience import global_object +from easyscience.variable import Parameter + +from easyreflectometry.limits import SCALE_LIMITS +from easyreflectometry.limits import SLD_LIMITS +from easyreflectometry.limits import apply_default_limits + + +class TestApplyDefaultLimits: + def test_sld_with_inf_bounds(self): + param = Parameter('sld', 4.186, min=-np.inf, max=np.inf) + apply_default_limits(param, 'sld') + assert param.min == SLD_LIMITS[0] + assert param.max == SLD_LIMITS[1] + + def test_isld_with_inf_bounds(self): + param = Parameter('isld', 0.0, min=-np.inf, max=np.inf) + apply_default_limits(param, 'isld') + assert param.min == SLD_LIMITS[0] + assert param.max == SLD_LIMITS[1] + + def test_sld_preserves_finite_bounds(self): + param = Parameter('sld', 4.0, min=-2.0, max=8.0) + 
apply_default_limits(param, 'sld') + assert param.min == -2.0 + assert param.max == 8.0 + + def test_scale_with_inf_bounds(self): + param = Parameter('scale', 1.0, min=0, max=np.inf) + apply_default_limits(param, 'scale') + assert param.min == SCALE_LIMITS[0] + assert param.max == SCALE_LIMITS[1] + + def test_scale_preserves_finite_bounds(self): + param = Parameter('scale', 1.0, min=0.5, max=2.0) + apply_default_limits(param, 'scale') + assert param.min == 0.5 + assert param.max == 2.0 + + def test_thickness_percentage_limits(self): + param = Parameter('thickness', 10.0, min=0.0, max=np.inf) + apply_default_limits(param, 'thickness') + assert param.min == 0.0 # 0.0 is finite, not overwritten + assert param.max == 20.0 # 2.0 * 10.0 + + def test_thickness_both_inf(self): + param = Parameter('thickness', 10.0, min=-np.inf, max=np.inf) + apply_default_limits(param, 'thickness') + assert param.min == 5.0 # 0.5 * 10.0 + assert param.max == 20.0 # 2.0 * 10.0 + + def test_roughness_percentage_limits(self): + param = Parameter('roughness', 3.3, min=0.0, max=np.inf) + apply_default_limits(param, 'roughness') + assert param.min == 0.0 # 0.0 is finite, not overwritten + assert param.max == pytest.approx(6.6) # 2.0 * 3.3 + + def test_thickness_zero_value_unchanged(self): + param = Parameter('thickness', 0.0, min=0.0, max=np.inf) + apply_default_limits(param, 'thickness') + assert param.min == 0.0 + assert param.max == np.inf # unchanged, zero-valued skip + + def test_roughness_zero_value_unchanged(self): + param = Parameter('roughness', 0.0, min=-np.inf, max=np.inf) + apply_default_limits(param, 'roughness') + assert param.min == -np.inf + assert param.max == np.inf + + def test_thickness_preserves_finite_bounds(self): + param = Parameter('thickness', 10.0, min=2.0, max=25.0) + apply_default_limits(param, 'thickness') + assert param.min == 2.0 + assert param.max == 25.0 + + def test_dependent_parameter_skipped(self): + independent_param = Parameter('sld_main', 4.0, min=-np.inf, 
max=np.inf) + dependent_param = Parameter('sld_dep', 4.0, min=-np.inf, max=np.inf) + dependent_param.make_dependent_on(dependency_expression='a', dependency_map={'a': independent_param}) + apply_default_limits(dependent_param, 'sld') + assert np.isinf(dependent_param.min) + assert np.isinf(dependent_param.max) + + def test_unknown_kind_is_noop(self): + param = Parameter('foo', 5.0, min=-np.inf, max=np.inf) + apply_default_limits(param, 'unknown') + assert np.isinf(param.min) + assert np.isinf(param.max) + + +class TestIntegrationWithConstructors: + def setup_method(self): + global_object.map._clear() + + def test_material_gets_sld_limits(self): + from easyreflectometry.sample.elements.materials.material import Material + + mat = Material(sld=6.36, isld=0.0) + assert mat.sld.min == SLD_LIMITS[0] + assert mat.sld.max == SLD_LIMITS[1] + assert mat.isld.min == SLD_LIMITS[0] + assert mat.isld.max == SLD_LIMITS[1] + + def test_layer_gets_percentage_limits(self): + from easyreflectometry.project import Project + + project = Project() + project.default_model() + layer = project.models[0].sample[1].layers[0] + assert layer.thickness.min == 50.0 + assert layer.thickness.max == 200.0 + assert layer.roughness.min == 1.5 + assert layer.roughness.max == 6.0 + + def test_layer_constructor_keeps_default_bounds_until_project_sync(self): + from easyreflectometry.sample.elements.layers.layer import Layer + + layer = Layer(thickness=20.0, roughness=5.0) + assert layer.thickness.min == 0.0 + assert layer.thickness.max == np.inf + assert layer.roughness.min == 0.0 + assert layer.roughness.max == np.inf + + def test_layer_zero_thickness_unchanged(self): + from easyreflectometry.project import Project + + project = Project() + project.default_model() + layer = project.models[0].sample[0].layers[0] + assert layer.thickness.min == 0.0 + assert layer.thickness.max == np.inf + assert layer.roughness.min == 0.0 + assert layer.roughness.max == np.inf + + def test_model_gets_scale_limits(self): 
+ from easyreflectometry.model.model import Model + + model = Model(scale=1.0) + assert model.scale.min == SCALE_LIMITS[0] + assert model.scale.max == SCALE_LIMITS[1] + + def test_existing_parameter_bounds_preserved(self): + from easyreflectometry.sample.elements.materials.material import Material + + custom_sld = Parameter('sld', 4.0, min=-0.5, max=7.0) + mat = Material(sld=custom_sld) + assert mat.sld.min == -0.5 + assert mat.sld.max == 7.0 diff --git a/tests/test_ort_file.py b/tests/test_ort_file.py index c547b1f5..8ef1de16 100644 --- a/tests/test_ort_file.py +++ b/tests/test_ort_file.py @@ -123,6 +123,7 @@ def fit_model(load_data): fitter1 = MultiFitter(multi_layer_model) fitter1.switch_minimizer(AvailableMinimizers.Bumps_simplex) + fitter1.easy_science_multi_fitter.max_evaluations = 3000 analysed = fitter1.fit(data) return analysed diff --git a/tests/test_project.py b/tests/test_project.py index b93df068..3c390960 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -268,6 +268,20 @@ def test_fitter_new_model_index(self): # Expect assert fitter_0 is not fitter_1 + def test_switch_calculator_rebuilds_model_bindings(self): + # When + project = Project() + project.default_model() + + # Then + project.calculator = 'refl1d' + reflectivity = project.model_data_for_model_at_index(0, np.array([0.01, 0.05, 0.1, 0.5])) + + # Expect + assert project.calculator == 'refl1d' + assert len(reflectivity.y) == 4 + assert np.all(np.isfinite(reflectivity.y)) + def test_experiments(self): # When project = Project() @@ -706,6 +720,35 @@ def test_parameters(self): assert len(parameters) == 14 assert isinstance(parameters[0], Parameter) + def test_parameters_enabled_flags(self): + global_object.map._clear() + project = Project() + project.default_model() + + sample = project.models[0].sample + superphase = sample.superphase + subphase = sample.subphase + middle_layer = sample[1].front_layer + + assert superphase.thickness.enabled is False + assert 
superphase.roughness.enabled is False + assert subphase.thickness.enabled is False + assert getattr(subphase.roughness, 'enabled', True) is True + assert getattr(middle_layer.thickness, 'enabled', True) is True + assert getattr(middle_layer.roughness, 'enabled', True) is True + + def test_parameters_read_does_not_overwrite_enabled_flag(self): + global_object.map._clear() + project = Project() + project.default_model() + + parameter = project.models[0].sample[0].layers[0].thickness + parameter.enabled = True + + _ = project.parameters + + assert parameter.enabled is True + def test_current_experiment_index_getter_and_setter(self): global_object.map._clear() project = Project()