# CURVE_FIT

## Overview
The `CURVE_FIT` function fits a user-defined model function to data using non-linear least squares, leveraging SciPy's `curve_fit` method. It is ideal for regression, parameter estimation, and curve fitting directly in Excel. This function finds parameters $\theta$ that minimize the sum of squared residuals:

```math
S(\theta) = \sum_{i=1}^n (y_i - f(x_i, \theta))^2
```

where $f(x_i, \theta)$ is the model function, $x_i$ are the input data, and $y_i$ are the observed values. This example function is provided as-is without any representation of accuracy.

## Usage
To use the function in Excel:

```excel
=CURVE_FIT(model, xdata, ydata, [p_zero])
```
- `model` (string, required): Model function as a string, e.g., "a * x + b".
- `xdata` (2D list, required): Input x values.
- `ydata` (2D list, required): Observed y values.
- `p_zero` (2D list, optional, default=None): Initial parameter guesses, one per model parameter. If omitted, every parameter starts at 1.

The function returns the fitted parameter values, rounded to two decimal places, as a single-row 2D list, or an error message string if the fit fails.

## Examples

**Example 1: Fitting a Straight Line (y = a * x + b)**

Sample Input Data:

| x | y |
|---|---|
| 1 | 2 |
| 2 | 4 |
| 3 | 6 |

In Excel:
```excel
=CURVE_FIT("a * x + b", {1;2;3}, {2;4;6}, {1,1})
```
Expected output:

| a   | b   |
|-----|-----|
| 2.0 | 0.0 |

**Example 2: Fitting an Exponential Model (y = a * exp(b * x))**

Sample Input Data:

| x | y  |
|---|----|
| 1 | 2.7|
| 2 | 7.4|
| 3 | 20.1|

In Excel:
```excel
=CURVE_FIT("a * exp(b * x)", {1;2;3}, {2.7;7.4;20.1}, {1,1})
```
Expected output:

| a   | b   |
|-----|-----|
| 1.0 | 1.0 |

**Example 3: Fitting a Decaying Exponential Model (y = a * exp(-b * x) + c)**

Sample Input Data:

| x | y  |
|---|----|
| 0 | 3.4|
| 1 | 2.7|
| 2 | 1.6|
| 3 | 1.1|
| 4 | 0.7|
| 5 | 0.6|

In Excel:
```excel
=CURVE_FIT("a * exp(-b * x) + c", {0;1;2;3;4;5}, {3.4;2.7;1.6;1.1;0.7;0.6}, {2,1,0.5})
```
Expected output:

| a   | b   | c   |
|-----|-----|-----|
| 3.75 | 0.32 | -0.26 |

## Limitations
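- The model string must use `x` as the independent variable. Every other identifier that is not a supported function name (e.g., `a`, `b`) is treated as a parameter to fit, and parameters are returned in the order in which they first appear in the model string.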
- The number of initial guesses (if provided) must match the number of parameters in the model.
- If the fit fails, an error message is returned as a string.
- The fitting method cannot be selected: of the methods supported by SciPy's `curve_fit` (`trf`, `dogbox`, `lm`), only SciPy's default for unbounded problems (`lm`) is used.

In [None]:
import numpy as np
from scipy.optimize import curve_fit as scipy_curve_fit
import math
# Namespace exposed to model expressions: every public name in `math`, NumPy
# itself, and array-aware overrides for the common elementwise functions.
# NOTE(security): this is not a sandbox. eval() inserts `__builtins__` into the
# globals mapping when the key is absent, so a model string can still reach
# built-in functions. Only evaluate model expressions from trusted users.
SAFE_GLOBALS = {k: getattr(math, k) for k in dir(math) if not k.startswith("_")}
SAFE_GLOBALS["np"] = np
SAFE_GLOBALS["numpy"] = np
# `math` functions only accept scalars, but models are evaluated with `x` bound
# to a NumPy array, so the elementwise functions are replaced by their NumPy
# equivalents.
SAFE_GLOBALS.update({
    "exp": np.exp,
    "log": np.log,
    "sin": np.sin,
    "cos": np.cos,
    "tan": np.tan,
    "sqrt": np.sqrt,
    "log10": np.log10,
    "log2": np.log2,
    "log1p": np.log1p,
    "expm1": np.expm1,
    "sinh": np.sinh,
    "cosh": np.cosh,
    "tanh": np.tanh,
    "asin": np.arcsin,
    "acos": np.arccos,
    "atan": np.arctan,
    "atan2": np.arctan2,
    "asinh": np.arcsinh,
    "acosh": np.arccosh,
    "atanh": np.arctanh,
    "fabs": np.fabs,
    "degrees": np.degrees,
    "radians": np.radians,
})
SAFE_GLOBALS["abs"] = abs
SAFE_GLOBALS["pow"] = pow


def _extract_param_names(model):
    """
    Return the fit-parameter names used in a model expression.

    A name counts as a parameter unless it is `x`, one of the built-in model
    functions (exp, log, sin, cos, tan, abs, pow), a name that the expression
    calls as a function (e.g. `sqrt` in "sqrt(x)"), or `np`/`numpy` used as a
    module prefix (e.g. "np.exp(x)").

    Args:
        model (str): Model expression, e.g. "a * x + b".

    Returns:
        list[str]: Unique parameter names in order of first appearance.

    Raises:
        SyntaxError: If the expression cannot be parsed.
    """
    import ast

    tree = ast.parse(model, filename="<string>", mode="eval")
    reserved = {"x", "exp", "log", "sin", "cos", "tan", "abs", "pow"}
    name_nodes = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
            # A called name is a function, never a numeric parameter.
            reserved.add(node.func.id)
        elif (
            isinstance(node, ast.Attribute)
            and isinstance(node.value, ast.Name)
            and node.value.id in ("np", "numpy")
        ):
            reserved.add(node.value.id)
        elif isinstance(node, ast.Name):
            name_nodes.append(node)
    # ast.walk is breadth-first; sort by source position so that parameters
    # keep the order in which they are written in the model string.
    name_nodes.sort(key=lambda n: (n.lineno, n.col_offset))
    return list(dict.fromkeys(n.id for n in name_nodes if n.id not in reserved))


def curve_fit(model, xdata, ydata, p_zero=None):
    """
    Fits a model to data using non-linear least squares (scipy.optimize.curve_fit).

    Args:
        model (str): Model function as a string, e.g., "a * x + b". It must
            reference the variable `x`; other plain names are fit parameters.
        xdata (list[list[float]]): 2D list of x values.
        ydata (list[list[float]]): 2D list of y values.
        p_zero (list[list[float]], optional): 2D list of initial parameter guesses,
            one per parameter in order of first appearance. Default is None, in
            which case every parameter starts at 1.

    Returns:
        list[list[float]]: Fitted parameter values rounded to two decimals, as a
        single row, or error message string if the fit fails.

    This example function is provided as-is without any representation of accuracy.
    """
    import re

    if not isinstance(model, str):
        return "Model function expression must be a string."
    # Match `x` as a whole identifier; a plain substring test would accept
    # models such as "exp(a)" that never use the variable.
    if re.search(r"\bx\b", model) is None:
        return "Model function expression must contain the variable 'x'."
    try:
        # Compile once; the optimizer evaluates the model many times.
        code = compile(model, '<string>', 'eval')
        param_names = _extract_param_names(model)
    except Exception as e:
        return f"Model expression syntax error: {e}"

    try:
        x = np.asarray(xdata, dtype=float).ravel()
        y = np.asarray(ydata, dtype=float).ravel()
    except (TypeError, ValueError) as e:
        return f"xdata and ydata must contain only numeric values: {e}"

    n_params = len(param_names)
    if n_params == 0:
        return "Model function expression must contain at least one parameter to fit."
    if p_zero is not None:
        try:
            p_zero_arr = np.asarray(p_zero, dtype=float).ravel()
        except (TypeError, ValueError) as e:
            return f"Initial guesses (p_zero) must be numeric: {e}"
        if len(p_zero_arr) != n_params:
            return f"Number of initial guesses (p_zero) does not match number of parameters in model: {param_names}"
    else:
        p_zero_arr = np.ones(n_params)

    def model_func(x, *params):
        local_dict = dict(zip(param_names, params))
        local_dict["x"] = x
        try:
            # NOTE(security): eval of a user-supplied expression; see the
            # comment on SAFE_GLOBALS.
            return eval(code, SAFE_GLOBALS, local_dict)
        except Exception as e:
            # Raise instead of returning a string: SciPy would try to do
            # arithmetic on the string and hide the real cause.
            raise RuntimeError(f"Model evaluation error: {e}") from e

    try:
        popt, _ = scipy_curve_fit(model_func, x, y, p0=p_zero_arr, maxfev=10000)
    except Exception as e:
        # Boundary towards the spreadsheet: report any failure as text.
        return str(e)
    return [np.round(popt, 2).tolist()]

In [None]:
%pip install -q ipytest
import ipytest
ipytest.autoconfig()
import pytest

def approx_equal(a, b, rel=0.05, abs_tol=1e-4):
    """
    Compare two floats, or two 2D lists of floats, within a tolerance.

    Args:
        a: Actual value (float or list of lists of floats).
        b: Expected value (float or list of lists of floats).
        rel (float): Relative tolerance passed to pytest.approx.
        abs_tol (float): Absolute tolerance passed to pytest.approx.

    Returns:
        bool: True if both values are floats, or 2D lists of floats with the
        same shape, and every pair of values matches within the tolerance.
        Any other combination of types, or a shape mismatch, gives False.
    """
    def close(p, q):
        return (
            isinstance(p, float)
            and isinstance(q, float)
            and p == pytest.approx(q, rel=rel, abs=abs_tol)
        )

    if isinstance(a, float) and isinstance(b, float):
        return close(a, b)
    both_2d = (
        isinstance(a, list) and isinstance(b, list)
        and all(isinstance(row, list) for row in a)
        and all(isinstance(row, list) for row in b)
    )
    if not both_2d:
        return False
    # zip() stops at the shorter input, so compare the lengths explicitly;
    # otherwise a result with missing rows or values would count as equal.
    if len(a) != len(b):
        return False
    return all(
        len(row_a) == len(row_b) and all(close(p, q) for p, q in zip(row_a, row_b))
        for row_a, row_b in zip(a, b)
    )

# Each case is [model, xdata, ydata, p_zero, expected fitted parameters].
demo_cases = [
    [
        "a * x + b",
        [[1], [2], [3]],
        [[2.1], [3.8], [6.2]],
        [[1, 1]],
        [[2.05, -0.07]],
    ],
    [
        "a * exp(b * x)",
        [[1], [2], [3]],
        [[2.5], [7.8], [19.5]],
        [[1, 1]],
        [[1.11, 0.96]],
    ],
    [
        "a * exp(-b * x) + c",
        [[0], [1], [2], [3], [4], [5]],
        [[3.4], [2.7], [1.6], [1.1], [0.7], [0.6]],
        [[2, 1, 0.5]],
        [[3.75, 0.32, -0.26]],
    ],
]

@pytest.mark.parametrize("model, xdata, ydata, p_zero, expected", demo_cases)
def test_demo_cases(model, xdata, ydata, p_zero, expected):
    """Each demo case must fit to within 5% of its expected parameters."""
    fitted = curve_fit(model, xdata, ydata, p_zero)
    print(f"test_demo_cases output for {model}: {fitted}")
    within_tolerance = approx_equal(fitted, expected, rel=0.05)
    assert within_tolerance, f"Output {fitted} not within 5% of expected {expected}"

def test_invalid_param_count():
    """A single initial guess for a two-parameter model yields an error string."""
    outcome = curve_fit("a * x + b", [[1], [2], [3]], [[2], [4], [6]], [[1]])
    assert isinstance(outcome, str)
    assert "does not match" in outcome

def test_invalid_model_expression():
    """A syntactically broken model expression yields an error string."""
    outcome = curve_fit("a ** x + +", [[1], [2], [3]], [[2], [4], [6]], [[1, 1]])
    assert isinstance(outcome, str)
    lowered = outcome.lower()
    assert any(marker in lowered for marker in ("error", "invalid", "does not match"))

def test_missing_x():
    """A model without the variable `x` is rejected with a specific message."""
    outcome = curve_fit("a + b", [[1], [2], [3]], [[2], [4], [6]], [[1, 1]])
    assert isinstance(outcome, str)
    assert "must contain the variable 'x'" in outcome

# Run the tests defined in this notebook; '-s' is passed through to pytest so
# that the print() output from test_demo_cases is shown instead of captured.
ipytest.run('-s')

In [None]:
import gradio as gr
import matplotlib.pyplot as plt
import io
import base64
import numpy as np

def _extract_param_names(model):
    """
    Return the fit-parameter names used in a model expression.

    A name counts as a parameter unless it is `x`, one of the built-in model
    functions (exp, log, sin, cos, tan, abs, pow), a name that the expression
    calls as a function (e.g. `sqrt` in "sqrt(x)"), or `np`/`numpy` used as a
    module prefix (e.g. "np.exp(x)").

    Args:
        model (str): Model expression, e.g. "a * x + b".

    Returns:
        list[str]: Unique parameter names in order of first appearance.

    Raises:
        SyntaxError: If the expression cannot be parsed.
    """
    import ast

    tree = ast.parse(model, filename="<string>", mode="eval")
    reserved = {"x", "exp", "log", "sin", "cos", "tan", "abs", "pow"}
    name_nodes = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
            # A called name is a function, never a numeric parameter.
            reserved.add(node.func.id)
        elif (
            isinstance(node, ast.Attribute)
            and isinstance(node.value, ast.Name)
            and node.value.id in ("np", "numpy")
        ):
            reserved.add(node.value.id)
        elif isinstance(node, ast.Name):
            name_nodes.append(node)
    # ast.walk is breadth-first; sort by source position so that parameters
    # keep the order in which they are written in the model string.
    name_nodes.sort(key=lambda n: (n.lineno, n.col_offset))
    return list(dict.fromkeys(n.id for n in name_nodes if n.id not in reserved))


def gradio_curve_fit(model, xdata, ydata):
    """
    Gradio callback: fit the model and render the data with the fitted curve.

    Args:
        model (str): Model expression, e.g. "a * x + b".
        xdata (list[list]): 2D list of x values.
        ydata (list[list]): 2D list of y values.

    Returns:
        tuple: (result_array, html) where result_array is the fitted parameters
        as a 2D list (or the error message wrapped as [[message]]), and html is
        an inline PNG plot followed by the error message, if any.
    """
    import html

    result = curve_fit(model, xdata, ydata)
    # Prepare result as 2D array for type='array' output
    if isinstance(result, str):
        result_array = [[result]]
        # The message can echo parts of the user-supplied model, so escape it
        # before embedding it in HTML.
        result_html = f'<div style="color:red;font-weight:bold;">{html.escape(result)}</div>'
    else:
        result_array = result
        result_html = ''
    # Prepare plot
    fig = None
    try:
        x = np.array(xdata).flatten()
        y = np.array(ydata).flatten()
        fig, ax = plt.subplots()
        ax.scatter(x, y, label="Data", color="blue")
        if isinstance(result, list) and result and isinstance(result[0], list):
            param_names = _extract_param_names(model)
            x_fit = np.linspace(np.min(x), np.max(x), 100)
            local_dict = dict(zip(param_names, result[0]))
            local_dict["x"] = x_fit
            # NOTE(security): eval of a user-supplied expression; SAFE_GLOBALS
            # does not remove access to Python built-ins.
            y_fit = eval(model, SAFE_GLOBALS, local_dict)
            ax.plot(x_fit, y_fit, label="Fitted", color="red")
            ax.legend()
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        buf = io.BytesIO()
        # Save this figure explicitly rather than pyplot's "current" figure.
        fig.savefig(buf, format="png")
        img_base64 = base64.b64encode(buf.getvalue()).decode("utf-8")
        img_html = f'<img src="data:image/png;base64,{img_base64}" style="max-width:100%;height:auto;" />'
    except Exception as e:
        img_html = f'<div style="color:red;font-weight:bold;">Plot error: {html.escape(str(e))}</div>'
    finally:
        # Close the figure on the error path too, so failed plots do not
        # accumulate open figures.
        if fig is not None:
            plt.close(fig)
    return result_array, img_html + result_html

# Build the Gradio interface around gradio_curve_fit. The three inputs are
# pre-filled with the model, x data and y data of the first demo case.
demo = gr.Interface(
    fn=gradio_curve_fit,
    inputs=[
        gr.Textbox(label="Model (Python expression, e.g. a * x + b)", value=demo_cases[0][0]),
        gr.DataFrame(headers=["x"], label="x data", type="array", value=demo_cases[0][1]),
        gr.DataFrame(headers=["y"], label="y data", type="array", value=demo_cases[0][2]),
    ],
    # Outputs match the two values returned by gradio_curve_fit:
    # the parameter table and the HTML with the plot / error message.
    outputs=[
        gr.DataFrame(label="Fitted Parameters", type="array"),
        gr.HTML(label="Plot and Result")
    ],
    # NOTE(review): each demo_cases row has five entries (including p_zero and
    # the expected result) while there are three inputs; confirm that Gradio
    # ignores the extra columns.
    examples=demo_cases,
    flagging_mode="never",
    fill_width=True,
)
# Start the Gradio app.
demo.launch()