In [None]:
%matplotlib inline


Fit Multiple Data Sets using pandas
======================

Fitting multiple (simulated) Gaussian data sets simultaneously.

All minimizers require the residual array to be one-dimensional. Therefore, in
the ``objective`` we need to ``flatten`` the array before returning it.

TODO: this should be using the Model interface / built-in models!


In [28]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import lmfit


def gauss(x, amp, cen, sigma):
    """Gaussian lineshape."""
    return amp * np.exp(-(x-cen)**2 / (2.*sigma**2))


def gauss_dataset(params, i, x):
    """Calculate Gaussian lineshape from parameters for data set."""
    amp = params['amp_%i' % (i+1)]
    cen = params['cen_%i' % (i+1)]
    sig = params['sig_%i' % (i+1)]
    return gauss(x, amp, cen, sig)


def objective(params, x, df):
    """Calculate total residual for fits of Gaussians to several data sets."""
    df['resid'] = np.zeros(len(df))
    
    for i in df['experiment'].unique():
        df[df['experiment'] == i]['resid'] = df[df['experiment'] == i]['y'] - gauss_dataset(params, i, x)

    return df['resid']

out = lmfit.minimize(objective, fit_params, args=(df['x'], df))

ValueError: cannot reindex from a duplicate axis

Create five simulated Gaussian data sets



In [20]:
length = 151
df = pd.DataFrame()
for i in np.arange(5):
    local_df = pd.DataFrame()
    local_df['experiment'] = i*np.ones(length)
    local_df['x'] = np.linspace(-1, 2, length)
    local_df['amp'] = 0.60 + 9.50*np.random.rand()
    local_df['cen'] = -0.20 + 1.20*np.random.rand()
    local_df['sig'] = 0.25 + 0.03*np.random.rand()
    local_df['y'] = gauss(local_df['x'], local_df['amp'], local_df['cen'], local_df['sig']) + np.random.normal(size=length, scale=0.1)
    df = df.append(local_df)

# display(df)

Create five sets of fitting parameters, one per data set



In [11]:
fit_params = lmfit.Parameters()
for iy, y in enumerate(df['experiment'].unique()):
    fit_params.add('amp_%i' % (iy+1), value=0.5, min=0.0, max=200)
    fit_params.add('cen_%i' % (iy+1), value=0.4, min=-2.0, max=2.0)
    fit_params.add('sig_%i' % (iy+1), value=0.3, min=0.01, max=3.0)

Constrain the values of sigma to be the same for all peaks by assigning
sig_2, ..., sig_5 to be equal to sig_1.



In [21]:
for iy in (2, 3, 4, 5):
    fit_params['sig_%i' % iy].expr = 'sig_1'

# display(fit_params)

Run the global fit and show the fitting result



In [19]:
out = lmfit.minimize(objective, fit_params, args=(df['x'], df))
report_fit(out.params)

(755, 6)


TypeError: '(0, slice(None, None, None))' is an invalid key

Plot the data sets and fits



In [None]:
plt.figure(figsize=(15,5))
for i in range(5):
    y_fit = gauss_dataset(out.params, i, x)
    plt.plot(x, data[i, :], 'o', x, y_fit, '-')
plt.show()