In [1]:
import numpy as np
import pandas as pd
from scipy import stats

In [2]:
# Seed NumPy's global random state so the np.random.randn draws made later in
# the notebook are repeatable when the cells are executed in order.
np.random.seed(123)

In [3]:
def generate_data(identifier, size=100, loc=0., scale=1.):
    """Create ``size`` labelled records of normally distributed noise.

    Standard-normal draws from ``np.random.randn`` (NumPy's global random
    state) are multiplied by ``scale`` and shifted by ``loc``.

    Parameters
    ----------
    identifier
        Value stored under the ``"id"`` key of every record.
    size : int, default 100
        Number of records to generate.
    loc : float, default 0.
        Offset added to every scaled draw.
    scale : float, default 1.
        Factor applied to every draw before the offset is added.

    Returns
    -------
    list of dict
        One ``{"id": identifier, "value": sample}`` dict per draw, in draw
        order.
    """
    samples = scale*np.random.randn(size) + loc
    records = []
    for sample in samples:
        records.append({"id": identifier, "value": sample})
    return records

In [4]:
# Concatenate three synthetic groups into one long-format frame:
#   id 1 -> default loc=0, scale=1
#   id 2 -> loc=1,  scale=2
#   id 3 -> loc=-1, scale=3
# Each call consumes draws from the global NumPy random state, so the order of
# the three calls determines which values every group receives.
df = pd.DataFrame(
    generate_data(1) + generate_data(2, loc=1, scale=2) + generate_data(3, loc=-1, scale=3)
)

In [5]:
# Spot-check five randomly selected rows (no random_state is passed to sample).
print(df.sample(5))

     id     value
263   3 -2.750610
135   2  1.646938
285   3 -3.047614
258   3 -0.911071
154   2 -1.039310


In [6]:
def analyze(frame):
    """Fit a normal distribution to ``frame["value"]`` and summarise it.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data containing a ``"value"`` column; intended to be one group
        handed over by ``groupby(...).apply``.

    Returns
    -------
    pandas.Series
        Entries ``"loc"`` and ``"scale"`` (the parameters returned by
        ``stats.norm.fit``), ``"count"`` (number of rows in ``frame``) and
        ``"fwhm"`` (full width at half maximum, ``2*sqrt(2*ln 2) * scale``).
    """
    loc, scale = stats.norm.fit(frame["value"])
    # Factor converting a Gaussian standard deviation into its FWHM.
    sigma_to_fwhm = 2*np.sqrt(2*np.log(2))
    summary = {
        "loc": loc,
        "scale": scale,
        "count": frame.shape[0],
        "fwhm": sigma_to_fwhm*scale,
    }
    return pd.Series(summary)

In [7]:
# Fit every id group separately. Only the "value" column is selected before
# apply() because analyze() reads nothing else from the frame; this also keeps
# the grouping column out of the frames handed to apply(), which pandas >= 2.2
# otherwise flags with a deprecation warning.
print(df.groupby("id")[["value"]].apply(analyze))

         loc     scale  count      fwhm
id                                     
1   0.027109  1.128240  100.0  2.656803
2   0.960929  1.940107  100.0  4.568603
3  -1.285394  2.908368  100.0  6.848684


In [8]:
# Show the class of the object that groupby() returns before any aggregation
# or apply() call is made on it.
print(type(df.groupby("id")))

<class 'pandas.core.groupby.generic.DataFrameGroupBy'>
