In [1]:
import uuid
import numpy as np
import pandas as pd
from scipy import stats

In [2]:
# Seed NumPy's global random state before any sampling so the draws made by
# `create_dataset` (which passes no explicit random state) repeat on re-run.
np.random.seed(123)

In [3]:
def create_dataset(id_gen=uuid.uuid4, law=stats.norm, size=300):
    """Draw one random sample and tag every row with a single identifier.

    Parameters
    ----------
    id_gen : callable
        Zero-argument factory called once per dataset; its return value is
        broadcast to every row of the ``id`` column.
    law : object exposing ``rvs(size=...)``
        Distribution to sample from (a ``scipy.stats`` distribution by default).
    size : int
        Number of observations to draw.

    Returns
    -------
    pandas.DataFrame
        Columns ``value`` (the draws) and ``id`` (the shared identifier).
    """
    # Sample first, then create the identifier, mirroring the call order of a
    # chained DataFrame(...).assign(id=id_gen()) expression.
    draws = law.rvs(size=size)
    frame = pd.DataFrame({"value": draws})
    frame["id"] = id_gen()
    return frame

In [4]:
# Generate five independently sampled datasets, each with its own id, and stack
# them into one long frame. `ignore_index=True` gives the stacked frame a fresh
# unique row index instead of repeating each dataset's own row labels.
data = pd.concat([create_dataset() for _ in range(5)], ignore_index=True)

In [5]:
# Per-id summary statistics (count, mean, std, quantiles) of the stacked samples.
data.groupby("id").describe()

Unnamed: 0_level_0,value,value,value,value,value,value,value,value
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
06a6aeae-1415-434f-8183-70214fe6e86e,300.0,0.000852,0.979152,-2.788113,-0.632387,0.027661,0.67699,2.55985
7c904f58-640d-4f68-8710-14d9b2ea9177,300.0,-0.029186,1.028261,-3.231055,-0.733151,-0.018228,0.666319,2.958625
91af2683-ab09-495b-a429-00d994a8ec77,300.0,0.089522,0.873965,-2.159442,-0.522507,0.141241,0.654089,2.371388
a4a2e0c7-5f42-4637-9f2e-13e844d9edd3,300.0,-0.045189,0.970155,-3.167055,-0.691631,-0.018601,0.5819,2.555894
f0bb5a17-c731-467b-b2ee-d95a15801ec1,300.0,-0.048523,1.013362,-2.794472,-0.692185,-0.07748,0.715517,3.571579


In [6]:
def fit(x, law=stats.norm, column="value"):
    """Fit ``law`` to one sample and return the estimate next to the raw data.

    Parameters
    ----------
    x : pandas.DataFrame, pandas.Series or array-like
        The sample. When ``x`` is a DataFrame containing ``column``, only that
        column is used, so extra columns (for example a grouping key handed
        over by ``groupby(...).apply``) do not end up in the fit.
    law : scipy.stats continuous distribution
        Must provide ``fit(data)`` and be callable with the fitted parameters
        to build a frozen distribution.
    column : str
        Name of the column holding the observations when ``x`` is a DataFrame.

    Returns
    -------
    pandas.Series
        ``mean`` and ``std`` of the fitted distribution, ``data`` (the sample as
        a 1-D array) and ``rv`` (the frozen fitted distribution).
    """
    if isinstance(x, pd.DataFrame) and column in x.columns:
        sample = x[column].to_numpy()
    else:
        # Series, single-column frames and plain arrays: flatten to 1-D, and
        # keep a one-element sample 1-D rather than collapsing it to 0-d.
        sample = np.atleast_1d(np.asarray(x).squeeze())

    parameters = law.fit(sample)
    rv = law(*parameters)
    return pd.Series({
        # Read the moments from the frozen distribution: the leading fitted
        # parameters are only the mean/std for laws such as the normal, not for
        # laws with shape parameters or whose loc/scale are not their moments.
        "mean": rv.mean(),
        "std": rv.std(),
        "data": sample,
        "rv": rv,
    })

In [7]:
# Fit one distribution per id. Selecting [["value"]] hands `fit` a frame that
# holds only the sample column, so the result does not depend on whether the
# installed pandas passes the grouping column to the applied function.
groups = data.groupby("id")[["value"]].apply(fit)
groups

Unnamed: 0_level_0,mean,std,data,rv
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
06a6aeae-1415-434f-8183-70214fe6e86e,0.000852,0.977518,"[0.7650548459970179, -0.8289888337610103, -0.6...",<scipy.stats._distn_infrastructure.rv_continuo...
7c904f58-640d-4f68-8710-14d9b2ea9177,-0.029186,1.026545,"[-1.0856306033005612, 0.9973454465835858, 0.28...",<scipy.stats._distn_infrastructure.rv_continuo...
91af2683-ab09-495b-a429-00d994a8ec77,0.089522,0.872507,"[1.1117017451038067, 0.18095714574188698, 1.33...",<scipy.stats._distn_infrastructure.rv_continuo...
a4a2e0c7-5f42-4637-9f2e-13e844d9edd3,-0.045189,0.968537,"[0.5513022180697554, 0.41958914519806184, 1.81...",<scipy.stats._distn_infrastructure.rv_continuo...
f0bb5a17-c731-467b-b2ee-d95a15801ec1,-0.048523,1.011672,"[1.1406562429474632, -0.7881655588876596, 0.26...",<scipy.stats._distn_infrastructure.rv_continuo...


In [11]:
# Dump the fitted table as raw HTML text.
# NOTE(review): this writes every element of each `data` array into the output;
# consider dropping that column (or displaying `groups` directly) before sharing.
print(groups.to_html())

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>mean</th>
      <th>std</th>
      <th>data</th>
      <th>rv</th>
    </tr>
    <tr>
      <th>id</th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>06a6aeae-1415-434f-8183-70214fe6e86e</th>
      <td>0.000852</td>
      <td>0.977518</td>
      <td>[0.7650548459970179, -0.8289888337610103, -0.6591513106966477, 0.6111235500559109, -0.14401334748029268, 1.3166055958635523, -0.7043421471287583, 0.7506099168684812, 0.3426379813692278, -0.12643756370963435, 1.1759107718346071, 0.6800715328674297, -1.004967153382104, 0.6402186804198472, 1.374990631494199, -0.13044468898462372, -0.24865585038497331, -0.6696471476435246, -0.013603885675351006, 0.68620068605291, -0.8176682995787476, -1.34635756073724, -0.37574991098192634, -1.37972497865875, 0.5232184412826316, -0.4266897699938011, -1.7554018445508066, -0.34860751473850

In [8]:
def hypothesis_test(x, test=stats.kstest):
    """Run a goodness-of-fit test of a row's sample against its fitted law.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row)
        Must provide ``x["data"]`` (the observations) and ``x["rv"]`` (an
        object with a ``cdf`` method).
    test : callable
        Called as ``test(data, cdf)``; its result must expose ``statistic`` and
        ``pvalue`` attributes. Defaults to ``scipy.stats.kstest``.

    Returns
    -------
    pandas.Series
        Entries ``statistic`` and ``pvalue``, in that order.
    """
    # NOTE(review): when `rv` was estimated from the same `data` (as the `fit`
    # cell of this notebook does), the textbook KS p-value assumes a fully
    # specified reference distribution - confirm that is acceptable here.
    sample, fitted = x["data"], x["rv"]
    outcome = test(sample, fitted.cdf)
    return pd.Series(
        [outcome.statistic, outcome.pvalue],
        index=["statistic", "pvalue"],
    )

In [9]:
# Run the goodness-of-fit test row by row and append its statistic / p-value
# columns to the right of the fitted table (both frames share the id index).
final = pd.concat(
    objs=(groups, groups.apply(hypothesis_test, axis="columns")),
    axis="columns",
)

In [14]:
# Dump the combined fit + test table as raw HTML text.
# NOTE(review): this writes every element of each `data` array into the output;
# consider dropping that column (or displaying `final` directly) before sharing.
print(final.to_html())

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>mean</th>
      <th>std</th>
      <th>data</th>
      <th>rv</th>
      <th>statistic</th>
      <th>pvalue</th>
    </tr>
    <tr>
      <th>id</th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>06a6aeae-1415-434f-8183-70214fe6e86e</th>
      <td>0.000852</td>
      <td>0.977518</td>
      <td>[0.7650548459970179, -0.8289888337610103, -0.6591513106966477, 0.6111235500559109, -0.14401334748029268, 1.3166055958635523, -0.7043421471287583, 0.7506099168684812, 0.3426379813692278, -0.12643756370963435, 1.1759107718346071, 0.6800715328674297, -1.004967153382104, 0.6402186804198472, 1.374990631494199, -0.13044468898462372, -0.24865585038497331, -0.6696471476435246, -0.013603885675351006, 0.68620068605291, -0.8176682995787476, -1.34635756073724, -0.37574991098192634, -1.37972497865875, 