In [1]:
from   scipy  import stats
import pandas as pd
import numpy  as np

In [2]:
dataset = pd.read_csv('HRV(RMSSD) Data Set.csv')
dataset.columns = ['id', 'group'] + list(range(0, 21, 2))

# Mean values per phase.
# After the renaming above, positional columns 2..12 of `dataset` carry the
# labels 0, 2, ..., 20, so positional column 3 is label 2 and column 12 is label 20.
# The phase means are collected next to the first three columns (id, group, 0).
phase_means = dataset.iloc[:, :3].copy()

phase_means[' 2-10'] = dataset.iloc[:, 3:8].mean(axis = 1)    # labels  2 .. 10
phase_means[' 4-10'] = dataset.iloc[:, 4:8].mean(axis = 1)    # labels  4 .. 10
phase_means[' 6-10'] = dataset.iloc[:, 5:8].mean(axis = 1)    # labels  6 .. 10
phase_means[' 8-10'] = dataset.iloc[:, 6:8].mean(axis = 1)    # labels  8 .. 10
phase_means['10-10'] = dataset.iloc[:, 7]                     # label  10 only (no averaging)
phase_means['12-14'] = dataset.iloc[:, 8:10].mean(axis = 1)   # labels 12 .. 14
phase_means['16-20'] = dataset.iloc[:, 10:13].mean(axis = 1)  # labels 16 .. 20

# Column selections: id, group, one of the five early windows (positions 3..7),
# followed by the '12-14' and '16-20' means (positions 8 and 9).
c1, c2, c3, c4, c5 = (phase_means.columns[[0, 1, k, 8, 9]] for k in range(3, 8))

# One five-column frame per early window.
g1, g2, g3, g4, g5 = (phase_means[c] for c in (c1, c2, c3, c4, c5))

groups = [g1, g2, g3, g4, g5]

In [3]:
def compute_statistics(X, y, alpha = 0.05):
    """ computes t-statistics and significances of an ordinary least squares fit

    Solves the normal equations (X'X) theta = X'y and tests every coefficient
    against zero.

    Parameters
    ----------
    X     : 2-d array, design matrix with one row per observation and one
            column per coefficient
    y     : 1-d array of responses, one entry per row of X
    alpha : level of the confidence interval (default 0.05, i.e. 95 %)

    Returns
    -------
    t  : t-statistic of every coefficient for H0: theta_k = 0
    p  : upper-tail p-value P(T > t) of every coefficient. Note that this is
         one-sided, whereas the confidence interval below is two-sided.
    ci : array with three rows: the estimates, the lower bounds and the upper
         bounds of the two-sided (1 - alpha) confidence interval
    """
    gram  = X.T @ X                        # computed once, used for solve and inverse
    theta = np.linalg.solve(gram, X.T @ y)
    m     = len(theta)
    dof   = len(X) - m                     # residual degrees of freedom
    r     = X @ theta - y
    # Residual standard error from the raw residual sum of squares. np.std would
    # first subtract the residual mean, which is only zero when the column space
    # of X contains a constant, and would therefore underestimate s otherwise.
    s     = np.sqrt(np.sum(r ** 2) / dof)
    sed   = s * np.sqrt(np.diag(np.linalg.inv(gram)))
    t     = (theta - 0) / sed
    T     = stats.t(dof)
    c     = T.ppf(1 - alpha / 2) * sed
    ci    = np.array([theta, theta - c, theta + c])
    # sf(t) equals 1 - cdf(t) but does not round to exactly 0 for large t
    return t, T.sf(t), ci
    
def experiment(dataset):
    """ fits one offset per measurement time plus a single Group A offset

    Every row of `dataset` contributes ten observations, read from positional
    columns 3..12 (the measurements labelled 2, 4, ..., 20 after the column
    renaming done when the data is loaded). The design row of an observation
    is a one-hot indicator of its time slot followed by a 0/1 flag that is set
    when positional column 1 equals 'Group A'.

    Parameters
    ----------
    dataset : DataFrame with the group label in positional column 1 and the
              measurements in positional columns 3..12

    Returns
    -------
    tuple (names, ci, t, p, eq) restricted to the Group A coefficient, in the
    argument order expected by `pprint`:
    names : ['theta(A)']
    ci    : one row (estimate, lower bound, upper bound)
    t     : its t-statistic
    p     : its p-value
    eq    : textual form of the fitted model
    """
    n_times = len(range(2, 21, 2))   # ten measurement times: 2, 4, ..., 20

    X = []
    y = []
    # Iterate over row positions: .iloc is purely positional, so walking the
    # index labels would fail or read the wrong rows for a non-default index.
    for row in range(len(dataset)):
        g = dataset.iloc[row, 1] == 'Group A'
        for j in range(n_times):
            x = np.zeros(n_times + 1)
            x[j] = 1      # indicator of the time slot
            x[-1] = g     # Group A flag, shared by all time slots
            X.append(x)
            y.append(dataset.iloc[row, 3 + j])

    X, y = map(np.array, [X, y])

    t, p, ci = compute_statistics(X, y)

    # only the last coefficient (the Group A offset) is reported
    return ['theta(A)'], ci.T[[-1]], t[[-1]], p[[-1]], 'y(t, A) = theta(t) + g(A) · theta(A)'

def experiment_group(k):
    """ fits one offset per phase plus one Group A offset per phase

    Uses the k-th frame of the module-level list `groups`. Positional column 1
    of that frame is compared with 'Group A' and positional columns 2..4 are
    read as the three phase values of every row. The design row of an
    observation has six entries: a one-hot indicator of the phase (entries
    0..2) and the Group A flag placed in the slot of the same phase
    (entries 3..5).

    Parameters
    ----------
    k : position in `groups`; also determines the first phase window shown in
        the returned equation text (it starts at 2 * k + 2)

    Returns
    -------
    tuple (names, ci, t, p, eq) restricted to the three Group A coefficients,
    in the argument order expected by `pprint`
    """
    group    = groups[k]
    n_phases = 3

    X = []
    y = []
    # Iterate over row positions: .iloc is purely positional, so walking the
    # index labels would fail or read the wrong rows for a non-default index.
    for row in range(len(group)):
        g = group.iloc[row, 1] == 'Group A'
        for j in range(n_phases):
            x = np.zeros(2 * n_phases)
            x[j] = 1                # phase indicator
            x[j + n_phases] = g     # Group A flag for this phase
            X.append(x)
            y.append(group.iloc[row, 2 + j])

    X, y = map(np.array, [X, y])

    theta         = ['theta(1, A)', 'theta(2, A)', 'theta(3, A)']
    t, pvalue, ci = compute_statistics(X, y)
    eq            = f'y(p, A) = theta(p) + g(A) · theta(p, A),    p_1 = {list(range(2 * k + 2, 11, 2))}, p_2 = {list(range(12, 15, 2))}, p_3 = {list(range(16, 21, 2))}'
    # the first n_phases coefficients are the phase offsets; report the Group A ones
    ret           = theta, ci.T[n_phases:], t[n_phases:], pvalue[n_phases:], eq
    return ret

def pprint(theta, ci, tstat, pvalue, eq):
    """ prints a fixed-width table of the estimates followed by the model equation

    theta  : parameter names, one per table row
    ci     : per row a triple (value, lower bound, upper bound)
    tstat  : t-statistic per row
    pvalue : p-value per row; flagged with ***, ** or * when it is below
             0.001, 0.01 or 0.05 respectively
    eq     : text printed below the table
    """
    def stars(pv):
        # significance marker for a single p-value
        if pv < 0.001:
            return '***'
        if pv < 0.01:
            return '**'
        if pv < 0.05:
            return '*'
        return ''

    print('parameter   | value  | confidence interval | t-statistic | p-value  | significance')
    print('------------+--------+---------------------+-------------+----------+-------------')
    for name, bounds, ts, pv in zip(theta, ci, tstat, pvalue):
        value, lower, upper = bounds
        mark = stars(pv)
        print(f'{name:11s} | {value:6.4f} | [{lower:7.4f}, {upper:7.4f}]  | {ts:8.6f}    | {pv:.6f} | {mark}')
    print('----------------------------------------------------------------------------------')
    print()
    print(eq)

# Table for the model with a single Group A offset over all measurement times.
overall_result = experiment(dataset)
pprint(*overall_result)

# One table per entry of `groups`, each preceded by blank lines as a separator.
for k in range(5):
    print('\n\n')
    window_result = experiment_group(k)
    pprint(*window_result)

parameter   | value  | confidence interval | t-statistic | p-value  | significance
------------+--------+---------------------+-------------+----------+-------------
theta(A)    | 6.5436 | [ 3.4002,  9.6870]  | 4.089609    | 0.000025 | ***
----------------------------------------------------------------------------------

y(t, A) = theta(t) + g(A) · theta(A)



parameter   | value  | confidence interval | t-statistic | p-value  | significance
------------+--------+---------------------+-------------+----------+-------------
theta(1, A) | 6.3083 | [-3.6405, 16.2571]  | 1.252675    | 0.106118 | 
theta(2, A) | 6.9460 | [-3.0029, 16.8948]  | 1.379294    | 0.084908 | 
theta(3, A) | 6.6676 | [-3.2812, 16.6164]  | 1.324018    | 0.093736 | 
----------------------------------------------------------------------------------

y(p, A) = theta(p) + g(A) · theta(p, A),    p_1 = [2, 4, 6, 8, 10], p_2 = [12, 14], p_3 = [16, 18, 20]



parameter   | value  | confidence interval | t-statistic | p-value 