In [3]:
import pandas as pd
from statsmodels.stats.anova import AnovaRM
import os
import h5py
import numpy as np

In [4]:
def create_dataset(path1, path2):
    """Collect per-recording accuracies from two experiment directories.

    Each of ``path1`` and ``path2`` is walked recursively. For every file
    named ``data.hdf5`` the accuracy is the fraction of entries in which the
    ``targets`` dataset equals the ``actions`` dataset.

    Args:
        path1: Root directory of the first condition.
        path2: Root directory of the second condition.

    Returns:
        pandas.DataFrame with one row per ``data.hdf5`` file and the columns
        ``accuracy``, ``user`` (name of the directory holding the file) and
        ``method`` (name of the root directory the file was found under).
    """
    records = []

    for root in (path1, path2):
        # normpath strips a trailing separator, so the method name is never
        # empty and the lookup does not depend on '/' being the separator.
        method = os.path.basename(os.path.normpath(root))
        for (path, subdirs, files) in os.walk(root):
            if 'data.hdf5' not in files:
                continue
            # The context manager closes the HDF5 handle once the arrays have
            # been read into memory.
            with h5py.File(os.path.join(path, 'data.hdf5'), 'r') as data:
                targets = data['targets'][()]
                actions = data['actions'][()]
            # Builtin float: the np.float alias was removed in NumPy 1.24.
            accuracy = np.mean(np.equal(targets, actions).astype(float))
            user = os.path.basename(os.path.normpath(path))
            records.append([accuracy, user, method])
    return pd.DataFrame.from_records(records, columns=['accuracy', 'user', 'method'])
            


In [5]:
# Accuracy table for the gaze study: x2t runs versus the default runs.
df = create_dataset(
    path1='../experiments/gaze_study/x2t',
    path2='../experiments/gaze_study/default',
)
df

Unnamed: 0,accuracy,user,method
0,0.756554,10,x2t
1,0.886792,9,x2t
2,0.815094,3,x2t
3,0.697761,6,x2t
4,0.835821,11,x2t
5,0.631387,0,x2t
6,0.624088,1,x2t
7,0.872659,2,x2t
8,0.856604,8,x2t
9,0.696296,4,x2t


In [6]:
# Repeated-measures ANOVA on accuracy, with method as the within-subject factor.
aovrm = AnovaRM(
    data=df,
    depvar='accuracy',
    subject='user',
    within=['method'],
)

In [7]:
# Fit the repeated-measures ANOVA configured in `aovrm` and print its table.
res = aovrm.fit()
print(res)

               Anova
       Num DF  Den DF F Value Pr > F
------------------------------------
method 1.0000 11.0000 17.2310 0.0016



  data_klasses = (pandas.Series, pandas.DataFrame, pandas.Panel)


In [None]:
# Repeated-measures ANOVA on accuracy for the sim_uji runs (x2t vs. default).
df = create_dataset(
    path1='../experiments/sim_uji/x2t',
    path2='../experiments/sim_uji/default',
)
aovrm = AnovaRM(
    data=df,
    depvar='accuracy',
    subject='user',
    within=['method'],
)
res = aovrm.fit()
print(res)

In [9]:
# Questionnaire responses, one record per (user, phase):
# [user, phase, q1, q2, q3, q4, q5, q6, q7, q8]
dataset = [
    [0, 1, 6, 3, 6, 5, 2, 6, 6, 2],
    [0, 2, 6, 6, 4, 2, 2, 6, 6, 2],
    [1, 1, 3, 4, 3, 5, 4, 4, 6, 2],
    [1, 2, 4, 4, 4, 5, 3, 4, 7, 1],
    [2, 1, 5, 1, 5, 4, 6, 2, 6, 1],
    [2, 2, 6, 5, 6, 2, 2, 1, 6, 1],
    [3, 1, 5, 2, 2, 2, 2, 2, 5, 2],
    [3, 2, 6, 5, 2, 2, 2, 5, 6, 2],
    [4, 1, 3, 2, 3, 7, 5, 2, 5, 3],
    [4, 2, 6, 4, 4, 2, 1, 1, 6, 2],
    [5, 1, 4, 1, 1, 3, 1, 1, 6, 1],
    [5, 2, 5, 5, 2, 3, 1, 1, 7, 1],
    [6, 1, 5, 5, 5, 5, 3, 6, 6, 2],
    [6, 2, 4, 2, 4, 6, 7, 5, 4, 2],
    [7, 1, 4, 4, 5, 3, 4, 5, 6, 2],
    [7, 2, 5, 4, 5, 3, 3, 3, 5, 3],
    [8, 1, 5, 4, 5, 4, 2, 3, 2, 6],
    [8, 2, 5, 4, 5, 3, 2, 2, 3, 6],
    [9, 1, 3, 3, 4, 6, 5, 2, 6, 2],
    [9, 2, 6, 6, 6, 2, 2, 1, 7, 2],
    [10, 1, 5, 3, 4, 3, 3, 4, 5, 3],
    [10, 2, 6, 5, 5, 2, 2, 2, 6, 2],
    [11, 1, 6, 6, 6, 2, 2, 2, 6, 2],
    [11, 2, 6, 7, 6, 2, 2, 1, 7, 2],
]

df = pd.DataFrame.from_records(
    dataset,
    columns=[
        'user', 'phase',
        'q1', 'q2', 'q3', 'q4',
        'q5', 'q6', 'q7', 'q8',
    ],
)

In [10]:
# Column means per phase: phase 1 is printed, phase 2 is the cell's displayed value.
print(df.loc[df['phase'].eq(1)].mean())
df.loc[df['phase'].eq(2)].mean()

user     5.500000
phase    1.000000
q1       4.500000
q2       3.166667
q3       4.083333
q4       4.083333
q5       3.250000
q6       3.250000
q7       5.416667
q8       2.333333
dtype: float64


user     5.500000
phase    2.000000
q1       5.416667
q2       4.750000
q3       4.416667
q4       2.833333
q5       2.416667
q6       2.666667
q7       5.833333
q8       2.166667
dtype: float64

In [11]:
# One repeated-measures ANOVA per questionnaire item, with phase as the
# within-subject factor.
# NOTE(review): eight separate tests are run and no multiple-comparison
# correction is visible in this cell; confirm whether one is applied elsewhere.
for depvar in (
    'q1', 'q2', 'q3', 'q4',
    'q5', 'q6', 'q7', 'q8',
):
    aovrm = AnovaRM(
        data=df,
        depvar=depvar,
        subject='user',
        within=['phase'],
    )
    res = aovrm.fit()
    print(res)

               Anova
      Num DF  Den DF F Value Pr > F
-----------------------------------
phase 1.0000 11.0000  7.4358 0.0197

               Anova
      Num DF  Den DF F Value Pr > F
-----------------------------------
phase 1.0000 11.0000  7.0533 0.0224

               Anova
      Num DF  Den DF F Value Pr > F
-----------------------------------
phase 1.0000 11.0000  1.1579 0.3049

               Anova
      Num DF  Den DF F Value Pr > F
-----------------------------------
phase 1.0000 11.0000  5.3922 0.0404

               Anova
      Num DF  Den DF F Value Pr > F
-----------------------------------
phase 1.0000 11.0000  1.7742 0.2098

               Anova
      Num DF  Den DF F Value Pr > F
-----------------------------------
phase 1.0000 11.0000  2.3744 0.1516

               Anova
      Num DF  Den DF F Value Pr > F
-----------------------------------
phase 1.0000 11.0000  2.0992 0.1753

               Anova
      Num DF  Den DF F Value Pr > F
---------------------------------