In [4]:
from __future__ import division
from scipy import stats
from scipy.stats import ttest_rel

import pyvttbl
from pyvttbl import DataFrame as DF
from pyvttbl.stats import Anova, Anova1way


In [5]:
os.chdir('/Users/kyle/Dropbox/Git/PFN/')

# Cue and Stim Phase Peaks coded by Cue in FFA
wpk_ffa=pd.read_csv("ffa_peaks.csv", index_col='idx')

#just grab the stimulus response
ffa_cstim = wpk_ffa[['hcF', 'ncF', 'fcF', 'hcH', 'ncH', 'fcH']]

###Wide-Format 
* each condition as a column
* each row is a subject
* values in matrix are observed values of DV 
    
###Long-Format
* columns are SubjectID and names of  IV(s), DV(s)
* SubjectID contain ID name for every combination if IV factors 
    * i.e. Subject 1 occurs 8 times if Design is  IV1(2 levels) X IV2(4 levels)

* **rows in IV column**: name of IV levels are levels
* **rows in DV column**: observed value for conditon name in that row

In [98]:
# Wide
ffa_cstim.head(3)

Unnamed: 0_level_0,hcF,ncF,fcF,hcH,ncH,fcH
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
8,0.3539,0.5399,0.3666,0.1057,0.182,0.0888
10,1.546,0.9781,0.9146,0.4596,0.1573,0.4097
11,0.2857,0.1608,0.2483,0.0158,0.2287,0.2283


In order to run a rep. measures anova we need the data to be in a long-format style, where we have a column for each of our IVs (all rows are levels of IV) and DVs (all rows the value observed for the condition in the IV column)

In [96]:
# Stack to multi-index with all column names nested under each subject idx
# then reset the index (single level), name the DV peak
ffa_cs_long = ffa_cstim.stack().reset_index(name='peak')
# old column names now rows (N times) and need a name 
# (defaults to "level_1" after reset)
ffa_cs_long.rename(columns={'level_1':'cstim'}, inplace=True)
ffa_cs_long.head(12)

Unnamed: 0,idx,cstim,peak
0,8,hcF,0.3539
1,8,ncF,0.5399
2,8,fcF,0.3666
3,8,hcH,0.1057
4,8,ncH,0.182
5,8,fcH,0.0888
6,10,hcF,1.546
7,10,ncF,0.9781
8,10,fcF,0.9146
9,10,hcH,0.4596


## 2-way repeated measures ANOVA 
we need to split up cstim so that we have separate columns for cue and stimulus for each trial 

#### Current Format:
* **cstim**: [hcF, ncF, fcF, hcH, ncH, fcH] x N

#### Transformed: 
* **stim**: [F, H] x N x 3 (cue levels)
* **cue**: [hc, nc, fc] x N x 2 (stim levels)

In [109]:
ffa_cs_long['cue']=[cs[:2] for cs in ffa_cs_long['cstim']]
ffa_cs_long['stim']=[cs[-1] for cs in ffa_cs_long['cstim']]

In [111]:
ffa_cs_long.head(12)

Unnamed: 0,idx,cstim,peak,cue,stim
0,8,hcF,0.3539,hc,F
1,8,ncF,0.5399,nc,F
2,8,fcF,0.3666,fc,F
3,8,hcH,0.1057,hc,H
4,8,ncH,0.182,nc,H
5,8,fcH,0.0888,fc,H
6,10,hcF,1.546,hc,F
7,10,ncF,0.9781,nc,F
8,10,fcF,0.9146,fc,F
9,10,hcH,0.4596,hc,H


##Run Repeated Measures ANOVA with pyvttbl

In [117]:
import pyvttbl
from pyvttbl import DataFrame as DF
from pyvttbl.stats import Anova

FFA_CStim = DF(ffa_cs_long)
FFA_CStim_RMA=Anova()
FFA_CStim_RMA.run(FFA_CStim, 'peak', wfactors=['cue', 'stim'], sub='idx')

os.chdir('/Users/kyle/Dropbox/PFH/iPFH/PK_ANOVA/')
# Write output to a file
with open('FFA_CStim_RMA.txt','wb') as f:
        f.write(str(FFA_CStim_RMA))

# Display output inline
print(FFA_CStim_RMA)

peak ~ cue * stim

TESTS OF WITHIN SUBJECTS EFFECTS

Measure: peak
  Source                           Type III    eps      df      MS       F        Sig.      et2_G   Obs.    SE     95% CI   lambda    Obs.  
                                      SS                                                                                              Power 
cue           Sphericity Assumed      0.046       -        2   0.023    0.477       0.624   0.004     38   0.037    0.072     1.008   0.126 
              Greenhouse-Geisser      0.046   0.899    1.798   0.025    0.477       0.605   0.004     38   0.037    0.072     1.008   0.122 
              Huynh-Feldt             0.046   0.899    1.798   0.025    0.477       0.605   0.004     38   0.037    0.072     1.008   0.122 
              Box                     0.046   0.500        1   0.046    0.477       0.498   0.004     38   0.037    0.072     1.008   0.103 
---------------------------------------------------------------------------------------

# RM ANOVA Including Region as a Factor

In [38]:

CSTIM_DF = DF(cstimdf.reset_index())
CStim_RMA=Anova()
CStim_RMA.run(CSTIM_DF, 'peak', wfactors=['cue', 'stim', 'region'], sub='idx')

os.chdir('/Users/kyle/Dropbox/PFH/iPFH/PK_ANOVA/')
# Write output to a file
with open('CUExSTIMxREGION_RMA.txt','wb') as f:
        f.write(str(CStim_RMA))

# Display output inline
print(CStim_RMA)

peak ~ cue * stim * region

TESTS OF WITHIN SUBJECTS EFFECTS

Measure: peak
   Source                            Type III    eps      df      MS       F        Sig.        et2_G     Obs.    SE     95% CI   lambda    Obs.  
                                        SS                                                                                                  Power 
cue             Sphericity Assumed      0.020       -        2   0.010    0.351       0.706       0.002     76   0.020    0.039     1.481   0.166 
                Greenhouse-Geisser      0.020   0.931    1.861   0.011    0.351       0.691       0.002     76   0.020    0.039     1.481   0.161 
                Huynh-Feldt             0.020   0.931    1.861   0.011    0.351       0.691       0.002     76   0.020    0.039     1.481   0.161 
                Box                     0.020   0.500        1   0.020    0.351       0.561       0.002     76   0.020    0.039     1.481   0.129 
------------------------------------------

# 1Way ANOVA Example (not appropriate for repeated measures!)

In [19]:
from pyvttbl import Anova1way
d = [[21.0, 20.0, 26.0, 46.0, 35.0, 13.0, 41.0, 30.0, 42.0, 26.0],
     [23.0, 30.0, 34.0, 51.0, 20.0, 38.0, 34.0, 44.0, 41.0, 35.0],
     [35.0, 35.0, 52.0, 29.0, 54.0, 32.0, 30.0, 42.0, 50.0, 21.0],
     [44.0, 40.0, 33.0, 45.0, 45.0, 30.0, 46.0, 34.0, 49.0, 44.0],
     [39.0, 44.0, 51.0, 47.0, 50.0, 45.0, 39.0, 51.0, 39.0, 55.0]]
conditions_list = 'Contact Hit Bump Collide Smash'.split()
D=Anova1way()
D.run(d, conditions_list=conditions_list)
print(D)

Anova: Single Factor on Measure

SUMMARY
Groups    Count   Sum   Average   Variance 
Contact      10   300        30    116.444 
Hit          10   350        35     86.444 
Bump         10   380        38    122.222 
Collide      10   410        41     41.556 
Smash        10   460        46     33.333 

O'BRIEN TEST FOR HOMOGENEITY OF VARIANCE
Source of Variation       SS       df      MS         F     P-value   eta^2   Obs. power 
Treatments             68081.975    4   17020.494   1.859     0.134   0.142        0.498 
Error                 412050.224   45    9156.672                                        
Total                 480132.199   49                                                    

ANOVA
Source of Variation    SS    df   MS      F     P-value   eta^2   Obs. power 
Treatments            1460    4   365   4.562     0.004   0.289        0.837 
Error                 3600   45    80                                        
Total                 5060   49                     

# T-Test cStim Peaks

In [11]:
ppay=[wpk_ppa["hcH"], wpk_ppa["fcH"], 
    wpk_ppa["fcF"], wpk_ppa["hcF"]]

ffay=[wpk_ffa["hcH"], wpk_ffa["fcH"], 
    wpk_ffa["fcF"], wpk_ffa["hcF"]]

for i, d in enumerate([ppay, ffay]):    
      print ttest_rel(d[0], d[1])
      print ttest_rel(d[2], d[3])

(-3.1517337984922698, 0.0055173157118805771)
(-1.4262617459612432, 0.17090713143942196)
(-0.70091747359790535, 0.49231689341504747)
(-2.5256425319695994, 0.021148387293976834)


# T-Test Cue Peaks

In [12]:
ppay=[wpk_ppa["hc"], wpk_ppa["fc"], 
    wpk_ppa["fc"], wpk_ppa["hc"]]

ffay=[wpk_ffa["hc"], wpk_ffa["fcH"], 
    wpk_ffa["fcF"], wpk_ffa["hcF"]]

for i, d in enumerate([ppay, ffay]):    
      print ttest_rel(d[0], d[1])
      print ttest_rel(d[2], d[3])

(-0.041906928813351216, 0.96703425001621868)
(0.041906928813351216, 0.96703425001621868)
(-1.0439516008872276, 0.31032619980940035)
(-2.5256425319695994, 0.021148387293976834)
