In [1]:
import json
from misc import *
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.multivariate.manova import MANOVA

  import pandas.util.testing as tm


In [2]:
ages = pd.read_excel("../data/Age.xlsx")

In [3]:
new_ages = {"participant": [], "age": []}
new_ages["participant"].extend(ages["vlPFC numbers"])
new_ages["participant"].extend(ages["sham numbers"])
new_ages["participant"].extend(ages["dlPFC on numbers"])
new_ages["participant"].extend(ages["dlPFC ooff numbers"])
new_ages["age"].extend(ages["vlPFC age"])
new_ages["age"].extend(ages["sham age"])
new_ages["age"].extend(ages["dlPFC on age"])
new_ages["age"].extend(ages["dlPFC off age"])

In [4]:
ages = pd.DataFrame(new_ages).dropna()

In [5]:
ages["participant"] = ages["participant"].apply(int)

In [6]:
ages.to_csv("../data/age.csv")

### Preprocessing for (M)AN(C)OVA

In [7]:
fnames = [
    "sham_study.pkl", "vlPFC_study.pkl", "eon_study.pkl", "eoff_study.pkl", 
    "esham_study.pkl", "evlPFC_study.pkl"
]
studies = []
for a in fnames:
    studies.append(
        Study.load_from_file(a)
    )

In [8]:
df = pd.DataFrame(
    {
        "group": sum([[i]*len(a) for i,a in enumerate(studies)], []),
        "AUROC": sum([a.compute_study_aucs() for a in studies], []),
        "hits": sum(
            [
                [b[0] for b in a.compute_hits_and_FAs()] 
                for a in studies
            ], []
        ),
        "FAs": sum(
            [
                [b[1] for b in a.compute_hits_and_FAs()] 
                for a in studies
            ], []
        ),
        "mean_RT": sum(
            [
                [np.mean(b[1]) for b in a.get_participant_RT()] 
                for a in studies
            ], []
        ),
        "std_RT": sum(
            [
                [np.std(b[1]) for b in a.get_participant_RT()] 
                for a in studies
            ], []
        ),
        "age": sum(
            [
                [
                    ages[
                        ages["participant"] == int(
                            op.split(b)[-1].split("_")[1]
                        )
                    ]["age"].values
                    for b in a.fns
                ] 
                for a in studies
            ], []
        )
    }
)

In [9]:
df["d_prime"] = df["hits"] - df["FAs"]

In [10]:
df["age"] = df["age"].apply(lambda x: 0 if len(x) < 1 else x[0])

In [11]:
df["hits_FAs_avg"] = 1/2*(df["hits"] + df["FAs"])

In [12]:
ru_df = df[df["group"] < 4]
en_df = df[df["group"] >= 4]

### ANOVA

I have recomputed one-way ANOVA for each accuracy measure (and reaction time).
Next slide shows formulae for ANOVA, the one after that shows the results (DF for group and residual, F and PR(>F)) for all data, Russian sample and English sample.

In [13]:
ANOVA_formulae = [
    "AUROC ~ group", 
    "hits ~ group", 
    "FAs ~ group", 
    "mean_RT ~ group", 
    "d_prime ~ group", 
]

In [14]:
ANOVA_PRs_RU = [
    anova_lm(ols(a, data=ru_df).fit()) for a in ANOVA_formulae
]
ANOVA_PRs_EN = [
    anova_lm(ols(a, data=en_df).fit()) for a in ANOVA_formulae
]
ANOVA_PRs = [
    anova_lm(ols(a, data=df).fit()) for a in ANOVA_formulae
]

In [15]:
ANOVA_output = pd.DataFrame(
    {
        "All group DF": [a.loc["group"]["df"] for a in ANOVA_PRs],
        "All residual DF": [a.loc["Residual"]["df"] for a in ANOVA_PRs],
        "All F": [a.loc["group"]["F"] for a in ANOVA_PRs],
        "All PR(>F)": [a.loc["group"]["PR(>F)"] for a in ANOVA_PRs], 
        "Russian group DF": [a.loc["group"]["df"] for a in ANOVA_PRs_RU],
        "Russian residual DF": [a.loc["Residual"]["df"] for a in ANOVA_PRs_RU],
        "Russian F": [a.loc["group"]["F"] for a in ANOVA_PRs_RU],
        "Russian PR(>F)": [a.loc["group"]["PR(>F)"] for a in ANOVA_PRs_RU],
        "English group DF": [a.loc["group"]["df"] for a in ANOVA_PRs_EN],
        "English residual DF": [a.loc["Residual"]["df"] for a in ANOVA_PRs_EN],
        "English F": [a.loc["group"]["F"] for a in ANOVA_PRs_EN],
        "English PR(>F)": [a.loc["group"]["PR(>F)"] for a in ANOVA_PRs_EN],  
    }
)
ANOVA_output.index = [a.split(" ~ ")[0] for a in ANOVA_formulae]

In [16]:
ANOVA_output.T

Unnamed: 0,AUROC,hits,FAs,mean_RT,d_prime
All group DF,1.0,1.0,1.0,1.0,1.0
All residual DF,135.0,135.0,135.0,135.0,135.0
All F,5.150597,1.638793,0.648791,17.545795,0.619711
All PR(>F),0.024824,0.202687,0.421962,5e-05,0.432535
Russian group DF,1.0,1.0,1.0,1.0,1.0
Russian residual DF,101.0,101.0,101.0,101.0,101.0
Russian F,1.483391,4.741309,0.015516,0.051098,6.049555
Russian PR(>F),0.226082,0.031775,0.901118,0.82162,0.015609
English group DF,1.0,1.0,1.0,1.0,1.0
English residual DF,32.0,32.0,32.0,32.0,32.0


### ANCOVA

I have computed one-way ANCOVA for each accuracy measure (and reaction time).
Next slide shows formulae for ANCOVA, the one after that shows the results (DF for group and residual, F and PR(>F)) for aRussian sample only since I have no age data for English sample.

In [17]:
ANCOVA_formulae = [
    "AUROC ~ C(group) + age", 
    "hits ~ C(group) + age", 
    "FAs ~ C(group) + age", 
    "mean_RT ~ C(group) + age", 
    "d_prime ~ C(group) + age", 
]

In [18]:
ANCOVA_PRs_RU = [
    anova_lm(ols(a, data=ru_df).fit()) for a in ANCOVA_formulae
]

In [19]:
ANCOVA_output = pd.DataFrame(
    {
        "Russian group DF": [a.loc["C(group)"]["df"] for a in ANCOVA_PRs_RU],
        "Russian age DF": [a.loc["age"]["df"] for a in ANCOVA_PRs_RU],
        "Russian residual DF": [a.loc["Residual"]["df"] for a in ANCOVA_PRs_RU],
        "Russian group F": [a.loc["C(group)"]["F"] for a in ANCOVA_PRs_RU],
        "Russian age F": [a.loc["age"]["F"] for a in ANCOVA_PRs_RU],
        "Russian group PR(>F)": [a.loc["C(group)"]["PR(>F)"] for a in ANCOVA_PRs_RU],
        "Russian age": [a.loc["age"]["PR(>F)"] for a in ANCOVA_PRs_RU], 
    }
)
ANCOVA_output.index = [a.split(" ~ ")[0] for a in ANCOVA_formulae]

In [20]:
ANCOVA_output.T

Unnamed: 0,AUROC,hits,FAs,mean_RT,d_prime
Russian group DF,3.0,3.0,3.0,3.0,3.0
Russian age DF,1.0,1.0,1.0,1.0,1.0
Russian residual DF,98.0,98.0,98.0,98.0,98.0
Russian group F,0.59692,1.742169,0.03245,0.696055,2.162573
Russian age F,0.515703,3.274961,3.823079,0.609771,0.180281
Russian group PR(>F),0.618511,0.163382,0.992101,0.556639,0.097304
Russian age,0.474389,0.07341,0.0534,0.436756,0.672062


### MANOVA

I have computed one-way MANOVA for each accuracy measure (and reaction time).
Next slide shows a formula for MANOVA, three last slides show the results for different MANOVA measures (Wilks lambda, Pillai's trace, Hotelling-Lawley trace and Roy's greatest root).

In [21]:
MANOVA_formulae = [
    "AUROC + hits + FAs + mean_RT + d_prime ~ C(group)", 
]

In [22]:
MANOVA_PRs_RU = [
    MANOVA.from_formula(a, data=ru_df).mv_test().summary() for a in MANOVA_formulae
]
MANOVA_PRs_EN = [
    MANOVA.from_formula(a, data=en_df).mv_test().summary() for a in MANOVA_formulae
]
MANOVA_PRs = [
    MANOVA.from_formula(a, data=df).mv_test().summary() for a in MANOVA_formulae
]

In [23]:
MANOVA_PRs[0]

0,1,2,3
,,,

0,1,2,3,4,5,6
,Intercept,Value,Num DF,Den DF,F Value,Pr > F
,Wilks' lambda,0.0249,4.0000,128.0000,1253.9272,0.0000
,Pillai's trace,1.0411,4.0000,128.0000,-810.5501,1.0000
,Hotelling-Lawley trace,36.5335,4.0000,128.0000,1169.0730,0.0000
,Roy's greatest root,36.4608,4.0000,128.0000,1166.7457,0.0000

0,1,2,3
,,,

0,1,2,3,4,5,6
,C(group),Value,Num DF,Den DF,F Value,Pr > F
,Wilks' lambda,0.6699,20.0000,425.4778,2.7310,0.0001
,Pillai's trace,0.3566,20.0000,524.0000,2.5644,0.0002
,Hotelling-Lawley trace,0.4537,20.0000,274.2465,2.8792,0.0001
,Roy's greatest root,0.3486,5.0000,131.0000,9.1324,0.0000


In [24]:
MANOVA_PRs_RU[0]

0,1,2,3
,,,

0,1,2,3,4,5,6
,Intercept,Value,Num DF,Den DF,F Value,Pr > F
,Wilks' lambda,0.0183,4.0000,96.0000,1286.9284,0.0000
,Pillai's trace,0.9817,4.0000,96.0000,1286.9284,0.0000
,Hotelling-Lawley trace,53.6220,4.0000,96.0000,1286.9284,0.0000
,Roy's greatest root,53.6220,4.0000,96.0000,1286.9284,0.0000

0,1,2,3
,,,

0,1,2,3,4,5,6
,C(group),Value,Num DF,Den DF,F Value,Pr > F
,Wilks' lambda,0.8396,12.0000,254.2836,1.4480,0.1446
,Pillai's trace,0.1663,12.0000,294.0000,1.4378,0.1478
,Hotelling-Lawley trace,0.1842,12.0000,163.7389,1.4607,0.1439
,Roy's greatest root,0.1367,4.0000,98.0000,3.3497,0.0129


In [25]:
MANOVA_PRs_EN[0]

0,1,2,3
,,,

0,1,2,3,4,5,6
,Intercept,Value,Num DF,Den DF,F Value,Pr > F
,Wilks' lambda,0.0066,4.0000,29.0000,1084.6073,0.0000
,Pillai's trace,1.2137,4.0000,29.0000,-41.1784,1.0000
,Hotelling-Lawley trace,116.4198,4.0000,29.0000,844.0438,0.0000
,Roy's greatest root,116.1341,4.0000,29.0000,841.9724,0.0000

0,1,2,3
,,,

0,1,2,3,4,5,6
,C(group),Value,Num DF,Den DF,F Value,Pr > F
,Wilks' lambda,0.6691,4.0000,29.0000,3.5857,0.0171
,Pillai's trace,0.3309,4.0000,29.0000,3.5857,0.0171
,Hotelling-Lawley trace,0.4946,4.0000,29.0000,3.5857,0.0171
,Roy's greatest root,0.4946,4.0000,29.0000,3.5857,0.0171
