In [1]:
from analysis.analysis import create_analysis
from analysis.pipeline import create_pipeline
from analysis.stages import BooleanSummary, ClassifyColumns, FormatData, NumericalSummary, CategoricalSummary, Correlation, GroupWise
from analysis.stages import Stage
from typing import Any

import pandas as pd

In [18]:
# Columns holding arrow directions; stored as pandas categoricals below.
categorical_columns = ["primerDir", "topFlankerDir", "bottomFlankerDir"]

# Four hand-written trials used as a fixture for the analysis pipelines.
test_data = pd.DataFrame(
    {
        "accuracy": [True, True, False, True],
        "reactionTime": [504, 386, 372, 489],
        "primerDir": ["right", "right", "right", "left"],
        "topFlankerDir": ["left", "right", "right", "left"],
        "bottomFlankerDir": ["right", "right", "left", "right"],
    }
).astype({name: "category" for name in categorical_columns})

In [None]:
class DirectionAnalysis(Stage):
    """
    Find if the primer and flankers being in different directions affect reaction speed and accuracy.

    Four boolean conditions are derived per row from the direction columns
    (``primerDir``, ``topFlankerDir``, ``bottomFlankerDir``):

    * ``primer_eq_top`` - primer and top flanker point the same way
    * ``primer_eq_bot`` - primer and bottom flanker point the same way
    * ``top_eq_bot``    - both flankers point the same way
    * ``all_eq``        - all three point the same way

    For every condition and each of its values (True/False) the mean and
    standard deviation of ``reactionTime`` and ``accuracy`` are computed.
    """

    @classmethod
    def run(cls, data: dict[str, Any]) -> dict[str, Any]:
        """
        Compute per-condition mean/std of reaction time and accuracy.

        Args:
            data: Pipeline state. ``data["data"]`` must be a DataFrame holding
                the three direction columns plus ``reactionTime`` and
                ``accuracy``.

        Returns:
            The same ``data`` dict, with the summary table (columns
            ``Condition``, ``ConditionValue``, ``mean_*``, ``std_*``) stored
            under ``data["intervariable"]["direction"]``.

        Raises:
            KeyError: if ``data["data"]`` or one of the required columns is
                missing.
        """
        # Work on the frame handed to the stage, not on a notebook global.
        frame = data["data"]

        # Compare as plain objects: `==` between two categorical Series raises
        # a TypeError when their category sets differ (e.g. a column that only
        # ever contains "right").
        primer = frame["primerDir"].astype(object)
        top = frame["topFlankerDir"].astype(object)
        bottom = frame["bottomFlankerDir"].astype(object)

        primer_eq_top = primer == top
        primer_eq_bot = primer == bottom
        top_eq_bot = top == bottom

        # Define conditions
        conditions = {
            "primer_eq_top": primer_eq_top,
            "primer_eq_bot": primer_eq_bot,
            "top_eq_bot": top_eq_bot,
            # All three agree iff primer == top and top == bottom.
            "all_eq": primer_eq_top & top_eq_bot,
        }

        # Select the aggregated columns directly. Subsetting to the
        # "numerical" column class would drop the boolean `accuracy` column
        # and make the pivot below fail.
        metric_cols = ["reactionTime", "accuracy"]

        # One copy of the metric columns per condition, labelled with the
        # condition name and its per-row True/False value.
        melted_data = pd.concat(
            [
                frame[metric_cols].assign(Condition=condition, ConditionValue=mask)
                for condition, mask in conditions.items()
            ],
            ignore_index=True,
        )

        # Pivot table for mean and std grouped by Condition and ConditionValue (True/False)
        pivot_table = melted_data.pivot_table(
            index=["Condition", "ConditionValue"],
            values=metric_cols,
            aggfunc=["mean", "std"],
        )

        # Flatten multi-level column names, e.g. ("mean", "accuracy") -> "mean_accuracy"
        pivot_table.columns = ["_".join(col) for col in pivot_table.columns]
        pivot_table = pivot_table.reset_index()

        # NOTE(review): the result key is an assumption modelled on the
        # "intervariable" -> "groupwise" layout seen in the analysis output;
        # confirm against the Stage contract.
        data.setdefault("intervariable", {})["direction"] = pivot_table
        return data

In [37]:

# NOTE(review): no visible cell assigns `pivot_table` at module level (the only
# assignment is a local inside DirectionAnalysis.run), so this cell appears to
# rely on a variable left in the kernel by an earlier run and would fail on
# Restart & Run All.
pivot_table

Unnamed: 0,Condition,ConditionValue,mean_accuracy,mean_reactionTime,std_accuracy,std_reactionTime
0,all_eq,False,0.666667,455.0,0.57735,72.270326
1,all_eq,True,1.0,386.0,,
2,primer_eq_bot,False,0.5,430.5,0.707107,82.731493
3,primer_eq_bot,True,1.0,445.0,0.0,83.4386
4,primer_eq_top,False,1.0,504.0,,
5,primer_eq_top,True,0.666667,415.666667,0.57735,63.89314
6,top_eq_bot,False,0.666667,455.0,0.57735,72.270326
7,top_eq_bot,True,1.0,386.0,,


In [3]:
# Pipeline registered below as the analysis preprocessor.
preprocessor = create_pipeline().add_stage(FormatData).add_stage(ClassifyColumns)

# Descriptive statistics: column classification followed by one summary stage
# per column class.
descriptive_analysis = (
    create_pipeline()
    .add_stage(ClassifyColumns)
    .add_stage(NumericalSummary)
    .add_stage(BooleanSummary)
    .add_stage(CategoricalSummary)
)

# Currently an empty pipeline (no stages added).
distribution_analysis = create_pipeline()

# Relationships between variables: correlation, then group-wise statistics.
intervariable_analysis = (
    create_pipeline()
    .add_stage(Correlation)
    .add_stage(GroupWise)
)

# Assemble the full analysis: preprocessor first, then the three pipelines in order.
analysis = (
    create_analysis()
    .add_preprocessor(preprocessor)
    .add_pipeline(descriptive_analysis)
    .add_pipeline(distribution_analysis)
    .add_pipeline(intervariable_analysis)
)

# Run everything on the fixture frame.
arrow_game_analysis = analysis.run(test_data)

In [4]:
# Descriptive statistics for the numerical columns.
arrow_game_analysis["summary"]["numerical"]

Unnamed: 0,reactionTime
count,4.0
mean,437.75
std,68.353859
min,372.0
25%,382.5
50%,437.5
75%,492.75
max,504.0


In [5]:
# Mean and standard deviation of the boolean columns.
arrow_game_analysis["summary"]["boolean"]

Unnamed: 0,accuracy
mean,0.75
std,0.5


In [6]:
# Summary of the categorical direction columns, one row per category value.
# NOTE(review): the saved output shows no "left" share for primerDir, although
# test_data as defined in this notebook has a "left" primer in its last row;
# the output looks stale - re-run after the data cell.
arrow_game_analysis["summary"]["categorical"]

Unnamed: 0,primerDir,topFlankerDir,bottomFlankerDir
left,,0.5,0.25
right,1.0,0.5,0.75


In [7]:
# Correlation matrix produced by the intervariable pipeline.
arrow_game_analysis["intervariable"]["correlation"]

Unnamed: 0,reactionTime
reactionTime,1.0


In [8]:
# Mean/std of reactionTime and accuracy grouped by primer direction.
# NOTE(review): the saved output contains only a "right" group, but test_data
# as defined in this notebook has a "left" primer in its last row; the output
# looks stale (cell executed before the data was last edited) - re-run.
arrow_game_analysis["intervariable"]["groupwise"]["primerDir"]

Unnamed: 0_level_0,reactionTime,reactionTime,accuracy,accuracy
Unnamed: 0_level_1,mean,std,mean,std
primerDir,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
right,437.75,68.353859,0.75,0.5


In [9]:
# Mean/std of reactionTime and accuracy grouped by top flanker direction.
arrow_game_analysis["intervariable"]["groupwise"]["topFlankerDir"]

Unnamed: 0_level_0,reactionTime,reactionTime,accuracy,accuracy
Unnamed: 0_level_1,mean,std,mean,std
topFlankerDir,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
left,496.5,10.606602,1.0,0.0
right,379.0,9.899495,0.5,0.707107


In [10]:
# Mean/std of reactionTime and accuracy grouped by bottom flanker direction.
arrow_game_analysis["intervariable"]["groupwise"]["bottomFlankerDir"]

Unnamed: 0_level_0,reactionTime,reactionTime,accuracy,accuracy
Unnamed: 0_level_1,mean,std,mean,std
bottomFlankerDir,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
left,372.0,,0.0,
right,459.666667,64.236542,1.0,0.0


In [11]:
# Mean/std of reactionTime grouped by whether the response was accurate.
arrow_game_analysis["intervariable"]["groupwise"]["accuracy"]

Unnamed: 0_level_0,reactionTime,reactionTime
Unnamed: 0_level_1,mean,std
accuracy,Unnamed: 1_level_2,Unnamed: 2_level_2
False,372.0,
True,459.666667,64.236542
