In [54]:
import json
# Value stored as "Time" for trials whose record is not marked finished.
# NOTE(review): presumably the per-task time limit, in the same unit as
# "duration" — confirm against the recording page.
UNFINISHED_TIME = 600

# Read the raw trial records. JSON text is UTF-8 by specification, so pin the
# encoding instead of relying on the platform's default text encoding.
with open("../UserTestWebPage/time_recording.json", 'r', encoding="utf-8") as f:
    data = json.load(f)

# Pivot the list of per-trial records into parallel column lists
# (one entry per trial, in file order).
parsed_data = {
    "Subject": [item["userID"] for item in data],
    "TaskID": [item["taskID"] for item in data],
    "DeviceID": [item["deviceID"] for item in data],
    # "duration" is only read for finished trials; unfinished trials get the
    # fixed placeholder above.
    "Time": [item["duration"] if item["finished"] else UNFINISHED_TIME for item in data],
}

In [55]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Build the trial-level analysis frame from `parsed_data`
# (parallel column lists: Subject, TaskID, DeviceID, Time).
df = pd.DataFrame(parsed_data)

# The identifier columns are labels, not quantities: store them as
# categoricals so the model formula treats them as factors.
# Time is the numeric response.
df['Subject'] = df['Subject'].astype('category')
df['DeviceID'] = df['DeviceID'].astype('category')
df['TaskID'] = df['TaskID'].astype('category')
df['Time'] = df['Time'].astype(float)

# Task difficulty is derived from TaskID.
# NOTE(review): the 1-3 / 4-6 / 7-9 grouping is an assumption carried over
# from the original cell — confirm it against the study design.
task_difficulty_mapping = {
    '1': 'Easy', '2': 'Easy', '3': 'Easy',
    '4': 'Medium', '5': 'Medium', '6': 'Medium',
    '7': 'Hard', '8': 'Hard', '9': 'Hard'
}

# The mapping is keyed by strings, so normalise the IDs to str first; otherwise
# integer task IDs from the JSON would all map to NaN without any error.
task_difficulty = df['TaskID'].astype(str).map(task_difficulty_mapping)

# Fail loudly on IDs the mapping does not cover: rows with a missing factor
# level would otherwise silently drop out of the model fit.
unmapped_ids = sorted(df.loc[task_difficulty.isna(), 'TaskID'].astype(str).unique().tolist())
if unmapped_ids:
    raise ValueError(f"No difficulty level defined for TaskID value(s): {unmapped_ids}")

df['TaskDifficulty'] = task_difficulty.astype('category')

# Mixed-effects model: Time is the dependent variable, DeviceID and
# TaskDifficulty are fixed effects, and each subject gets a random intercept
# (groups=Subject).
model = smf.mixedlm("Time ~ DeviceID + TaskDifficulty", df, groups=df["Subject"])
result = model.fit()

# Print the summary of the model
print(result.summary())

                 Mixed Linear Model Regression Results
Model:                 MixedLM      Dependent Variable:      Time      
No. Observations:      108          Method:                  REML      
No. Groups:            12           Scale:                   13802.7603
Min. group size:       9            Log-Likelihood:          -651.8849 
Max. group size:       9            Converged:               Yes       
Mean group size:       9.0                                             
-----------------------------------------------------------------------
                          Coef.   Std.Err.   z    P>|z|  [0.025  0.975]
-----------------------------------------------------------------------
Intercept                 103.106   30.331  3.399 0.001  43.658 162.554
DeviceID[T.2]              57.034   27.692  2.060 0.039   2.759 111.308
DeviceID[T.3]             -14.933   27.692 -0.539 0.590 -69.208  39.341
TaskDifficulty[T.Hard]     91.125   27.692  3.291 0.001  36.851 145.400
TaskDiffi

In [4]:
import pandas as pd
from scipy import stats

# Requires the trial-level `df` (Subject, DeviceID, TaskDifficulty, Time) built
# from `parsed_data`. The AnovaRM example cell further down rebinds `df` to toy
# data, so this cell must run before that one.


def _subject_level_means(frame, factor):
    """Return the mean Time for each (Subject, factor level) pair as a flat frame.

    `observed=True` keeps only the pairs that actually occur in `frame`. With
    categorical keys the legacy default also emits every unobserved pair with a
    NaN mean (and raises a FutureWarning), and a NaN in any sample makes
    `f_oneway` return nan.
    """
    return frame.groupby(['Subject', factor], observed=True)['Time'].mean().reset_index()


def _one_way_anova(means, factor):
    """Run scipy's one-way ANOVA on Time across the levels of `factor`.

    Returns the result of `stats.f_oneway`, which unpacks as (F, p).
    """
    samples = [level['Time'].to_numpy() for _, level in means.groupby(factor, observed=True)]
    return stats.f_oneway(*samples)


# NOTE(review): f_oneway treats the per-level samples as independent groups.
# Here each sample is built from per-subject means, and the same subjects
# appear to contribute to every level — a repeated-measures ANOVA may be the
# more appropriate test. Confirm the design before reporting these p-values.

# ANOVA 1: Effect of DeviceID on Time
# Aggregate to one mean time per subject-device combination first.
device_means = _subject_level_means(df, 'DeviceID')
f_val_device, p_val_device = _one_way_anova(device_means, 'DeviceID')
print(f'ANOVA for DeviceID: F = {f_val_device}, p = {p_val_device}')

# ANOVA 2: Effect of Task Difficulty on Time
# Same aggregation, per subject-task difficulty combination.
difficulty_means = _subject_level_means(df, 'TaskDifficulty')
f_val_difficulty, p_val_difficulty = _one_way_anova(difficulty_means, 'TaskDifficulty')
print(f'ANOVA for Task Difficulty: F = {f_val_difficulty}, p = {p_val_difficulty}')


ANOVA for DeviceID: F = 0.06777698052208132, p = 0.9345985904083561
ANOVA for Task Difficulty: F = 5.3545693328805175, p = 0.009683623824535028


  device_means = df.groupby(['Subject', 'DeviceID'])['Time'].mean().reset_index()
  f_val_device, p_val_device = stats.f_oneway(*[group['Time'].values for name, group in device_means.groupby('DeviceID')])
  difficulty_means = df.groupby(['Subject', 'TaskDifficulty'])['Time'].mean().reset_index()
  f_val_difficulty, p_val_difficulty = stats.f_oneway(*[group['Time'].values for name, group in difficulty_means.groupby('TaskDifficulty')])


In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.stats.anova import AnovaRM

# Toy repeated-measures dataset: three subjects ('1'-'3'), each with one Score
# under each of the conditions A, B and C (rows are grouped by subject).
data = {
    'Subject': ['1'] * 3 + ['2'] * 3 + ['3'] * 3,
    'Condition': ['A', 'B', 'C'] * 3,
    'Score': [
        5, 7, 6,  # subject 1
        4, 5, 5,  # subject 2
        8, 9, 7,  # subject 3
    ],
}

# Build the frame and mark both identifier columns as categorical in one step;
# Score keeps its inferred numeric dtype.
df = pd.DataFrame(data).astype({'Subject': 'category', 'Condition': 'category'})

In [None]:
# Repeated-measures ANOVA on `df`: Score is the dependent variable, Condition
# is the within-subject factor, and Subject identifies the repeated unit.
anova_model = AnovaRM(
    data=df,
    depvar='Score',
    subject='Subject',
    within=['Condition'],
)

# Fit the model and show the resulting table.
results = anova_model.fit()
print(results)