In [9]:
import pandas as pd
import os
from pathlib import Path
import janitor as jn
import warnings

# Grammatical gender post-test

In [10]:
warnings.filterwarnings('ignore')
# This turns off all warnings but aiming specifically at the Chained Assignment warning, because to my best knowledge the code should still work under pandas 3.0
# If it is not the case, remove this line of code to find out more

## Function to recode cumulative button press responses

In [5]:
def custom_diff(df, columns):
    """Turn cumulative button-click counters into per-row click increments.

    For every name in ``columns`` a new column is added to ``df``. The new
    column's name is the source name with ``'button_'`` removed and
    ``'numClicks'`` replaced by ``'numClicksDiff'``.

    Row rules (identical for every column):
      * the first row keeps its source value unchanged;
      * a row whose source value is 0 stays 0 (only increases are of interest);
      * any other row holds its source value minus the previous row's value.

    The computation is vectorized and assigns each new column in a single
    step, so it does not rely on chained assignment
    (``df[col].iloc[i] = ...``), which does not update the frame when pandas
    Copy-on-Write is active.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the source columns; it is modified in place.
    columns : iterable of str
        Names of the cumulative counter columns to recode.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the new columns added.
    """
    for column in columns:
        # Prepare new column name
        new_column_name = column.replace('button_', '').replace('numClicks', 'numClicksDiff')

        counts = df[column]
        # Shifting with fill_value=0 makes the first row be compared against 0,
        # so it keeps its own value (and integer columns stay integer).
        increments = counts - counts.shift(1, fill_value=0)
        # Where the counter itself is 0, keep that 0 instead of the
        # (possibly negative) difference to the previous row.
        df[new_column_name] = increments.where(counts != 0, counts)

    return df

In [13]:
# Folder holding every file to be analysed.
# It may hold data from several participants (4+ files) or from a single one,
# which gives 4 files (2 blocks x 2 sessions).
# Each input file yields exactly one output file (1:1 ratio).
inputdir = "C:/Users/annas/OneDrive/Desktop/BCBL/Preprod_2 (Jupyter)/data/ggender"

# Gather every CSV found under the folder (sub-folders included) into a list
all_filepaths = [csv_path for csv_path in Path(inputdir).rglob("*.csv")]

In [14]:
for filepath in all_filepaths:
    # Read one CSV export.
    # NOTE(review): an earlier comment here described dropping an instructions column whose
    # embedded commas break tokenizing; no such step exists - confirm the exports parse cleanly.
    df = pd.read_csv(filepath)

    # Drop practice trials (rows without an 'item' value). The explicit copy makes the
    # result an independent frame, so the column assignments below do not write into a
    # slice of the raw data.
    df = df[df['item'].notna()].copy()

    # Re-code button responses (cumulative click counts -> per-row increments)
    df = custom_diff(df, ['button_femenine.numClicks', 'button_masculine.numClicks', 'button_idontknow.numClicks'])

    clicked_feminine = df['femenine.numClicksDiff'] == 1
    clicked_masculine = df['masculine.numClicksDiff'] == 1

    # Re-code button response into one column.
    # Masks are applied from lowest to highest priority, so 'F' wins over 'M' and
    # everything else stays 'Idk'. Being vectorized, this also works when a file has
    # no non-practice rows (row-wise apply does not).
    df['button_response'] = (
        pd.Series('Idk', index=df.index)
        .mask(clicked_masculine, 'M')
        .mask(clicked_feminine, 'F')
    )

    # Determine response: the literal string 'NaN' when no correct answer is defined,
    # 'Correct' when the clicked button matches the correct answer, otherwise 'Incorrect'.
    correct_response = df['correct_response']
    answered_correctly = (
        (clicked_feminine & (correct_response == "F"))
        | (clicked_masculine & (correct_response == "M"))
    )
    df['response'] = (
        pd.Series('Incorrect', index=df.index)
        .mask(answered_correctly, 'Correct')
        .mask(correct_response.isna(), 'NaN')
    )

    # Sanity check in case of an error, to know which file was last successfully re-coded
    print("Processing file:", filepath)

    # Select columns of interest
    df = df.loc[:, ['Subject ID', 'Language', 'List', 'item', 'correct_response', 'button_response', 'response']]

    # Get original file name without directory or extension
    excel_file_name = Path(filepath).stem

    # Append _ggender to name the output and save it as an Excel sheet in the working directory
    df.to_excel(f'{excel_file_name}_ggender.xlsx', index = False)

Processing file: C:\Users\annas\OneDrive\Desktop\BCBL\Preprod_2 (Jupyter)\data\ggender\4849_French_2.csv
Processing file: C:\Users\annas\OneDrive\Desktop\BCBL\Preprod_2 (Jupyter)\data\ggender\4849_Spanish_2.csv
