In [1]:
#This script renames all of the data within the directory so that it can be better analyzed
import os

# Base directory path
base_dir = '/Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test'


def rename_data_files(directory):
    """Rename the files in every 'data' folder after that folder's parent.

    Walks ``directory`` recursively. For each folder named ``data``, every
    file directly inside it is renamed to
    ``<parent folder name><original extension>``.

    A file is left untouched (and never overwritten) when:
      * its name starts with a dot (e.g. macOS ``.DS_Store``):
        ``os.path.splitext`` reports no extension for such names, so the
        rename would create an extension-less file named after the folder;
      * it already carries the target name;
      * a different file already occupies the target name, which happens
        when one ``data`` folder holds several files with the same extension.

    Args:
        directory: Root directory to search.

    Returns:
        A list of ``(old_path, new_path)`` tuples, one per rename performed.
    """
    renamed = []

    # Traverse through all the folders in the base directory
    for root, dirs, files in os.walk(directory):
        # Only folders named 'data' are of interest
        if os.path.basename(root) != 'data':
            continue

        # Get the parent folder name (subfolder name)
        parent_folder_name = os.path.basename(os.path.dirname(root))

        # Sorted so that, when several files compete for one target name,
        # the winner does not depend on directory listing order.
        for file_name in sorted(files):
            if file_name.startswith('.'):
                continue

            old_file_path = os.path.join(root, file_name)
            new_file_name = f"{parent_folder_name}{os.path.splitext(file_name)[1]}"
            new_file_path = os.path.join(root, new_file_name)

            if new_file_name == file_name:
                print(f"'{old_file_path}' already has the expected name. Skipping.")
                continue

            # samefile() lets a case-only rename through on case-insensitive
            # file systems, where the target "exists" but is the source itself.
            if os.path.exists(new_file_path) and not os.path.samefile(old_file_path, new_file_path):
                print(f"Cannot rename '{old_file_path}': '{new_file_path}' already exists. Skipping.")
                continue

            # Rename the file
            os.rename(old_file_path, new_file_path)
            print(f"Renamed '{old_file_path}' to '{new_file_path}'")
            renamed.append((old_file_path, new_file_path))

    return renamed


rename_data_files(base_dir)

print("Renaming completed.")

Renamed '/Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test/order1TEST/data/order1TEST.csv' to '/Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test/order1TEST/data/order1TEST.csv'
Renaming completed.


In [2]:
import os
import pandas as pd
import re

# Function to check if the stimulus is the trial fixation cross
def is_trial_fixation_cross(stimulus):
    """Return True when ``stimulus`` is the plain trial fixation cross.

    Quote characters and all whitespace are removed before the comparison,
    so quoted and unquoted ``style`` attributes are treated alike.

    Args:
        stimulus: Value of a ``stimulus`` cell. Anything that is not a string
            (NaN, None, numbers, ...) is never a match.

    Returns:
        bool: True only for the fixation-cross markup.
    """
    # Non-string cells have no .replace(); treat them as "not a fixation cross"
    # instead of raising.
    if not isinstance(stimulus, str):
        return False

    # Remove whitespace and quotes
    normalized = ''.join(stimulus.replace('"', '').replace("'", '').split())
    return normalized == '<pstyle=font-size:48px;><br><br>+<br><br></p>'

# Function to check if the stimulus is the green fixation cross
def is_green_fixation_cross(stimulus):
    """Return True when ``stimulus`` contains a '+' and the colour '#1a851a'.

    Args:
        stimulus: Value of a ``stimulus`` cell. Anything that is not a string
            (NaN, None, numbers, ...) is never a match.

    Returns:
        bool: True when both the '+' and 'color:#1a851a' substrings are present.
    """
    # The substring tests below only make sense for strings; other cell
    # values would raise TypeError.
    if not isinstance(stimulus, str):
        return False
    return '+' in stimulus and 'color:#1a851a' in stimulus

# Function to check if the stimulus is an instruction screen
def is_instruction_screen(stimulus):
    """Return True when ``stimulus`` contains one of the instruction phrases.

    The phrases checked are 'Press enter to begin the trials' and
    'Please look at the fixation cross'.

    Args:
        stimulus: Value of a ``stimulus`` cell. Anything that is not a string
            (NaN, None, numbers, ...) is never a match.

    Returns:
        bool: True when either phrase occurs in the stimulus text.
    """
    # Substring tests on non-string cells would raise TypeError.
    if not isinstance(stimulus, str):
        return False
    return (
        'Press enter to begin the trials' in stimulus
        or 'Please look at the fixation cross' in stimulus
    )

# Define the function to process CSV files

# Word stimuli look like <p style=font-size:48px;><br><br>WORD<br><br></p>.
# The quotes around the style value are optional so that quoted HTML
# (style="font-size:48px;") is recognised as well; the fixation-cross check in
# this file already ignores quotes, and the word extraction should agree.
_WORD_STIMULUS_RE = re.compile(
    r'<p style=["\']?font-size:48px;["\']?><br><br>\s*(.*?)\s*<br><br></p>'
)


def _make_trial_record(trial_number, response_collected, response, rt, stimulus_content, response_phase):
    """Build the output row for one finished trial."""
    return {
        'Trial': trial_number,
        'Response': response if response_collected else None,
        'RT': rt if response_collected else None,
        'Stimulus': stimulus_content,
        'Response Time Category': response_phase
    }


def process_csv(file_path, output_dir, image_output_dir):
    """Extract per-trial responses from one raw CSV and write summary CSVs.

    Steps:
      1. Read the file; it must contain the columns ``trial_type``,
         ``trial_index``, ``rt``, ``stimulus`` and ``response``.
      2. Drop everything up to and including the first row whose stimulus
         matches the "practice trials have been completed" message.
      3. Walk the remaining rows. A trial starts at a trial fixation cross and
         ends at a green fixation cross. Within a trial, the first row that
         has both a response and an rt supplies the trial's Response/RT, and
         the phase of that row (MASK, FIXATION, WORD, or None when the row's
         stimulus is none of these) becomes its 'Response Time Category'.
         Trials without a response keep the category 'NO RESPONSE'.
      4. Trials in which any stimulus contains 'img' are written, one row per
         trial, to ``<name>_image_trials.csv`` in ``image_output_dir``. All
         other trials are paired in order (1st with 2nd, 3rd with 4th, ...)
         and written to ``<name>_responses.csv`` in ``output_dir``.

    Problems (unreadable file, missing columns, missing practice-end marker)
    are reported with ``print`` and the file is skipped.

    Args:
        file_path: Path of the raw CSV file.
        output_dir: Directory for the paired responses file.
        image_output_dir: Directory for the image-trial file.

    Returns:
        None.
    """
    filename = os.path.basename(file_path)
    print(f"\nProcessing file: {filename}")

    try:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path, encoding='utf-8')
    except Exception as e:
        print(f"Error reading {filename}: {e}")
        return

    # Verify the required columns exist
    required_columns = ['trial_type', 'trial_index', 'rt', 'stimulus', 'response']
    if not all(column in df.columns for column in required_columns):
        print(f"File {filename} is missing required columns. Skipping.")
        return

    # Define the practice trials end marker
    practice_end_pattern = r'The practice trials have been completed.*?Again, please respond as quickly and accurately as possible'

    # Find the index where the practice trials end
    practice_end_indices = df[df['stimulus'].str.contains(practice_end_pattern, regex=True, na=False)].index
    if len(practice_end_indices) == 0:
        print(f"Practice trials end marker not found in {filename}. Skipping.")
        return
    practice_end_index = practice_end_indices[0]

    # Extract trials after the practice trials
    experiment_df = df.loc[practice_end_index + 1:].reset_index(drop=True)

    # Per-file accumulators
    trial_data = []
    image_trial_data = []

    # State of the trial currently being read
    current_trial = 0
    in_trial = False
    response_collected = False
    is_image_trial = False
    response_phase = 'NO RESPONSE'
    response_rt = None
    response_response = None
    stimulus_content = None

    for _, row in experiment_df.iterrows():
        raw_stimulus = row['stimulus']
        # Only string cells carry markup; NaN and any other value are treated
        # as "no stimulus" so the substring checks below cannot raise.
        stimulus = raw_stimulus if isinstance(raw_stimulus, str) else None
        response = row['response']
        rt = row['rt']

        # Skip instruction screens
        if is_instruction_screen(stimulus):
            continue

        # Skip until we find the next trial fixation cross
        if not in_trial:
            if is_trial_fixation_cross(stimulus):
                in_trial = True
                current_trial += 1
                stimulus_content = None
                response_collected = False
                is_image_trial = False
                response_phase = 'NO RESPONSE'
                response_rt = None
                response_response = None
            continue

        # From here on we are inside a trial.

        # Green fixation cross marks the end of the trial
        if is_green_fixation_cross(stimulus):
            trial_info = _make_trial_record(
                current_trial, response_collected, response_response,
                response_rt, stimulus_content, response_phase
            )
            (image_trial_data if is_image_trial else trial_data).append(trial_info)
            in_trial = False
            continue

        phase = None
        if stimulus is not None:
            # 'img' also covers '<img'
            if 'img' in stimulus:
                is_image_trial = True

            if '###' in stimulus:
                phase = 'MASK'
            elif '+' in stimulus and 'color:red' in stimulus:
                phase = 'FIXATION'
            elif '<p style=' in stimulus:
                phase = 'WORD'
                word_match = _WORD_STIMULUS_RE.search(stimulus)
                if word_match:
                    stimulus_content = word_match.group(1).strip()

        # Only the first response of a trial is kept
        if not response_collected and pd.notna(response) and pd.notna(rt):
            response_collected = True
            response_rt = rt
            response_response = response
            response_phase = phase

    # The file ended in the middle of a trial: keep what was collected
    if in_trial:
        trial_info = _make_trial_record(
            current_trial, response_collected, response_response,
            response_rt, stimulus_content, response_phase
        )
        (image_trial_data if is_image_trial else trial_data).append(trial_info)

    base_name = os.path.splitext(filename)[0]

    if trial_data:
        paired_trial_data = []
        for i in range(0, len(trial_data), 2):
            trial1 = trial_data[i]
            # An odd number of trials leaves the last pair without a partner
            trial2 = trial_data[i + 1] if i + 1 < len(trial_data) else None

            paired_info = {f'{key}1': value for key, value in trial1.items()}
            for key in trial1:
                paired_info[f'{key}2'] = trial2[key] if trial2 is not None else None
            paired_trial_data.append(paired_info)

        output_df = pd.DataFrame(paired_trial_data)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, f"{base_name}_responses.csv")
        output_df.to_csv(output_file, index=False)

    if image_trial_data:
        image_output_df = pd.DataFrame(image_trial_data)
        if image_output_dir:
            os.makedirs(image_output_dir, exist_ok=True)
        image_output_file = os.path.join(image_output_dir, f"{base_name}_image_trials.csv")
        image_output_df.to_csv(image_output_file, index=False)

# Main function to search for data folders and process files
def search_and_process_data(input_dir, output_dir, image_output_dir):
    """Run process_csv on every CSV inside each 'data' sub-folder of input_dir.

    The tree under ``input_dir`` is walked with ``os.walk``; whenever a
    directory has a child folder called ``data``, each entry of that folder
    whose name ends in '.csv' (case-insensitive) is handed to ``process_csv``
    together with the two output directories.
    """
    for current_dir, subdirs, _ in os.walk(input_dir):
        if 'data' not in subdirs:
            continue

        data_folder = os.path.join(current_dir, 'data')
        csv_names = [
            entry for entry in os.listdir(data_folder)
            if entry.lower().endswith('.csv')
        ]
        for csv_name in csv_names:
            process_csv(os.path.join(data_folder, csv_name), output_dir, image_output_dir)

# Where the raw data lives, and where the paired-response files and the
# image-trial files are written
input_dir = '/Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test'
output_dir = '/Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test/SessionFiles'
image_output_dir = '/Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test/CatchImages'

# Create both output folders up front (no error if they are already there)
for _target_dir in (output_dir, image_output_dir):
    os.makedirs(_target_dir, exist_ok=True)

# Walk the input tree and process every CSV found in a 'data' folder
search_and_process_data(
    input_dir=input_dir,
    output_dir=output_dir,
    image_output_dir=image_output_dir,
)



Processing file: order1TEST.csv


In [11]:
import pandas as pd
import glob
import os

# Folder holding the image-trial CSV files
directory = '/Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test/CatchImages/'

# Every CSV file directly inside that folder
file_pattern = os.path.join(directory, '*.csv')
files = glob.glob(file_pattern)

# Show which files were picked up
print(f"Files found: {files}")

# Expected key for each of the first 10 responses (row position -> key)
default_answer_key = dict(enumerate('zzmzmmzmzm'))

# The same key with 'z' and 'm' swapped, used for files with a capital 'B' in the name
reversed_answer_key = {
    position: ('m' if key == 'z' else 'z')
    for position, key in default_answer_key.items()
}

# One result record per file
data_list = []

for file in files:
    base_name = os.path.basename(file)
    try:
        df = pd.read_csv(file)
        print(f"Processing file: {file}")  # Check which file is being processed
        print(df.head())  # Preview the first few rows

        # Only the first 10 responses are scored
        df_subset = df.head(10).copy()

        # A capital 'B' in the file name selects the reversed key
        answer_key = reversed_answer_key if 'B' in base_name else default_answer_key

        if df_subset.empty:
            data_list.append({'Filename': base_name, 'Accuracy (%)': 'No Data'})
            continue

        # Look up the expected key by row position (0-9) and compare it with
        # what the participant pressed
        df_subset['CorrectResponse'] = df_subset.index.map(answer_key)
        df_subset['IsCorrect'] = df_subset['Response'] == df_subset['CorrectResponse']

        # Percentage of scored rows answered correctly
        total_trials = len(df_subset)
        correct_trials = df_subset['IsCorrect'].sum()
        accuracy = (correct_trials / total_trials) * 100 if total_trials > 0 else 0

        data_list.append({'Filename': base_name, 'Accuracy (%)': accuracy})
    except Exception as e:
        # Keep going with the other files, but record what went wrong
        print(f"Error processing file {file}: {e}")
        data_list.append({'Filename': base_name, 'Accuracy (%)': f'Error: {e}'})

# Collect the per-file results into one table
results_df = pd.DataFrame(data_list)

# Function to highlight rows with accuracy below a threshold (80% by default)
def highlight_low_accuracy(row, threshold=80, column='Accuracy (%)'):
    """Return one CSS string per cell, red when the row's accuracy is low.

    Args:
        row: Mapping or Series holding at least ``column``.
        threshold: Rows whose accuracy is strictly below this value are
            highlighted. Defaults to 80.
        column: Name of the entry holding the accuracy. Defaults to
            'Accuracy (%)'.

    Returns:
        list[str]: ``len(row)`` copies of 'background-color: red' when the
        accuracy is an int/float below ``threshold``; otherwise ``len(row)``
        empty strings (this includes text entries such as 'No Data').
    """
    value = row[column]
    is_low = isinstance(value, (int, float)) and value < threshold
    cell_style = 'background-color: red' if is_low else ''
    return [cell_style] * len(row)

# Build a Styler for results_df. With axis=1, highlight_low_accuracy is called
# once per row and returns one CSS string for each cell of that row.
styled_df = results_df.style.apply(highlight_low_accuracy, axis=1)

# Explicitly display the styled DataFrame.
# NOTE(review): `display` is not imported in this cell — presumably the
# IPython/Jupyter built-in; confirm before running this outside a notebook.
display(styled_df)


Files found: ['/Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test/CatchImages/order1TESTB_image_trials.csv']
Processing file: /Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test/CatchImages/order1TESTB_image_trials.csv
   Trial Response     RT  Stimulus Response Time Category
0      9        m   39.0       NaN               FIXATION
1     20        m  136.0       NaN               FIXATION
2     43        m  261.0       NaN               FIXATION
3     46        m  929.0       NaN               FIXATION
4     69        z   64.0       NaN                    NaN


Unnamed: 0,Filename,Accuracy (%)
0,order1TESTB_image_trials.csv,90.0


In [20]:
import os
import pandas as pd

# Define paths
base_dir = '/Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test/'
session_files_dir = os.path.join(base_dir, 'SessionFiles')
accuracy_output_dir = os.path.join(base_dir, 'Accuracy')

# Ensure accuracy output directory exists. exist_ok=True makes this a single
# step (no separate existence check) and matches how the other output folders
# in this notebook are created.
os.makedirs(accuracy_output_dir, exist_ok=True)

def process_responses_file(responses_file):
    """Score one '<folder>_responses.csv' file against its answer-key CSV.

    The folder name is the file name without '_responses.csv'. The answer key
    is read from ``<base_dir>/<folder>/HTML<code>ALL.csv``, where ``<code>``
    is the digits plus one lowercase letter that follow 'order' in the folder
    name (e.g. 'order1aB' -> 'HTML1aALL.csv'). When the folder name contains
    a capital 'B', the expected keys 'z' and 'm' are swapped.

    Rows are matched by position: row i of the responses file is compared
    with row i of the answer key. 'Accuracy1' / 'Accuracy2' are set to 'yes'
    when 'Response1' / 'Response2' equals 'Correct1' / 'Correct2', else 'no'
    (also when either value is missing or the answer key has no such row).
    The result is written to ``<accuracy_output_dir>/<folder>_accuracy.csv``.

    Files that cannot be scored (no order code in the name, answer key not
    found, required columns missing) are reported with ``print`` and skipped.

    Args:
        responses_file: File name (not path) inside ``session_files_dir``.

    Returns:
        None.
    """
    import re  # local import: this cell does not import re at the top

    # Load the responses CSV
    responses_path = os.path.join(session_files_dir, responses_file)
    responses_df = pd.read_csv(responses_path)

    # Extract folder name (without "_responses.csv") from the responses file
    folder_name = responses_file.replace('_responses.csv', '')

    # A capital B marks sessions with reversed response keys
    reverse_flag = 'B' in folder_name

    # The matching folder uses the exact folder name, including any capital B
    folder_path = os.path.join(base_dir, folder_name)

    # Determine the corresponding HTML{code}ALL.csv. The code is parsed with a
    # pattern rather than by searching for the letter 'a', so other order
    # letters work and names without a code are skipped instead of raising.
    order_match = re.search(r'order(\d+[a-z])', folder_name)
    if order_match is None:
        print(f"Could not find an order code (e.g. 'order1a') in '{folder_name}'. Skipping {responses_file}.")
        return
    csv_letter = order_match.group(1)
    matching_csv = f'HTML{csv_letter}ALL.csv'
    matching_csv_path = os.path.join(folder_path, matching_csv)

    # Check if the CSV exists in the folder
    if not os.path.exists(matching_csv_path):
        print(f"File {matching_csv_path} not found. Skipping {responses_file}.")
        return

    # Load the HTML CSV
    html_df = pd.read_csv(matching_csv_path)

    # Make sure both files carry the columns that are compared below
    missing_columns = [name for name in ('Response1', 'Response2') if name not in responses_df.columns]
    missing_columns += [name for name in ('Correct1', 'Correct2') if name not in html_df.columns]
    if missing_columns:
        print(f"Missing column(s) {missing_columns} for {responses_file}. Skipping.")
        return

    for suffix in ('1', '2'):
        correct = html_df[f'Correct{suffix}']

        # Reverse correct responses if needed
        if reverse_flag:
            correct = correct.replace({'z': 'm', 'm': 'z'})

        # Align the key to the responses by row label; rows the key does not
        # have become NaN and therefore score 'no' instead of raising KeyError.
        expected = correct.reindex(responses_df.index)
        is_match = responses_df[f'Response{suffix}'].eq(expected)
        responses_df[f'Accuracy{suffix}'] = is_match.map({True: 'yes', False: 'no'})

    # Save the results with accuracy columns
    output_file = os.path.join(accuracy_output_dir, f'{folder_name}_accuracy.csv')
    responses_df.to_csv(output_file, index=False)
    print(f'Saved accuracy file for {responses_file} at {output_file}')

# Score every '*_responses.csv' file found in the SessionFiles directory
response_files = [
    entry for entry in os.listdir(session_files_dir)
    if entry.endswith('_responses.csv')
]
for file in response_files:
    process_responses_file(file)


Saved accuracy file for order1aB_responses.csv at /Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test/Accuracy/order1aB_accuracy.csv


In [21]:
import pandas as pd
import glob
import os

# Folder holding the per-session accuracy CSV files
directory = '/Users/rhardy/Desktop/1021DataTest/Lexical_Decisions_Run_Test/Accuracy'

# Every CSV file directly inside that folder
file_pattern = os.path.join(directory, '*.csv')
files = glob.glob(file_pattern)

# One result record per file
data_list = []

for file in files:
    base_name = os.path.basename(file)
    try:
        df = pd.read_csv(file)

        if not {'Accuracy1', 'Accuracy2'}.issubset(df.columns):
            # Without both accuracy columns there is nothing to summarise
            outcome = 'Columns Missing'
        else:
            # 'yes' entries (any letter case) count as correct; every
            # non-null entry counts as a possible response
            total_correct = 0
            total_possible = 0
            for accuracy_column in ('Accuracy1', 'Accuracy2'):
                total_correct += df[accuracy_column].str.lower().eq('yes').sum()
                total_possible += df[accuracy_column].notnull().sum()

            outcome = (total_correct / total_possible) * 100 if total_possible > 0 else 0
    except Exception as e:
        # Record the failure for this file and continue with the rest
        outcome = f'Error: {e}'

    data_list.append({'Filename': base_name, 'Accuracy (%)': outcome})

# Collect the per-file results into one table
results_df = pd.DataFrame(data_list)

# Function to highlight rows with accuracy below a threshold (80% by default)
def highlight_low_accuracy(row, threshold=80, column='Accuracy (%)'):
    """Return one CSS string per cell, red when the row's accuracy is low.

    Args:
        row: Mapping or Series holding at least ``column``.
        threshold: Rows whose accuracy is strictly below this value are
            highlighted. Defaults to 80.
        column: Name of the entry holding the accuracy. Defaults to
            'Accuracy (%)'.

    Returns:
        list[str]: ``len(row)`` copies of 'background-color: red' when the
        accuracy is an int/float below ``threshold``; otherwise ``len(row)``
        empty strings (this includes text entries such as 'Columns Missing').
    """
    value = row[column]
    if isinstance(value, (int, float)) and value < threshold:
        return ['background-color: red'] * len(row)
    return [''] * len(row)

# Build a Styler for results_df. With axis=1, highlight_low_accuracy is called
# once per row and returns one CSS string for each cell of that row.
styled_df = results_df.style.apply(highlight_low_accuracy, axis=1)

# Display the styled DataFrame: as the last expression of the cell, it is
# rendered as the cell's output.
styled_df


Unnamed: 0,Filename,Accuracy (%)
0,order1aB_accuracy.csv,85.416667
