In [1]:
import os
import re
import pandas as pd
from typing import Dict, List, Optional

# ========== User Customization ==========
# --- Participant ---
participant_id = 6                       # integer ID, e.g. 0, 1, 2...
participant_id_str = format(participant_id, "02d")   # zero-padded to two digits
gender = "f"                             # "M" or "F", any letter case
gender = gender.strip().lower() if gender else ""    # stored trimmed and lowercased

# --- File matching ---
# Condition token expected as the filename suffix right before .txt
conditions = ["v", "va", "vt", "vat"]

# Keyword the filename must contain (compared case-insensitively)
stroop_keyword = "stroop"

# When several files in the folder match the same condition:
# "latest" keeps the one with the newest mtime, "first" keeps the first match
choose_strategy = "latest"  # "latest" | "first"

# Folder to scan: the current working directory only (no recursion)
# Alternative: search_dir = os.path.dirname(__file__)
search_dir = os.getcwd()

# --- Output ---
# Excel workbook that receives the results
output_excel = (r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD"
                r"\2025\Prototype B\Stroop_Results.xlsx")
# ========== END ==========


def find_condition_files_current_folder(
    folder: str,
    conds: List[str],
    keyword: str,
    choose: str = "latest",
) -> Dict[str, Optional[str]]:
    """
    Pick, for each condition, one file in `folder` (non-recursive) whose name contains
    `keyword` and ends with _{condition}.txt. Keyword, condition token and extension
    are all matched case-insensitively.

    Args:
        folder: Directory to scan; sub-directories are not searched.
        conds: Condition tokens to look for (e.g. ["v", "va", "vt", "vat"]).
        keyword: Text that must appear in the filename before the condition suffix.
        choose: "latest" selects the candidate with the newest modification time;
            any other value selects the first candidate in filename order.

    Returns:
        {condition: filepath or None if not found}, keyed by the entries of `conds`
        exactly as given. Every value is None when `folder` is not a directory.
    """
    if not os.path.isdir(folder):
        print(f"[WARN] Not a directory: {folder}")
        return {c: None for c in conds}

    cond_group = "|".join(map(re.escape, conds))
    pattern = re.compile(rf"(?i){re.escape(keyword)}.*_({cond_group})\.txt$")
    keyword_lower = keyword.lower()

    # Candidates are stored under the lowercased token so that conditions passed in
    # upper/mixed case still receive the files the case-insensitive pattern matched.
    # temp store: lowercased condition -> list[(path, mtime)]
    found: Dict[str, List[tuple]] = {c.lower(): [] for c in conds}

    # os.listdir() returns entries in arbitrary order; sorting makes the "first"
    # strategy (and the winner among equal mtimes) reproducible between runs.
    for fn in sorted(os.listdir(folder)):
        lower = fn.lower()
        if keyword_lower not in lower or not lower.endswith(".txt"):
            continue

        m = pattern.search(fn)
        if not m:
            continue

        full_path = os.path.join(folder, fn)
        if not os.path.isfile(full_path):
            continue

        try:
            mtime = os.path.getmtime(full_path)
        except OSError:
            # Unreadable timestamp: keep the file as a candidate, ranked oldest.
            mtime = 0.0

        cond = m.group(1).lower()
        if cond in found:
            found[cond].append((full_path, mtime))

    chosen: Dict[str, Optional[str]] = {}
    for c in conds:
        candidates = found.get(c.lower(), [])
        if not candidates:
            chosen[c] = None
        elif choose == "latest":
            # max() keeps the earliest of equal mtimes, i.e. the first by filename.
            chosen[c] = max(candidates, key=lambda item: item[1])[0]
        else:
            chosen[c] = candidates[0][0]
    return chosen


def read_stroop_and_compute(file_path: str) -> Optional[dict]:
    """
    Load one stroop .txt file (whitespace-separated, no header row) and summarise it.

    The first eight columns are labelled, rows lacking a numeric Condition, Correct
    or RT are dropped, and only trials with Correct == 1 are kept. The mean RT is then
    taken for Condition == 1 (congruent) and Condition == 0 (incongruent).

    Returns a dict with 'Congruent_RT', 'Incongruent_RT' and 'Stroop_Effect'
    (incongruent minus congruent), each rounded to 2 decimals or None when it cannot
    be computed. Returns None if the file cannot be read, has fewer than eight
    columns, or contains no correct trials.
    """
    try:
        # Whitespace separator so runs of spaces/tabs count as one delimiter
        table = pd.read_csv(file_path, sep=r"\s+", header=None, engine="python")
    except Exception as exc:
        print(f"[WARN] Failed to read: {file_path} | {exc}")
        return None

    column_names = ['Stage', 'Word', 'Ink', 'Condition', 'Col5', 'Col6', 'Correct', 'RT']
    n_cols = table.shape[1]
    if n_cols < 8:
        print(f"[WARN] Unexpected column count ({n_cols}) in {file_path}. Skipped.")
        return None
    if n_cols > 8:
        # Extra trailing columns are ignored
        table = table.iloc[:, :8]
    table.columns = column_names

    # Force the analysed columns to numbers; unparsable cells become NaN
    numeric_cols = ['Condition', 'Correct', 'RT']
    for name in numeric_cols:
        table[name] = pd.to_numeric(table[name], errors="coerce")

    # Discard incomplete rows, then keep the correct responses only
    table = table.dropna(subset=numeric_cols)
    correct_trials = table[table['Correct'] == 1]
    if correct_trials.empty:
        print(f"[WARN] No correct trials in {file_path}. Skipped.")
        return None

    def _mean_rt(condition_value):
        # Mean RT of the correct trials in one condition; NaN when there are none
        rts = correct_trials.loc[correct_trials['Condition'] == condition_value, 'RT']
        return float("nan") if rts.empty else float(rts.mean())

    def _rounded_or_none(value):
        return round(value, 2) if pd.notna(value) else None

    mean_cong = _mean_rt(1)
    mean_incong = _mean_rt(0)
    if pd.notna(mean_cong) and pd.notna(mean_incong):
        stroop_effect = mean_incong - mean_cong
    else:
        stroop_effect = float("nan")

    return {
        'Congruent_RT': _rounded_or_none(mean_cong),
        'Incongruent_RT': _rounded_or_none(mean_incong),
        'Stroop_Effect': _rounded_or_none(stroop_effect),
    }


def condition_from_filename(file_name: str) -> str:
    """
    Return the condition token (v/va/vt/vat) found in the `_<token>.txt` suffix of
    `file_name`, compared in lowercase. Returns 'unknown' when no such suffix exists.
    """
    suffix_match = re.search(r"_(v|va|vt|vat)\.txt$", file_name.lower())
    if suffix_match is None:
        return "unknown"
    return suffix_match.group(1)


def _append_results_to_excel(results_df: pd.DataFrame, excel_path: str) -> str:
    """
    Append `results_df` below the rows already stored in the workbook at `excel_path`,
    or create the workbook (with a header row) when it does not exist yet.

    If the existing workbook cannot be read or appended to, it is NOT overwritten:
    the new rows are saved to a separate "<name>_unsaved_<n><ext>" file next to it,
    so previously collected results are never replaced by the current rows alone.

    Returns the path of the file the rows were written to.
    """
    if not os.path.exists(excel_path):
        results_df.to_excel(excel_path, index=False, header=True)
        return excel_path

    try:
        existing = pd.read_excel(excel_path)
        # Row 0 holds the header, so the first free row is len(existing) + 1.
        start_row = len(existing) + 1
        with pd.ExcelWriter(excel_path, engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
            results_df.to_excel(writer, index=False, header=False, startrow=start_row)
        return excel_path
    except Exception as e:
        # Broad on purpose: a locked file, a corrupt workbook, a missing engine or an
        # unsupported writer option can all end up here, and in every case the
        # accumulated results must be preserved.
        stem, ext = os.path.splitext(excel_path)
        n = 1
        while os.path.exists(f"{stem}_unsaved_{n}{ext}"):
            n += 1
        fallback_path = f"{stem}_unsaved_{n}{ext}"
        print(f"[WARN] Append failed. Existing file not overwritten; "
              f"saving new rows to: {fallback_path} | {e}")
        results_df.to_excel(fallback_path, index=False, header=True)
        return fallback_path


def main() -> None:
    """
    Locate one stroop file per condition in `search_dir`, compute the RT metrics for
    each, and append one row per condition to the `output_excel` workbook.

    Conditions without a file, or whose file cannot be processed, still get a row
    with empty metric cells.
    """
    # 1) Match files in current folder
    chosen_files = find_condition_files_current_folder(
        folder=search_dir,
        conds=conditions,
        keyword=stroop_keyword,
        choose=choose_strategy
    )

    print("=== Matched files by condition (current folder only) ===")
    for c, p in chosen_files.items():
        print(f"  {c}: {p if p else 'NOT FOUND'}")

    # 2) Compute results per condition
    empty_metrics = {
        'Congruent_RT': None,
        'Incongruent_RT': None,
        'Stroop_Effect': None
    }
    rows = []
    for cond in conditions:
        file_path = chosen_files.get(cond)
        metrics = read_stroop_and_compute(file_path) if file_path else None
        rows.append({
            'Participant': participant_id_str,
            'Gender': gender,
            'File': os.path.basename(file_path) if file_path else None,
            # The lookup key is the condition; re-parsing the filename against a fixed
            # token list would label any newly added condition as "unknown".
            'Condition': cond,
            **(metrics or empty_metrics)
        })

    results_df = pd.DataFrame(rows)

    # 3) Append to Excel (or create it if missing)
    _append_results_to_excel(results_df, output_excel)

    print("\nAll data has been written!")
    print(results_df)


if __name__ == "__main__":
    main()


=== Matched files by condition (current folder only) ===
  v: g:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\06\PrototypeB_Stroop.data.2025-09-30--04-09_v.txt
  va: g:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\06\PrototypeB_Stroop.data.2025-09-30--04-51_va.txt
  vt: g:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\06\PrototypeB_Stroop.data.2025-09-30--04-24_vt.txt
  vat: g:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\06\PrototypeB_Stroop.data.2025-09-30--04-38_vat.txt

All data has been written!
  Participant Gender                                              File  \
0          06      f    PrototypeB_Stroop.data.2025-09-30--04-09_v.txt   
1          06      f   PrototypeB_Stroop.data.2025-09-30--04-51_va.txt   
2          06      f   PrototypeB_Stroop.data.2025-09-30--04-24_vt.txt   
3          06  

In [2]:
import os
import re
import pandas as pd
from typing import Dict, List, Optional

# ========== User Customization ==========
# --- Participant ---
participant_id = 6
participant_id_str = format(participant_id, "02d")   # zero-padded to two digits

# Gender is stored as lowercase "m"/"f"; anything else falls back to "m"
gender = "f"
gender = gender.strip().lower() if gender else ""
if gender not in {"m", "f"}:
    gender = "m"

# --- File matching ---
# Condition token expected as the filename suffix right before .txt
conditions = ["v", "va", "vt", "vat"]

# Keyword the filename must contain (compared case-insensitively)
keyword_memory = "memory"

# When several files in this folder match the same condition:
# "latest" keeps the one with the newest mtime, "first" keeps the first match
choose_strategy = "latest"  # "latest" | "first"

# Folder to scan: the current working directory only (no recursion)
search_dir = os.getcwd()

# --- Output ---
# Excel workbook that receives the results
output_excel = (r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD"
                r"\2025\Prototype B\Memory_Backward_corsi_Results.xlsx")
# ========== END ==========


def find_condition_files_current_folder(
    folder: str,
    conds: List[str],
    keyword: str,
    choose: str = "latest",
) -> Dict[str, Optional[str]]:
    """
    Pick, for each condition, one file in `folder` (non-recursive) whose name contains
    `keyword` and ends with _{condition}.txt. Keyword, condition token and extension
    are all matched case-insensitively.

    Args:
        folder: Directory to scan; sub-directories are not searched.
        conds: Condition tokens to look for (e.g. ["v", "va", "vt", "vat"]).
        keyword: Text that must appear in the filename before the condition suffix.
        choose: "latest" selects the candidate with the newest modification time;
            any other value selects the first candidate in filename order.

    Return:
        {condition: filepath or None if not found}, keyed by the entries of `conds`
        exactly as given. Every value is None when `folder` is not a directory.
    """
    if not os.path.isdir(folder):
        print(f"[WARN] Not a directory: {folder}")
        return {c: None for c in conds}

    cond_group = "|".join(map(re.escape, conds))
    pattern = re.compile(rf"(?i){re.escape(keyword)}.*_({cond_group})\.txt$")
    keyword_lower = keyword.lower()

    # Candidates are stored under the lowercased token so that conditions passed in
    # upper/mixed case still receive the files the case-insensitive pattern matched.
    found: Dict[str, List[tuple]] = {c.lower(): [] for c in conds}

    # os.listdir() returns entries in arbitrary order; sorting makes the "first"
    # strategy (and the winner among equal mtimes) reproducible between runs.
    for fn in sorted(os.listdir(folder)):
        lower = fn.lower()
        if keyword_lower not in lower or not lower.endswith(".txt"):
            continue

        m = pattern.search(fn)
        if not m:
            continue

        full_path = os.path.join(folder, fn)
        if not os.path.isfile(full_path):
            continue

        try:
            mtime = os.path.getmtime(full_path)
        except OSError:
            # Unreadable timestamp: keep the file as a candidate, ranked oldest.
            mtime = 0.0

        cond = m.group(1).lower()
        if cond in found:
            found[cond].append((full_path, mtime))

    chosen: Dict[str, Optional[str]] = {}
    for c in conds:
        candidates = found.get(c.lower(), [])
        if not candidates:
            chosen[c] = None
        elif choose == "latest":
            # max() keeps the earliest of equal mtimes, i.e. the first by filename.
            chosen[c] = max(candidates, key=lambda item: item[1])[0]
        else:
            chosen[c] = candidates[0][0]
    return chosen


def read_memory_and_compute_max(file_path: str) -> Optional[float]:
    """
    Load a memory .txt file (whitespace-separated, no header row), label its first
    four columns Col1..Col4, and return the largest Col1 value among the rows where
    Col3 == 1.

    Rows whose Col1 or Col3 is not numeric are ignored. Returns None if the file
    cannot be read, has fewer than four columns, or has no row with Col3 == 1.
    """
    try:
        table = pd.read_csv(file_path, sep=r"\s+", header=None, engine="python")
    except Exception as exc:
        print(f"[WARN] Failed to read: {file_path} | {exc}")
        return None

    width = table.shape[1]
    if width < 4:
        print(f"[WARN] Unexpected column count ({width}) in {file_path}. Skipped.")
        return None
    if width > 4:
        # Extra trailing columns are ignored
        table = table.iloc[:, :4]
    table.columns = ['Col1', 'Col2', 'Col3', 'Col4']

    # Force the two columns used below to numbers; unparsable cells become NaN
    needed = ['Col1', 'Col3']
    for name in needed:
        table[name] = pd.to_numeric(table[name], errors="coerce")
    table = table.dropna(subset=needed)

    selected = table.loc[table['Col3'] == 1, 'Col1']
    if selected.empty:
        print(f"[WARN] No rows with Col3==1 in {file_path}.")
        return None

    largest = selected.max()
    if pd.isna(largest):
        return None
    return float(largest)


def condition_from_filename(file_name: str) -> str:
    """
    Return the condition token (v/va/vt/vat) found in the `_<token>.txt` suffix of
    `file_name`, compared in lowercase. Returns 'unknown' when no such suffix exists.
    """
    suffix_match = re.search(r"_(v|va|vt|vat)\.txt$", file_name.lower())
    if suffix_match is None:
        return "unknown"
    return suffix_match.group(1)


def _append_results_to_excel(results_df: pd.DataFrame, excel_path: str) -> str:
    """
    Append `results_df` below the rows already stored in the workbook at `excel_path`,
    or create the workbook (with a header row) when it does not exist yet.

    If the existing workbook cannot be read or appended to, it is NOT overwritten:
    the new rows are saved to a separate "<name>_unsaved_<n><ext>" file next to it,
    so previously collected results are never replaced by the current rows alone.

    Returns the path of the file the rows were written to.
    """
    if not os.path.exists(excel_path):
        results_df.to_excel(excel_path, index=False, header=True)
        return excel_path

    try:
        existing = pd.read_excel(excel_path)
        # Row 0 holds the header, so the first free row is len(existing) + 1.
        start_row = len(existing) + 1
        with pd.ExcelWriter(excel_path, engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
            results_df.to_excel(writer, index=False, header=False, startrow=start_row)
        return excel_path
    except Exception as e:
        # Broad on purpose: a locked file, a corrupt workbook, a missing engine or an
        # unsupported writer option can all end up here, and in every case the
        # accumulated results must be preserved.
        stem, ext = os.path.splitext(excel_path)
        n = 1
        while os.path.exists(f"{stem}_unsaved_{n}{ext}"):
            n += 1
        fallback_path = f"{stem}_unsaved_{n}{ext}"
        print(f"[WARN] Append failed. Existing file not overwritten; "
              f"saving new rows to: {fallback_path} | {e}")
        results_df.to_excel(fallback_path, index=False, header=True)
        return fallback_path


def main() -> None:
    """
    Locate one memory file per condition in `search_dir`, compute the maximum item
    count for each, and append one row per condition to the `output_excel` workbook.

    Conditions without a file, or whose file cannot be processed, still get a row
    with an empty 'Max_Memory_Items' cell.
    """
    # 1) Match files in current folder (keyword: "memory")
    chosen_files = find_condition_files_current_folder(
        folder=search_dir,
        conds=conditions,
        keyword=keyword_memory,
        choose=choose_strategy
    )

    print("=== Matched files by condition (current folder only) ===")
    for c, p in chosen_files.items():
        print(f"  {c}: {p if p else 'NOT FOUND'}")

    # 2) Compute results
    rows = []
    for cond in conditions:
        file_path = chosen_files.get(cond)
        rows.append({
            'Participant': participant_id_str,
            'Gender': gender,
            'File': os.path.basename(file_path) if file_path else None,
            # The lookup key is the condition; re-parsing the filename against a fixed
            # token list would label any newly added condition as "unknown".
            'Condition': cond,
            'Max_Memory_Items': read_memory_and_compute_max(file_path) if file_path else None
        })

    results_df = pd.DataFrame(rows)

    # 3) Append to Excel (or create it if missing)
    _append_results_to_excel(results_df, output_excel)

    print("\nAll data has been written!")
    print(results_df)


if __name__ == "__main__":
    main()


=== Matched files by condition (current folder only) ===
  v: g:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\06\PrototypeB_Memory.data.2025-09-30--04-10_v.txt
  va: g:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\06\PrototypeB_Memory.data.2025-09-30--04-53_va.txt
  vt: g:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\06\PrototypeB_Memory.data.2025-09-30--04-25_vt.txt
  vat: g:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\06\PrototypeB_Memory.data.2025-09-30--04-40_vat.txt

All data has been written!
  Participant Gender                                              File  \
0          06      f    PrototypeB_Memory.data.2025-09-30--04-10_v.txt   
1          06      f   PrototypeB_Memory.data.2025-09-30--04-53_va.txt   
2          06      f   PrototypeB_Memory.data.2025-09-30--04-25_vt.txt   
3          06  