# In [6]:
import os
from docx import Document
from rapidfuzz import fuzz, process

# ============ 1. AUTO-DETECT WORKING DIRECTORY ============
# A script run from a file has __file__; an interactive session or notebook
# does not, in which case the current working directory is used instead.
try:
    _this_file = __file__
except NameError:
    BASE_DIR = os.getcwd()
else:
    BASE_DIR = os.path.dirname(os.path.abspath(_this_file))

# Input folder, question lookup file and report file all live under BASE_DIR.
QUESTIONNAIRE_DIR, QUESTION_LOOKUP_FILE, OUTPUT_FILE = (
    os.path.join(BASE_DIR, leaf)
    for leaf in (
        "Questionnaires",
        "questionnaire_questions.txt",
        "questionnaire_response_status.txt",
    )
)

# ============ 2. Load Master List of Questions ============
# One question per line; surrounding whitespace is stripped and lines that
# end up empty are dropped.
with open(QUESTION_LOOKUP_FILE, "r", encoding="utf-8") as lookup_fh:
    KNOWN_QUESTIONS = list(filter(None, map(str.strip, lookup_fh)))

# ============ 3. Function to Check if Text is a Known Question ============
def is_question(text, threshold=90):
    """Return True when *text* is blank or fuzzy-matches a known question.

    Args:
        text: Paragraph or table-cell text taken from a questionnaire.
        threshold: Minimum ``fuzz.partial_ratio`` score at which the best
            match found in ``KNOWN_QUESTIONS`` counts as a known question.

    Returns:
        True if the stripped text is empty, or if its best match in
        ``KNOWN_QUESTIONS`` scores at least ``threshold``. False otherwise,
        including when ``KNOWN_QUESTIONS`` is empty.
    """
    text = text.strip()
    if not text:
        return True  # Treat blank lines as non-responses

    # With no known questions nothing can match. Checked up front because
    # extractOne returns None (not a tuple) when given no choices.
    if not KNOWN_QUESTIONS:
        return False

    # NOTE(review): partial_ratio scores the best-aligned substring, so a very
    # short answer that occurs verbatim inside a question may score high and
    # be classified as a question -- confirm this is acceptable for the data.
    best = process.extractOne(text, KNOWN_QUESTIONS, scorer=fuzz.partial_ratio)
    if best is None:
        return False

    # extractOne yields (choice, score, index); only the score is needed.
    return best[1] >= threshold

# ============ 4. Main Logic ============
# Builds one status line per questionnaire: a file "has responses" when at
# least one non-blank paragraph or table cell is not a known question.
summary = []

for filename in os.listdir(QUESTIONNAIRE_DIR):
    # Word keeps a "~$name.docx" owner/lock file next to any document that is
    # open; it ends in .docx but is not a real document package, so it must
    # be skipped rather than handed to Document().
    if filename.startswith("~$") or not filename.lower().endswith(".docx"):
        continue

    filepath = os.path.join(QUESTIONNAIRE_DIR, filename)
    doc = Document(filepath)

    # Candidate texts: body paragraphs first, then every cell of every table
    # exposed by doc.tables.
    candidate_texts = [para.text.strip() for para in doc.paragraphs]
    candidate_texts.extend(
        cell.text.strip()
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
    )

    # Anything non-blank that is not a known question counts as a response.
    response_lines = [
        txt for txt in candidate_texts if txt and not is_question(txt)
    ]

    # Determine status
    if response_lines:
        summary.append(f"{filename}: ✅ Has responses")
    else:
        summary.append(f"{filename}: ❌ Empty (no responses)")

# ============ 5. Write to Output File ============
# Status lines are written in sorted order, one per line.
with open(OUTPUT_FILE, "w", encoding="utf-8") as report:
    report.writelines(f"{entry}\n" for entry in sorted(summary))

print(f"✅ Status written to: {OUTPUT_FILE}")


# Output: ✅ Status written to: d:\AAAA_Data\GENDER\questionnaire_response_status.txt
