In [4]:
import os
import pandas as pd
from pathlib import Path

# ==========================
# 1️⃣ RESOLVE INPUT / OUTPUT DIRECTORIES
# ==========================
# Paths are anchored to the directory holding this script. When the code is
# run from Jupyter or an interactive prompt, `__file__` is not defined, so the
# current working directory is used instead.
try:
    BASE_DIR = Path(os.path.dirname(os.path.abspath(__file__)))
except NameError:
    BASE_DIR = Path.cwd()

# Source workbooks are read from INPUT_DIR; text files are written to OUTPUT_DIR.
INPUT_DIR = BASE_DIR.joinpath("county_excel")
OUTPUT_DIR = BASE_DIR.joinpath("county_unique_responses")

# Create the output folder up front; it is not an error if it already exists.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# ==========================
# 2️⃣ PROCESS EACH QUESTION FILE
# ==========================
# For every .xlsx workbook in INPUT_DIR: read its first sheet, take the
# "Response" column, drop missing and blank values, strip surrounding
# whitespace, de-duplicate, sort, and write one response per line to
# OUTPUT_DIR/<workbook stem>.txt (UTF-8). Workbooks without a "Response"
# column are skipped with a warning; a failure in one workbook is reported
# and does not stop the remaining workbooks from being processed.
if not INPUT_DIR.is_dir():
    # Globbing a missing directory yields nothing, which would otherwise make
    # the run look like a silent success.
    print(f"⚠️ Input directory not found: {INPUT_DIR}")

# sorted() makes the processing order (and therefore the log) deterministic;
# glob order depends on the filesystem.
for file in sorted(INPUT_DIR.glob("*.xlsx")):
    # Excel creates "~$<name>.xlsx" lock files next to any open workbook;
    # they match the pattern but are not readable workbooks.
    if file.name.startswith("~$"):
        continue

    try:
        df = pd.read_excel(file)
        if "Response" not in df.columns:
            print(f"⚠️ Skipping {file.name}: No 'Response' column found.")
            continue

        responses = df["Response"].dropna().astype(str).str.strip()
        # Whitespace-only cells survive dropna() and become "" after strip();
        # they are not real responses, so exclude them from output and count.
        responses = responses[responses != ""]
        unique_responses = sorted(set(responses))

        # Write to .txt file named after question short name
        txt_filename = file.stem + ".txt"
        output_path = OUTPUT_DIR / txt_filename

        with open(output_path, "w", encoding="utf-8") as f:
            f.writelines(response + "\n" for response in unique_responses)

        print(f"✅ {txt_filename} written with {len(unique_responses)} unique responses.")

    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch job, so
        # any failure (unreadable workbook, permission error, ...) is reported
        # and the loop moves on to the next file.
        print(f"❌ Error with {file.name}: {e}")


✅ barriers_ict_use.txt written with 26 unique responses.
✅ barriers_role_expansion.txt written with 24 unique responses.
✅ barriers_women_leadership.txt written with 22 unique responses.
✅ climate_influence_enterprises.txt written with 29 unique responses.
✅ community_engagement_selection.txt written with 28 unique responses.
✅ comparison_male_enterprises.txt written with 23 unique responses.
✅ competition_value_addition.txt written with 26 unique responses.
✅ data_sources_enterprises.txt written with 22 unique responses.
✅ economic_benefits_women.txt written with 25 unique responses.
✅ enterprise_leadership_by_group.txt written with 25 unique responses.
✅ enterprise_replicability.txt written with 28 unique responses.
✅ enterprise_selection_criteria.txt written with 24 unique responses.
✅ enterprise_success_factors.txt written with 28 unique responses.
✅ improving_vc_market_access.txt written with 24 unique responses.
✅ market_demand_vc.txt written with 28 unique responses.
✅ new_clima