# In [None]:
import warnings

import pandas as pd

# Likert labels listed from least to most agreement; each label's numeric
# score is its 1-based position in this sequence (1 through 5).
likert_map = {
    label: score
    for score, label in enumerate(
        (
            "Strongly Disagree",
            "Disagree",
            "Neutral",
            "Agree",
            "Strongly Agree",
        ),
        start=1,
    )
}

# Survey questions grouped by the wellness dimension they contribute to.
# The question text doubles as the column name looked up in the survey data.
dimensions = {
    "Mental Wellbeing": [
        "I feel mentally prepared to start this semester.",
        "I feel emotionally balanced and able to manage my feelings.",
        "I feel overwhelmed or burnt out.",  # negatively worded
        "I feel confident handling challenges this semester.",
    ],
    "Physical Wellbeing": [
        "I am getting enough sleep and rest.",
        "I eat regular and balanced meals.",
        "I engage in some form of physical activity (e.g. walking, sports).",
    ],
    "Social Wellbeing": [
        "I feel a sense of connection with other residents.",
        "I have someone I can talk to when I’m feeling stressed.",
        "I feel accepted and valued in my residential community.",
    ],
    "Environmental Comfort": [
        "My room is clean, safe, and comfortable.",
        "Common areas in my residence support my wellbeing (e.g. lounge, kitchen).",
        "My residence is a good place to study or focus.",
    ],
    "Resource Awareness": [
        "I know where to go if I need mental health support.",
        "I feel comfortable reaching out to staff or support services.",
        "I have accessed support services (counseling, health, peer support) this semester.",
    ],
    "Academic Resilience": [
        "I feel confident managing my academic workload.",
        "I balance study and rest effectively.",
        "Academic stress has affected my overall wellbeing.",  # negatively worded
    ],
}

# Negatively worded items: agreeing signals *lower* wellbeing, so their
# scores are flipped before they are averaged with the other items.
reverse_coded = [
    "I feel overwhelmed or burnt out.",
    "Academic stress has affected my overall wellbeing.",
]

# Relative importance of each dimension in the overall index.
# The values are used as proportions of their sum (they total 100 here).
dimension_weights = {
    "Mental Wellbeing": 25,
    "Physical Wellbeing": 15,
    "Social Wellbeing": 20,
    "Environmental Comfort": 10,
    "Resource Awareness": 15,
    "Academic Resilience": 15,
}

def _likert_to_numeric(answers, column_name):
    """Convert one column of Likert answers to numeric scores.

    Text labels are matched against ``likert_map`` ignoring case and
    surrounding whitespace.  Values that are already numeric (or numeric
    strings) pass through unchanged.  Anything else becomes NaN and is
    reported with a warning, rather than failing later in the arithmetic.
    """
    label_scores = {label.casefold(): score for label, score in likert_map.items()}

    def to_score(value):
        if isinstance(value, str):
            # Leave unknown text as-is; to_numeric below decides its fate.
            return label_scores.get(value.strip().casefold(), value)
        return value

    scores = pd.to_numeric(answers.map(to_score), errors="coerce")

    # Cells that held a value but could not be scored (blank cells excluded).
    unrecognised = scores.isna() & answers.notna()
    if unrecognised.any():
        warnings.warn(
            f"{int(unrecognised.sum())} unrecognised response(s) in column "
            f"{column_name!r} were treated as missing.",
            stacklevel=3,  # attribute the warning to process_survey's caller
        )
    return scores


def process_survey(file_path, survey_type):
    """Load one survey CSV and score it on the wellbeing dimensions.

    Steps, applied to every column whose header matches a question listed
    in ``dimensions``:

    1. Convert Likert answers to 1-5 scores (see ``likert_map``).
    2. Flip the scores of items listed in ``reverse_coded`` (``6 - score``).
    3. Rescale to 0-100 (1 -> 0, 5 -> 100).
    4. Add one column per dimension holding the row-wise mean of that
       dimension's questions that are present in the file.
    5. Add ``"Overall Wellbeing Index"``: the weighted mean of the dimension
       columns, rounded to 2 decimals.  Weights come from
       ``dimension_weights`` and are normalised over the dimensions that are
       actually present, so a survey that asks only a subset of dimensions
       still yields an index on the full 0-100 scale.
    6. Add ``"Survey Type"`` set to ``survey_type`` on every row.

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the survey CSV.
        survey_type: Label stored in the ``"Survey Type"`` column.

    Returns:
        The loaded DataFrame with the question columns rescaled in place and
        the dimension, index and survey-type columns appended.  If a row has
        no score for a present dimension, its overall index is NaN.  If no
        weighted dimension is present at all, the index column is all NaN.

    Errors raised by ``pd.read_csv`` (e.g. a missing file) propagate.
    """
    df = pd.read_csv(file_path)

    # Restrict to the known questions that this particular survey asked.
    known_items = {item for items in dimensions.values() for item in items}
    likert_cols = [col for col in df.columns if col in known_items]
    reverse_items = set(reverse_coded)

    for col in likert_cols:
        scores = _likert_to_numeric(df[col], col)
        if col in reverse_items:
            # On a 1-5 scale, 6 - x mirrors the score (1 <-> 5, 2 <-> 4).
            scores = 6 - scores
        df[col] = (scores - 1) / 4 * 100

    # Dimension score = mean of whichever of its questions are present.
    for dim, questions in dimensions.items():
        present_cols = [q for q in questions if q in df.columns]
        if present_cols:
            df[dim] = df[present_cols].mean(axis=1)

    # Normalise the weights over the dimensions present in this survey;
    # dividing by the sum of *all* weights would cap partial surveys
    # below 100 and make them incomparable with full ones.
    active_weights = {
        dim: weight
        for dim, weight in dimension_weights.items()
        if dim in df.columns
    }
    total_weight = sum(active_weights.values())
    if total_weight:
        overall_score = sum(
            df[dim] * (weight / total_weight)
            for dim, weight in active_weights.items()
        )
        df["Overall Wellbeing Index"] = overall_score.round(2)
    else:
        # Nothing to aggregate: keep the column so concatenated outputs
        # share one schema, but mark the index as unavailable.
        df["Overall Wellbeing Index"] = float("nan")

    df["Survey Type"] = survey_type

    return df

# Score each survey wave. The generator is consumed in order by the tuple
# unpacking, so the files are read in the sequence listed below.
pulse1_df, pulse2_df, pulse3_df, comp_df = (
    process_survey(csv_path, wave_label)
    for csv_path, wave_label in (
        ("/content/pulse1.csv", "Pulse 1"),
        ("/content/pulse2.csv", "Pulse 2"),
        ("/content/pulse3.csv", "Pulse 3"),
        ("/content/comprehensive.csv", "Comprehensive"),
    )
)

# Stack the four waves into one table with a fresh 0..n-1 row index.
all_surveys = pd.concat(
    [pulse1_df, pulse2_df, pulse3_df, comp_df],
    ignore_index=True,
)

# Write the combined table to an Excel workbook (row index omitted).
# CSV alternative: all_surveys.to_csv("all_processed_surveys.csv", index=False)
all_surveys.to_excel("all_processed_surveys.xlsx", index=False)

print("✅ Export complete!")