In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the survey export. Replace DATA_PATH with your actual file path.
DATA_PATH = "your_data.csv"
data = pd.read_csv(DATA_PATH, sep="\t")  # the file is read as tab-separated

# The first character of the participant ID encodes the professional group.
PARTICIPANT_TYPE_BY_PREFIX = {"P": "Psychologist", "M": "Physician", "E": "Nurse"}

# Experience is split into (-1, 2] -> "Low" and (2, 10] -> "High".
# NOTE(review): values above 10 fall outside every bin and become NaN —
# confirm that 10 really is the maximum of the EXPERIENCE_LEVEL scale.
EXPERIENCE_BINS = [-1, 2, 10]
EXPERIENCE_LABELS = ["Low", "High"]

# Extract participant group from ID. Stray whitespace and lowercase prefixes are
# normalised first so that e.g. " p07" is still recognised as a Psychologist.
id_prefix = data["ID_participant"].str.strip().str[0].str.upper()
data["PARTICIPANT_TYPE"] = id_prefix.map(PARTICIPANT_TYPE_BY_PREFIX)

# Create experience group
data["EXPERIENCE_GROUP"] = pd.cut(
    data["EXPERIENCE_LEVEL"], bins=EXPERIENCE_BINS, labels=EXPERIENCE_LABELS
)

# Rows that could not be classified carry NaN in the derived columns and are
# silently excluded from every later groupby, so report them up front.
n_unknown_type = int(data["PARTICIPANT_TYPE"].isna().sum())
n_unknown_experience = int(data["EXPERIENCE_GROUP"].isna().sum())
if n_unknown_type or n_unknown_experience:
    print("\n--- Warning: Unclassified Rows ---")
    print(f"Rows with unrecognised participant ID prefix: {n_unknown_type}")
    print(f"Rows with missing/out-of-range experience level: {n_unknown_experience}")

# Quick summary
print("\n--- Descriptive Statistics ---")
print(data.describe())

print("\n--- Counts per Participant Type ---")
print(data["PARTICIPANT_TYPE"].value_counts())


In [None]:
# Pairwise correlations between all numeric columns, shown as an annotated heatmap
corr = data.select_dtypes(include='number').corr()

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    corr,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    linewidths=0.5,
    ax=ax,
)
ax.set_title("Correlation Matrix")
fig.tight_layout()
plt.show()

In [None]:
# Group comparison: Experience
# EXPERIENCE_GROUP is a categorical column (it is produced by pd.cut), so the
# `observed` flag is passed explicitly: observed=False keeps both "Low" and
# "High" in the result even if one of them has no rows. Relying on the default
# triggers a FutureWarning on recent pandas and the default itself differs
# between pandas versions, which would change the shape of this table.
grouped_exp = data.groupby("EXPERIENCE_GROUP", observed=False).mean(numeric_only=True)
print("\n--- Mean Scores by Experience Group ---")
print(grouped_exp)

In [None]:
# Per-profession averages of every numeric column
by_participant_type = data.groupby("PARTICIPANT_TYPE")
grouped_type = by_participant_type.mean(numeric_only=True)

# Header and table are emitted in a single call, one per line
print("\n--- Mean Scores by Participant Type ---", grouped_type, sep="\n")

In [None]:
# Scatter plots of NASA TLX workload against each outcome measure,
# coloured by participant type. One figure per (outcome column, title) pair.
workload_plots = [
    ("PERCEIVED_USEFULNESS", "NASA Workload vs Perceived Usefulness"),
    ("TRUST_LEVEL", "NASA Workload vs Trust Level"),
]

for outcome_column, plot_title in workload_plots:
    plt.figure(figsize=(10, 5))
    sns.scatterplot(
        data=data,
        x="NASA_Work_Load_Index",
        y=outcome_column,
        hue="PARTICIPANT_TYPE",
    )
    plt.title(plot_title)
    plt.show()

# Optional: Save cleaned dataset
# data.to_csv("cleaned_survey_data.csv", index=False)