In [9]:
import os
import json
import pandas as pd
from datetime import datetime, timezone

# Exercise

In [10]:
# Build df_exercise: one row per Exercise EMA response, read from every
# Exercise_uXX.json file in the Exercise response folder.
folderExercise = "./../../raw_data/EMA/response/Exercise/"

# Output schema, fixed up front so the frame has its columns (and can be
# sorted) even when no rows were parsed.
exercise_columns = [
    "user_id",
    "resp_time",
    "weekday",
    "hour",
    "exercise",
    "walk",
    "have",
    "schedule",
    "latitude",
    "longitude",
    "has_location",
]


def _exercise_int(value):
    """Return ``value`` converted to int, or None if it is missing or not numeric.

    None, the literal string "null" and the empty string count as missing.
    A numeric 0 is a valid answer and is kept (a plain truthiness test would
    drop it).
    """
    if value is None or value == "null" or value == "":
        return None
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


# List to collect all rows
all_rows = []

# sorted() makes the processing order independent of the filesystem's listing order
for filename in sorted(os.listdir(folderExercise)):
    if not (filename.startswith("Exercise_u") and filename.endswith(".json")):
        continue

    # "Exercise_u07.json" -> "u07"
    user_id = filename.replace("Exercise_", "").replace(".json", "")
    file_path = os.path.join(folderExercise, filename)

    with open(file_path, "r", encoding="utf-8") as f:
        try:
            entries = json.load(f)
        except Exception as e:
            # Skip unreadable files, but say so instead of dropping them silently
            print(f"Could not load {filename}: {e}")
            continue

    for entry in entries:
        resp_time = entry.get("resp_time")
        location = entry.get("location")

        # Convert the epoch timestamp to a UTC datetime; a missing or
        # malformed timestamp leaves all time-derived fields as None.
        readable_time = weekday = hour = None
        if resp_time:
            try:
                dt = datetime.fromtimestamp(resp_time, tz=timezone.utc)
            except (TypeError, ValueError, OverflowError, OSError):
                dt = None
            if dt is not None:
                readable_time = dt.isoformat()
                weekday = dt.strftime("%A")
                hour = dt.hour

        # Parse "lat,lon"; anything else (missing, "Unknown", "null",
        # wrong number of parts, non-numeric parts) yields no coordinates.
        lat = lon = None
        if location and location not in ("Unknown", "null"):
            try:
                lat_str, lon_str = location.split(",")
                lat, lon = float(lat_str), float(lon_str)
            except (AttributeError, TypeError, ValueError):
                lat = lon = None

        all_rows.append(
            {
                "user_id": user_id,
                "resp_time": readable_time,
                "weekday": weekday,
                "hour": hour,
                "exercise": _exercise_int(entry.get("exercise")),
                "walk": _exercise_int(entry.get("walk")),
                "have": _exercise_int(entry.get("have")),
                "schedule": _exercise_int(entry.get("schedule")),
                "latitude": lat,
                "longitude": lon,
                # 1 only when usable coordinates were parsed, so the flag
                # always agrees with the latitude/longitude columns
                "has_location": int(lat is not None),
            }
        )

# Create a DataFrame sorted by user, then by response time
df_exercise = pd.DataFrame(all_rows, columns=exercise_columns)
df_exercise = df_exercise.sort_values(by=["user_id", "resp_time"])

# Save to CSV
# df_exercise.to_csv("./Exercise.csv", index=False)

# Social

In [11]:
# Build df_social: one row per Social EMA response, read from every
# Social_uXX.json file in the Social response folder.
# The path gets its own variable so this cell no longer overwrites the
# Exercise cell's folderExercise.
folderSocial = "./../../raw_data/EMA/response/Social/"

# Output schema, fixed up front so the frame has its columns (and can be
# sorted) even when no entries were parsed.
social_columns = ["user_id", "resp_time", "datetime_utc", "number", "location"]

# Store cleaned entries
all_social_data = []

# sorted() makes the processing order independent of the filesystem's listing order
for filename in sorted(os.listdir(folderSocial)):
    if not (filename.startswith("Social_u") and filename.endswith(".json")):
        continue

    # "Social_u07.json" -> "u07"
    user_id = filename.replace("Social_", "").replace(".json", "")
    file_path = os.path.join(folderSocial, filename)

    with open(file_path, "r", encoding="utf-8") as f:
        try:
            entries = json.load(f)
        except Exception as e:
            print(f"Could not load {filename}: {e}")
            continue

    for entry in entries:
        resp_time = entry.get("resp_time")

        # Convert the epoch timestamp to a UTC datetime; a missing or
        # malformed timestamp becomes None instead of aborting the cell.
        try:
            dt = datetime.fromtimestamp(resp_time, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            dt = None

        all_social_data.append(
            {
                "user_id": user_id,
                "resp_time": resp_time,
                "datetime_utc": dt,
                "number": entry.get("number"),
                "location": entry.get("location"),
            }
        )

# Convert to DataFrame sorted by user, then by response time
df_social = pd.DataFrame(all_social_data, columns=social_columns)
df_social = df_social.sort_values(by=["user_id", "datetime_utc"])

# Save to CSV
# df_social.to_csv("./Social.csv", index=False)

# Sleep

In [12]:
# Build df_sleep: one row per Sleep EMA response, read from every
# Sleep_uXX.json file in the Sleep response folder.
folderSleep = "./../../raw_data/EMA/response/Sleep/"

# Output schema, fixed up front so the frame has its columns (and can be
# sorted) even when no entries were parsed.
sleep_columns = [
    "user_id",
    "resp_time",
    "datetime_utc",
    "sleep_hours",
    "sleep_quality",
    "sleepiness",
    "location",
]

# Collect cleaned records
all_sleep_data = []

# sorted() makes the processing order independent of the filesystem's listing order
for filename in sorted(os.listdir(folderSleep)):
    if not (filename.startswith("Sleep_u") and filename.endswith(".json")):
        continue

    # "Sleep_u07.json" -> "u07"
    user_id = filename.replace("Sleep_", "").replace(".json", "")
    file_path = os.path.join(folderSleep, filename)

    with open(file_path, "r", encoding="utf-8") as f:
        try:
            entries = json.load(f)
        except Exception as e:
            print(f"Could not load {filename}: {e}")
            continue

    for entry in entries:
        resp_time = entry.get("resp_time")

        # Convert the epoch timestamp to a UTC datetime; a missing or
        # malformed timestamp becomes None instead of aborting the cell.
        try:
            dt = datetime.fromtimestamp(resp_time, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            dt = None

        all_sleep_data.append(
            {
                "user_id": user_id,
                "resp_time": resp_time,
                "datetime_utc": dt,
                # Raw answers are stored as-is under more descriptive names
                "sleep_hours": entry.get("hour"),
                "sleep_quality": entry.get("rate"),
                "sleepiness": entry.get("social"),  # Trouble staying awake
                "location": entry.get("location"),
            }
        )

# Convert to DataFrame sorted by user, then by response time
df_sleep = pd.DataFrame(all_sleep_data, columns=sleep_columns)
df_sleep = df_sleep.sort_values(by=["user_id", "datetime_utc"])

# df_sleep.to_csv("./Sleep.csv", index=False)

# Events

In [13]:
# Build df_event: one row per Events EMA response, read from every
# Events_uXX.json file in the Events response folder.
folderEvents = "./../../raw_data/EMA/response/Events/"

# Output schema, fixed up front so the frame keeps its columns even when no
# records were parsed.
event_columns = [
    "user_id",
    "resp_time",
    "datetime_utc",
    "positive_event_score",
    "negative_event_score",
    "emotion_range",
    "has_positive_text",
    "has_negative_text",
    "positive_text",
    "negative_text",
    "location",
]


def _event_score(value):
    """Return ``value`` converted to int, or None if it is missing or not numeric.

    None, the literal string "null" and the empty string count as missing.
    """
    if value is None or value == "null" or value == "":
        return None
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


all_event_data = []

# sorted() gives a reproducible row order; os.listdir order is arbitrary
for filename in sorted(os.listdir(folderEvents)):
    if not (filename.startswith("Events_u") and filename.endswith(".json")):
        continue

    # "Events_u07.json" -> "u07"
    user_id = filename.replace("Events_", "").replace(".json", "")
    filepath = os.path.join(folderEvents, filename)

    with open(filepath, "r", encoding="utf-8") as f:
        try:
            records = json.load(f)
        except Exception as e:
            print(f"Could not read {filename}: {e}")
            continue

    for r in records:
        resp_time = r.get("resp_time")

        # Convert the epoch timestamp to a UTC datetime; a missing or
        # malformed timestamp becomes None instead of aborting the cell.
        try:
            dt = datetime.fromtimestamp(resp_time, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            dt = None

        # Parse each score once so emotion_range is always derived from
        # exactly the values stored in the two score columns.
        positive_score = _event_score(r.get("positive"))
        negative_score = _event_score(r.get("negative"))
        if positive_score is not None and negative_score is not None:
            emotion_range = positive_score - negative_score
        else:
            emotion_range = None

        positive_text = r.get("pevent")
        negative_text = r.get("nevent")

        all_event_data.append(
            {
                "user_id": user_id,
                "resp_time": resp_time,
                "datetime_utc": dt,
                "positive_event_score": positive_score,
                "negative_event_score": negative_score,
                "emotion_range": emotion_range,
                # Flags are 1 when the free-text answer is present and non-empty
                "has_positive_text": 1 if positive_text else 0,
                "has_negative_text": 1 if negative_text else 0,
                "positive_text": positive_text,
                "negative_text": negative_text,
                "location": r.get("location"),
            }
        )

# Create DataFrame (the CSV export below is currently disabled)
df_event = pd.DataFrame(all_event_data, columns=event_columns)
# df_event.to_csv("./Events.csv", index=False)

In [14]:
# Build df_class: one row per "Class 2" EMA response, read from every
# "Class 2_uXX.json" file in the Class 2 response folder, then write it to
# ./Class2.csv.
folderClass = "./../../raw_data/EMA/response/Class 2/"

# Output schema, fixed up front so the CSV always has a header row, even when
# no records were parsed.
class_columns = [
    "user_id",
    "resp_time",
    "datetime_utc",
    "challenge",
    "effort",
    "grade",
    "location",
]

all_class_data = []

# sorted() gives a reproducible row order in the CSV; os.listdir order is arbitrary
for filename in sorted(os.listdir(folderClass)):
    if not (filename.startswith("Class 2_u") and filename.endswith(".json")):
        continue

    # "Class 2_u07.json" -> "u07"
    user_id = filename.replace("Class 2_", "").replace(".json", "")
    filepath = os.path.join(folderClass, filename)

    with open(filepath, "r", encoding="utf-8") as f:
        try:
            records = json.load(f)
        except Exception as e:
            print(f"Could not read {filename}: {e}")
            continue

    for r in records:
        resp_time = r.get("resp_time")

        # Convert the epoch timestamp to a UTC datetime; a missing or
        # malformed timestamp becomes None instead of aborting the cell.
        try:
            dt = datetime.fromtimestamp(resp_time, tz=timezone.utc)
        except (TypeError, ValueError, OverflowError, OSError):
            dt = None

        all_class_data.append(
            {
                "user_id": user_id,
                "resp_time": resp_time,
                "datetime_utc": dt,
                "challenge": r.get("challenge"),
                "effort": r.get("effort"),
                "grade": r.get("grade"),
                "location": r.get("location"),  # stored as-is; None when absent
            }
        )

# Create DataFrame and save
df_class = pd.DataFrame(all_class_data, columns=class_columns)
df_class.to_csv("./Class2.csv", index=False)