In [None]:
import json
import random
from datetime import date, datetime, timedelta
from typing import Dict, List, Optional, Tuple

# Slot width in minutes; copied into each generated record's "slot_minutes" field.
# NOTE(review): ts_from_index() and idx() hard-code 30 instead of reading this
# constant, so changing it alone does not change the slot grid.
SLOT_MINUTES = 30

In [21]:
# Colab-only cell: mount Google Drive at /content/drive so the final cell can
# write its JSON output beneath that mount point.
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def ts_from_index(i: int) -> str:
    """Render a half-hour slot index as a zero-padded "HH:MM" label.

    Slot 0 maps to "00:00", slot 1 to "00:30", ..., slot 47 to "23:30".
    """
    hours, minutes = divmod(i * 30, 60)
    return "{:02d}:{:02d}".format(hours, minutes)


In [None]:

def idx(hh: int, mm: int) -> int:
    """Return the index of the 30-minute slot that contains clock time hh:mm.

    Minutes that do not fall on a slot boundary are floored to the slot start,
    e.g. 06:45 lands in the 06:30 slot.
    """
    minutes_since_midnight = 60 * hh + mm
    slot, _remainder = divmod(minutes_since_midnight, 30)
    return slot

In [None]:
def fill(slots: List[Dict[str, str]], start: int, end: int, activity: str):
    """Label every slot whose index lies in the half-open range [start, end).

    The slot dicts are updated in place (only their "activity" key changes);
    nothing is returned.
    """
    for slot_no in range(start, end):
        slots[slot_no].update(activity=activity)

In [None]:
def make_empty_day() -> List[Dict[str, str]]:
    """Create a fresh 48-slot day in which every slot is labelled "sleep".

    Each slot is a dict with a "ts" label (from ts_from_index) and an
    "activity"; the day builders overwrite the activities afterwards.
    """
    day = []
    for slot_no in range(48):
        day.append({"ts": ts_from_index(slot_no), "activity": "sleep"})
    return day

In [None]:
def dow_str(d: date) -> str:
    """Return the three-letter English weekday abbreviation for `d` ("Mon".."Sun")."""
    abbreviations = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    # date.weekday() numbers Monday as 0, matching the tuple order above.
    position = d.weekday()
    return abbreviations[position]

In [None]:
def week_index_from_start(d: date, start: date) -> int:
    """Return the 1-based week number of `d`, counting 7-day blocks from `start`.

    Days start .. start+6 are week 1, the next seven days are week 2, and so on.
    """
    elapsed_days = (d - start).days
    completed_weeks = elapsed_days // 7
    return completed_weeks + 1

In [None]:
# ---------- day templates ----------
def build_weekday(
    d: date,
    week_index: int,
    weather: str,
    late_wakeup: bool,
) -> Tuple[List[Dict[str, str]], Dict]:
    """
    Build the 48-slot schedule and latent factors for one working day.

    Baseline timeline (no rain, on-time wake-up):
      06:00 wake_up, 06:30 breakfast, 07:00–08:00 commute_subway,
      08:00–12:00 work, 12:00–13:00 lunch, 13:00–17:00 work,
      17:00–18:00 commute_subway, 18:00–18:30 dinner,
      18:30–20:30 evening event, 20:30 shower, 21:00 relax_screen,
      21:30 sleep_winddown, 22:00– sleep.

    Variations:
      - late_wakeup: wake 07:00, breakfast 07:30, commute_taxi 08:00–08:30,
        work from 08:30.
      - rain without late wake-up: commute_taxi 07:00–07:30, work from 07:30.
      - whenever a taxi is used (rain or late wake-up) the trip home is
        commute_taxi 17:00–17:30 and dinner runs 17:30–18:30.
      - evening event: english_academy on Mon/Wed/Fri; on any other day gym,
        replaced by relax_screen when it rains.

    Args:
      d: calendar date; only its day of week is used.
      week_index: accepted for a signature parallel to the other builders;
        not used here.
      weather: "rain" triggers the rain rules; any other value counts as dry.
      late_wakeup: whether the morning is shifted one hour later.

    Returns:
      (slots, latent) where latent holds "weather", "late_wakeup" and
      "gym_cancelled_by_rain".
    """
    raining = (weather == "rain")
    by_taxi = raining or late_wakeup

    # Each plan entry is (first_slot, end_slot_exclusive, activity).
    # --- morning: wake, eat, travel ---
    if late_wakeup:
        plan = [
            (idx(7, 0), idx(7, 30), "wake_up"),
            (idx(7, 30), idx(8, 0), "breakfast"),
            (idx(8, 0), idx(8, 30), "commute_taxi"),
        ]
    else:
        plan = [
            (idx(6, 0), idx(6, 30), "wake_up"),
            (idx(6, 30), idx(7, 0), "breakfast"),
        ]
        if by_taxi:
            plan.append((idx(7, 0), idx(7, 30), "commute_taxi"))
        else:
            plan.append((idx(7, 0), idx(8, 0), "commute_subway"))

    # Work begins in the slot right after the morning commute ends.
    arrival = plan[-1][1]

    # --- working hours ---
    plan += [
        (arrival, idx(12, 0), "work"),
        (idx(12, 0), idx(13, 0), "lunch"),
        (idx(13, 0), idx(17, 0), "work"),
    ]

    # --- trip home and dinner (the shorter taxi ride lengthens dinner) ---
    if by_taxi:
        plan += [
            (idx(17, 0), idx(17, 30), "commute_taxi"),
            (idx(17, 30), idx(18, 30), "dinner"),
        ]
    else:
        plan += [
            (idx(17, 0), idx(18, 0), "commute_subway"),
            (idx(18, 0), idx(18, 30), "dinner"),
        ]

    # --- evening event, always 18:30–20:30 ---
    gym_cancelled = False
    if dow_str(d) in ("Mon", "Wed", "Fri"):
        evening_activity = "english_academy"
    elif raining:
        evening_activity = "relax_screen"
        gym_cancelled = True
    else:
        evening_activity = "gym"
    plan.append((idx(18, 30), idx(20, 30), evening_activity))

    # --- night routine ---
    plan += [
        (idx(20, 30), idx(21, 0), "shower"),
        (idx(21, 0), idx(21, 30), "relax_screen"),
        (idx(21, 30), idx(22, 0), "sleep_winddown"),
        (idx(22, 0), idx(24, 0), "sleep"),
    ]

    slots = make_empty_day()
    for first_slot, end_slot, label in plan:
        fill(slots, first_slot, end_slot, label)

    return slots, {
        "weather": weather,
        "late_wakeup": late_wakeup,
        "gym_cancelled_by_rain": gym_cancelled,
    }


In [24]:
def build_saturday(
    d: date,
    week_index: int,
    weather: str,
) -> Tuple[List[Dict[str, str]], Dict]:
    """
    Build the 48-slot schedule and latent factors for a Saturday.

    Timeline:
      10:00 wake_up, 10:30 breakfast, 11:00–12:30 relax_screen,
      (12:30–13:00 is not overwritten and keeps the default "sleep"),
      13:00–14:00 lunch, 14:00–17:30 game,
      17:30–18:00 commute out, 18:00–19:00 social, 19:00–20:00 dinner,
      20:00–21:00 social, 21:00–21:30 commute home,
      21:30 shower, 22:00 relax_screen, 22:30 sleep_winddown, 23:00– sleep.

    Rules:
      - rain => both commutes are commute_taxi, otherwise commute_subway.
      - odd week_index => social_friend, even week_index => social_family.

    Args:
      d: calendar date; not used by this builder.
      week_index: week counter whose parity picks the social activity.
      weather: "rain" triggers the rain rule; any other value counts as dry.

    Returns:
      (slots, latent) where latent holds "weather" and "sat_social_type"
      ("friend" or "family").
    """
    travel = "commute_taxi" if weather == "rain" else "commute_subway"

    # Week-parity branching: week parity is not handed out as an observable,
    # but the generator branches on week_index so the alternating pattern
    # shows up in the data.
    companion = "friend" if int(week_index) % 2 == 1 else "family"
    social_label = "social_" + companion

    # Each plan entry is (first_slot, end_slot_exclusive, activity).
    plan = [
        (idx(10, 0), idx(10, 30), "wake_up"),
        (idx(10, 30), idx(11, 0), "breakfast"),
        (idx(11, 0), idx(12, 30), "relax_screen"),
        # 12:30–13:00 deliberately has no entry, so it stays "sleep".
        (idx(13, 0), idx(14, 0), "lunch"),
        (idx(14, 0), idx(17, 30), "game"),
        (idx(17, 30), idx(18, 0), travel),         # outbound trip
        (idx(18, 0), idx(19, 0), social_label),
        (idx(19, 0), idx(20, 0), "dinner"),
        (idx(20, 0), idx(21, 0), social_label),
        (idx(21, 0), idx(21, 30), travel),         # trip home
        (idx(21, 30), idx(22, 0), "shower"),
        (idx(22, 0), idx(22, 30), "relax_screen"),
        (idx(22, 30), idx(23, 0), "sleep_winddown"),
        (idx(23, 0), idx(24, 0), "sleep"),
    ]

    slots = make_empty_day()
    for first_slot, end_slot, label in plan:
        fill(slots, first_slot, end_slot, label)

    return slots, {
        "weather": weather,
        "sat_social_type": companion,
    }


In [None]:
def build_sunday(
    d: date,
    week_index: int,
    weather: str,
) -> Tuple[List[Dict[str, str]], Dict]:
    """
    Build the 48-slot schedule and latent factors for a Sunday.

    Timeline:
      08:00 wake_up, 08:30 breakfast,
      09:00–09:30 church commute, 09:30–11:30 church_service,
      11:30–12:00 church commute back,
      12:00–13:00 lunch, 13:00–14:00 relax_screen,
      14:00–16:00 gym, 16:00 shower, 16:30–18:00 relax_screen,
      18:00–19:00 dinner, 19:00–22:00 game, 22:00– sleep.

    Rules:
      - rain => church_commute_taxi both ways, otherwise church_commute_subway.
      - rain => the gym block becomes relax_screen.

    Args:
      d: calendar date; not used by this builder.
      week_index: not used by this builder.
      weather: "rain" triggers the rain rules; any other value counts as dry.

    Returns:
      (slots, latent) where latent holds "weather", "gym_cancelled_by_rain"
      and "church_commute_mode" ("taxi" or "subway").
    """
    if weather == "rain":
        mode, afternoon, gym_cancelled = "taxi", "relax_screen", True
    else:
        mode, afternoon, gym_cancelled = "subway", "gym", False
    church_trip = "church_commute_" + mode

    # Each plan entry is (first_slot, end_slot_exclusive, activity).
    plan = [
        (idx(8, 0), idx(8, 30), "wake_up"),
        (idx(8, 30), idx(9, 0), "breakfast"),
        (idx(9, 0), idx(9, 30), church_trip),
        (idx(9, 30), idx(11, 30), "church_service"),
        (idx(11, 30), idx(12, 0), church_trip),
        (idx(12, 0), idx(13, 0), "lunch"),
        (idx(13, 0), idx(14, 0), "relax_screen"),
        (idx(14, 0), idx(16, 0), afternoon),
        (idx(16, 0), idx(16, 30), "shower"),
        (idx(16, 30), idx(18, 0), "relax_screen"),
        (idx(18, 0), idx(19, 0), "dinner"),
        (idx(19, 0), idx(22, 0), "game"),
        (idx(22, 0), idx(24, 0), "sleep"),
    ]

    slots = make_empty_day()
    for first_slot, end_slot, label in plan:
        fill(slots, first_slot, end_slot, label)

    return slots, {
        "weather": weather,
        "gym_cancelled_by_rain": gym_cancelled,
        "church_commute_mode": mode,
    }

In [None]:
def generate_lifelog_dataset(
    user_id: str,
    start_date: str,          # "YYYY-MM-DD"
    num_days: int,
    p_rain: float = 0.2,
    p_late: float = 0.1,
    seed: Optional[int] = 42,
) -> List[Dict]:
    """
    Generate `num_days` consecutive days of synthetic 30-minute life-log records.

    Args:
      user_id: identifier copied into every record.
      start_date: first day, formatted "YYYY-MM-DD"; it is also the origin
        for week_index (the first seven days are week 1).
      num_days: number of consecutive days to generate.
      p_rain: probability that a given day is rainy (latent).
      p_late: probability that a weekday has a late wake-up (latent).
        Late wake-up is only drawn on Mon–Fri; weekends ignore it by design.
      seed: when not None, random draws come from a private generator seeded
        with this value, so results are reproducible and the process-wide
        `random` state is left untouched. When None, the shared `random`
        module generator is used as-is.

    Returns:
      One dict per day with keys "user_id", "date", "day_of_week",
      "week_index", "slot_minutes", "observable" (calendar context plus the
      slot list) and "latent_factors".

    Raises:
      ValueError: if `start_date` does not match "%Y-%m-%d".
    """
    # random.Random(seed) yields the same stream as random.seed(seed) followed
    # by random.random(), so seeded datasets are unchanged -- but this no longer
    # reseeds the global generator behind the caller's back.
    draw = random.random if seed is None else random.Random(seed).random

    start = datetime.strptime(start_date, "%Y-%m-%d").date()
    weekdays = ("Mon", "Tue", "Wed", "Thu", "Fri")
    data = []

    for day_offset in range(num_days):
        d = start + timedelta(days=day_offset)
        day = dow_str(d)
        widx = week_index_from_start(d, start)

        # Draw order is part of the reproducibility contract: weather first,
        # then (weekdays only) the late wake-up flag.
        weather = "rain" if draw() < p_rain else "clear"

        if day in weekdays:
            late = draw() < p_late
            slots, latent = build_weekday(d, widx, weather=weather, late_wakeup=late)
        elif day == "Sat":
            slots, latent = build_saturday(d, widx, weather=weather)
        else:  # Sun
            slots, latent = build_sunday(d, widx, weather=weather)

        data.append({
            "user_id": user_id,
            "date": d.isoformat(),
            "day_of_week": day,
            "week_index": widx,
            "slot_minutes": SLOT_MINUTES,
            "observable": {
                "calendar_context": {"day_of_week": day, "week_index": widx},
                "slots": slots,
            },
            "latent_factors": latent,
        })

    return data

In [25]:
if __name__ == "__main__":
    # Demo run: 60 days for user U01 starting Monday 2026-03-02, written as
    # pretty-printed UTF-8 JSON to the mounted Drive folder below.
    sample_path = '/content/drive/MyDrive/AI-scientist/NLP/과제/sample/synthetic_lifelog_60days.json'

    dataset = generate_lifelog_dataset(
        "U01",
        "2026-03-02",
        60,
        p_rain=0.2,     # tunable rain probability
        p_late=0.1,     # tunable weekday late-wake-up probability
        seed=2026,      # fixed seed keeps the output reproducible
    )

    # ensure_ascii=False keeps any non-ASCII text readable in the output file.
    with open(sample_path, mode="w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=2, ensure_ascii=False)

    print(f"Saved to:  {sample_path}")

Saved to:  /content/drive/MyDrive/AI-scientist/NLP/과제/sample/synthetic_lifelog_60days.json
