# Task 4.2: Activity & Sleep Features

**Goal**: Engineer behavioral features related to physical activity and sleep.

**Methodology**:
- **Activity**: Active/Sedentary ratios, Rolling 24h activity.
- **Sleep**: Heuristic inference (Dark + Lock + Quiet + Night).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Add src to path
sys.path.append('../../')

from src.features.activity_sleep import process_activity_sleep

## 1. Load Data

In [None]:
DATA_DIR = Path('../../data/processed')
TEST_DATA_DIR = Path('../../data/processed_test')

# Fixed seed for the synthetic fallback so that the dummy data, and every
# table and plot derived from it, is identical on each run of the notebook.
DUMMY_SEED = 42

# Load order: full training set, then the small test subset. If neither
# parquet file exists, build hourly synthetic data so the remaining cells
# can still be executed end to end.
if (DATA_DIR / 'train.parquet').exists():
    df = pd.read_parquet(DATA_DIR / 'train.parquet')
    print("Loaded full training data")
elif (TEST_DATA_DIR / 'train.parquet').exists():
    df = pd.read_parquet(TEST_DATA_DIR / 'train.parquet')
    print("Loaded test subset data")
else:
    print("No data found. Generating dummy data.")
    # Local generator: reproducible without touching the global
    # np.random state that other cells may rely on.
    rng = np.random.default_rng(DUMMY_SEED)

    dates = pd.date_range(start='2013-03-27', end='2013-06-05', freq='h')
    df = pd.DataFrame({'timestamp': dates})
    df['hour_of_day'] = df['timestamp'].dt.hour

    # Hours 0-5 are the mocked "night": fully dark and phone locked.
    is_night = df['hour_of_day'] < 6

    # Mock sensor data (upper bounds are exclusive, as with np.random.randint)
    df['activity_active_minutes'] = rng.integers(0, 60, size=len(df))
    df['dark_minutes'] = np.where(is_night, 60, 0)
    df['phonelock_minutes'] = np.where(is_night, 60, 10)
    df['audio_voice_minutes'] = rng.integers(0, 10, size=len(df))

print(f"Shape: {df.shape}")

## 2. Feature Engineering

In [None]:
# Run the project's activity/sleep feature pipeline on the loaded frame.
df_features = process_activity_sleep(df)

# Preview the first ten rows of a few columns expected in the result; the
# trailing expression is what the notebook cell displays.
preview_columns = ['timestamp', 'active_ratio', 'is_asleep', 'sleep_duration_24h']
df_features.loc[:, preview_columns].head(10)

## 3. Visualization

### 3.1 Activity Heatmap
Mean active ratio for each hour of the day (columns) and day of the week (rows).

In [None]:
# Derive the weekday index from the timestamp when the feature frame does
# not already carry a 'day_of_week' column.
if 'day_of_week' not in df_features:
    df_features['day_of_week'] = df_features['timestamp'].dt.dayofweek

# Mean active_ratio per (day_of_week, hour_of_day) cell:
# one row per weekday, one column per hour.
pivot = pd.pivot_table(
    df_features,
    values='active_ratio',
    index='day_of_week',
    columns='hour_of_day',
    aggfunc='mean',
)

# Draw on an explicit Axes instead of relying on the implicit current figure.
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(pivot, cmap='Reds', annot=True, fmt='.2f', ax=ax)
ax.set_title("Average Activity Ratio (0=Sedentary, 1=Active)")
ax.set_ylabel("Day of Week (0=Mon, 6=Sun)")
plt.show()

### 3.2 Sleep Duration Distribution
Check the distribution of the inferred rolling 24-hour sleep duration.

In [None]:
# Histogram (20 bins) with a KDE overlay of the rolling 24h sleep duration,
# drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 4))
sleep_hours = df_features['sleep_duration_24h']
sns.histplot(sleep_hours, bins=20, kde=True, ax=ax)
ax.set_title("Distribution of Rolling 24h Sleep Duration")
ax.set_xlabel("Hours of Sleep (in last 24h)")
plt.show()