# Stay-Point Detection and Home/Work Inference\n\nThis notebook loads processed points and trips, runs DBSCAN-based stay-point detection, infers home/work locations, and saves artifacts and figures.\n\nArtifacts saved under `data/processed/` (default; set by `paths.processed_dir` in `configs/config.yaml`):\n- 03_stay_points.parquet\n\nFigures saved under `outputs/figures/` (default; `figures/` inside `paths.output_dir`):\n- staypoints_clusters.png\n- hotspot_grid.png\n\nNotes:\n- Uses a lightweight local projection (equirectangular).\n- Designed to be tolerant of missing columns.\n

In [None]:
import os

import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
import seaborn as sns

from src.data_loader import load_config
from src.clustering import detect_stay_points_dbscan, infer_home_work, hotspot_grid
from src.feature_engineering import daily_weekly_temporal_features


def _read_parquet_or_empty(path):
    """Read the parquet file at ``path``, or return an empty DataFrame if it is missing.

    A missing file is reported on stdout so that an empty result is not
    mistaken for a successful load of an empty dataset.
    """
    if os.path.exists(path):
        return pd.read_parquet(path)
    print(f"WARNING: {path} not found; continuing with an empty DataFrame.")
    return pd.DataFrame()


# --- Configuration -----------------------------------------------------------
cfg = load_config("configs/config.yaml")
# `or {}` also covers a `paths` key that is present but set to None.
paths_cfg = cfg.get('paths') or {}
processed_dir = paths_cfg.get('processed_dir', 'data/processed')
output_dir = paths_cfg.get('output_dir', 'outputs')
fig_dir = os.path.join(output_dir, 'figures')

# Both output locations are created up front so later cells can write to them.
os.makedirs(processed_dir, exist_ok=True)
os.makedirs(fig_dir, exist_ok=True)

# --- Inputs ------------------------------------------------------------------
points_path = os.path.join(processed_dir, '01_trajectories_cleaned.parquet')
trips_path = os.path.join(processed_dir, '02_trips.parquet')
print(f"Loading: {points_path} and {trips_path}")
df_points = _read_parquet_or_empty(points_path)
df_trips = _read_parquet_or_empty(trips_path)
print(f"Points: {len(df_points)}, Trips: {len(df_trips)}")

In [None]:
# Optional enrichment: temporal features make filtering/inspection easier.
# An empty frame is passed through untouched instead of being handed to the helper.
df_points = df_points if df_points.empty else daily_weekly_temporal_features(df_points)

# Preview: the first rows when there is data, otherwise the (empty) frame itself.
df_points if df_points.empty else df_points.head()

## Run Stay-Point Detection (DBSCAN in meters)

In [None]:
# DBSCAN parameters are read from the `stay_point_detection` config section,
# falling back to eps = 150 (meters, per the `eps_meters` key) and min_samples = 5.
stay_cfg = cfg.get('stay_point_detection', {})
eps_m = stay_cfg.get('eps_meters', 150)
min_samples = stay_cfg.get('min_samples', 5)

# Values are cast at the call site so config entries stored as strings or ints still work.
df_stays = detect_stay_points_dbscan(
    df_points,
    eps_m=float(eps_m),
    min_samples=int(min_samples),
)
print(f"Stay points: {len(df_stays)}")
df_stays.head()

## Home/Work Inference

In [None]:
# Run home/work inference on the detected stay points.
# NOTE(review): the schema of the returned frame is defined by
# `src.clustering.infer_home_work` — confirm there before relying on column names.
df_assign = infer_home_work(df_stays)

# Show the preview once via the notebook's rich display; a preceding
# print() of the same head would only repeat it as plain text.
df_assign.head()

## Save Artifacts and Figures

In [None]:
# Pick a parquet engine: prefer pyarrow, fall back to fastparquet, and fail
# loudly if neither can be imported.
engine = 'pyarrow'
try:
    import pyarrow  # noqa: F401
except Exception:
    try:
        import fastparquet  # noqa: F401
        engine = 'fastparquet'
    except Exception as e:
        # Chain the underlying failure so its cause stays visible in the traceback.
        raise RuntimeError('No parquet engine available. Install pyarrow or fastparquet.') from e

# Persist the stay points (written even when the frame is empty).
stay_points_path = os.path.join(processed_dir, '03_stay_points.parquet')
df_stays.to_parquet(stay_points_path, index=False, engine=engine)
print(f"Saved stay points: {len(df_stays)} to {stay_points_path}")

# Hotspot grid figure. Only the second return value is used here; the first
# is bound to a named throwaway rather than `_`, which IPython uses for the
# last cell output.
_hotspot_unused, hotspot_fig = hotspot_grid(df_points, bins=100, figures_dir=fig_dir, figure_name='hotspot_grid.png')
print(f"Hotspot: {hotspot_fig}")

# Quick cluster scatter for visual check (non-geographic axes)
fig_path = os.path.join(fig_dir, 'staypoints_clusters.png')
if df_stays.empty:
    print("No stays to plot.")
else:
    fig, ax = plt.subplots(figsize=(6, 5))
    try:
        ax.scatter(
            df_stays['center_lon'],
            df_stays['center_lat'],
            # Marker area follows point_count, clamped to [10, 200] so markers
            # stay visible without dominating the plot.
            s=np.clip(df_stays['point_count'], 10, 200),
            c='tab:blue',
            alpha=0.7,
        )
        ax.set(xlabel='lon', ylabel='lat', title='Stay Points (centers)')
        fig.tight_layout()
        fig.savefig(fig_path, dpi=150)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    print(f"Stay-points figure saved: {fig_path}")