In [1]:
import gzip
import json
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import pytz
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import os
import random

import imn_loading

In [15]:
def read_poi_data(filepath):
    poi_data = {}
    with gzip.open(filepath, 'rt', encoding='utf-8') as f:
        for line in f:
            row = json.loads(line)
            # Keep UID as int to match IMN data format
            poi_data[row['uid']] = row
    return poi_data

poi_to_activity = {
    "education": "school", "food_and_drink": "eat", "shopping": "shop",
    "entertainment_and_recreation": "leisure", "transportation": "transit",
    "healthcare": "health", "public_services": "admin", "finance": "finance",
    "utilities": "utility", "other": "unknown"
}

def enrich_imn_with_poi(imn, poi_info):
    poi_classes = poi_info["poi_classes"]
    enriched = {}
    for loc_id, loc in imn["locations"].items():
        vec = poi_info["poi_freq"].get(loc_id, [0.0]*len(poi_classes))
        top_idx = int(np.argmax(vec))
        label = poi_to_activity.get(poi_classes[top_idx], "unknown")
        if loc_id == imn.get("home"): label = "home"
        if loc_id == imn.get("work"): label = "work"
        enriched[loc_id] = {**loc, "activity_label": label}
    imn["locations"] = enriched
    return imn

def extract_stays_from_trips(trips, locations):
    """Convert trips into stays by considering the destination of each trip as a stay."""
    stays = []
    
    # First pass: create stays with start times
    for from_id, to_id, st, et in trips:
        activity_label = locations[to_id].get('activity_label', 'unknown')
        # Create stay with start time as trip end time
        stays.append(Stay(to_id, activity_label, et, None))
    
    # Second pass: set end times based on next stay's start time
    for i in range(len(stays)-1):
        stays[i].set_end_time(stays[i+1].start_time)
    
    # Handle the last stay
    if stays:
        # For the last stay, if there's a next day's first trip, use that as end time
        # Otherwise, use a default duration of 1 hour
        last_stay = stays[-1]
        if last_stay.start_time is not None:
            last_stay.set_end_time(last_stay.start_time + 3600)  # Default 1 hour duration
    
    return stays

def extract_stays_by_day(stays):
    """Group stays by day, handling cross-day stays."""
    stays_by_day = defaultdict(list)
    
    for stay in stays:
        if stay.start_time is None or stay.end_time is None:
            continue
            
        start_dt = datetime.fromtimestamp(stay.start_time, tz)
        end_dt = datetime.fromtimestamp(stay.end_time, tz)
        
        # If stay spans multiple days, split it
        current_dt = start_dt.replace(hour=0, minute=0, second=0, microsecond=0)
        end_of_day = current_dt + timedelta(days=1)
        
        while current_dt < end_dt:
            day_start = max(start_dt, current_dt)
            day_end = min(end_dt, end_of_day)
            
            # Create stay for this day
            day_stay = Stay(
                stay.location_id,
                stay.activity_label,
                int(day_start.timestamp()),
                int(day_end.timestamp())
            )
            
            stays_by_day[current_dt.date()].append(day_stay)
            
            # Move to next day
            current_dt = end_of_day
            end_of_day = current_dt + timedelta(days=1)
    
    return stays_by_day

In [3]:
# Load IMNs and POI data from the new test files
imns = imn_loading.read_imn('data/test_milano_imns.json.gz')
poi_data = read_poi_data('data/test_milano_imns_pois.json.gz')
print(f"Loaded {len(imns)} IMNs and POI data for {len(poi_data)} users")

Loaded 11658 IMNs and POI data for 11658 users


In [11]:
user_id = 650

In [12]:
imn = imns[user_id]
imn.keys()

dict_keys(['locations', 'trips', 'home', 'work'])

In [None]:
poi_data[user_id]

{'uid': 650,
 'poi_classes': ['other',
  'transportation',
  'healthcare',
  'public_services',
  'finance',
  'food_and_drink',
  'utilities',
  'education',
  'entertainment_and_recreation',
  'shopping'],
 'poi_freq': {'0': [1.0, 1.0, 1.0, 3.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
  '1': [0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
  '2': [3.0, 2.0, 2.0, 3.0, 1.0, 3.0, 2.0, 0.0, 0.0, 0.0],
  '3': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
  '4': [1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0],
  '5': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
  '6': [1.0, 2.0, 0.0, 0.0, 0.0, 3.0, 1.0, 0.0, 0.0, 0.0],
  '7': [2.0, 1.0, 1.0, 2.0, 1.0, 5.0, 1.0, 0.0, 0.0, 0.0],
  '8': [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 0.0],
  '9': [189.0, 16.0, 1.0, 1.0, 0.0, 5.0, 3.0, 0.0, 0.0, 0.0],
  '10': [27.0, 4.0, 1.0, 19.0, 0.0, 4.0, 4.0, 0.0, 0.0, 0.0],
  '11': [1.0, 3.0, 0.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0],
  '12': [0.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}}

In [14]:
# Enrich IMN with POI data
enriched = enrich_imn_with_poi(imn, poi_data[user_id])
enriched

{'locations': {'0': {'coordinates': [9.175958, 45.528595],
   'frequency': 20,
   'activity_label': 'home'},
  '1': {'coordinates': [9.20885, 45.414547],
   'frequency': 4,
   'activity_label': 'work'},
  '2': {'coordinates': [9.174006, 45.527006],
   'frequency': 4,
   'activity_label': 'unknown'},
  '3': {'coordinates': [9.166437, 45.557348],
   'frequency': 4,
   'activity_label': 'unknown'},
  '4': {'coordinates': [9.175718, 45.526646],
   'frequency': 4,
   'activity_label': 'utility'},
  '5': {'coordinates': [9.05514, 45.41317],
   'frequency': 2,
   'activity_label': 'unknown'},
  '6': {'coordinates': [9.23827, 45.552969],
   'frequency': 2,
   'activity_label': 'eat'},
  '7': {'coordinates': [9.174558, 45.528896],
   'frequency': 2,
   'activity_label': 'eat'},
  '8': {'coordinates': [9.1779, 45.508082],
   'frequency': 2,
   'activity_label': 'utility'},
  '9': {'coordinates': [9.150353, 45.520783],
   'frequency': 2,
   'activity_label': 'unknown'},
  '10': {'coordinates': [9

In [16]:
# Extract stays from trips
stays = extract_stays_from_trips(enriched['trips'], enriched['locations'])
stays

NameError: name 'Stay' is not defined

In [None]:
# Group stays by day
stays_by_day = extract_stays_by_day(stays)