In [54]:
%load_ext autoreload
%autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [140]:
import numpy as np
import pandas as pd
import geopandas as gpd
from datetime import date, time, datetime, timedelta
from dateutil.parser import parse

In [141]:
import nomad.io.base as loader
import nomad.visit_attribution as va
import nomad.stop_detection.lachesis as LACHESIS

In [146]:
def dawn_time(day_part, dawn_hour=6):
    """Return how many minutes of a within-day interval fall before dawn.

    Parameters
    ----------
    day_part : tuple
        ``(start, end)`` pair of objects exposing ``hour`` and ``minute``
        attributes (e.g. ``datetime.time``). Seconds and smaller units are
        ignored.
    dawn_hour : int, default 6
        Hour of the day at which the dawn window ends.

    Returns
    -------
    numpy integer
        Minutes between ``start`` and ``end`` that lie before
        ``dawn_hour:00``. The value is negative when ``end`` precedes
        ``start`` inside the dawn window.
    """
    begin, finish = day_part
    dawn_minute = dawn_hour * 60
    begin_minute = begin.hour * 60 + begin.minute
    finish_minute = finish.hour * 60 + finish.minute
    # Cap both endpoints at dawn; what is left between them is pre-dawn time.
    return np.minimum(finish_minute, dawn_minute) - np.minimum(begin_minute, dawn_minute)

def dusk_time(day_part, dusk_hour=19):
    """Return how many minutes of a within-day interval fall after dusk.

    Parameters
    ----------
    day_part : tuple
        ``(start, end)`` pair of objects exposing ``hour`` and ``minute``
        attributes (e.g. ``datetime.time``). Seconds and smaller units are
        ignored.
    dusk_hour : int, default 19
        Hour of the day at which the dusk window begins.

    Returns
    -------
    numpy integer
        Minutes between ``start`` and ``end`` that lie at or after
        ``dusk_hour:00``. The value is negative when ``end`` precedes
        ``start`` inside the dusk window.
    """
    begin, finish = day_part
    dusk_minute = dusk_hour * 60
    # Minutes past dusk for each endpoint, floored at zero.
    begin_past_dusk = np.maximum(begin.hour * 60 + begin.minute - dusk_minute, 0)
    finish_past_dusk = np.maximum(finish.hour * 60 + finish.minute - dusk_minute, 0)
    return finish_past_dusk - begin_past_dusk

def slice_datetimes_interval_fast(start, end):
    """Split a datetime interval into whole days plus two partial-day pieces.

    Parameters
    ----------
    start, end : datetime.datetime (or subclass)
        Interval endpoints. Only their calendar dates and wall-clock times
        are used.

    Returns
    -------
    tuple
        ``(full_days, day_parts)``. ``full_days`` is the number of whole
        calendar days strictly between the start date and the end date
        (never negative). ``day_parts`` is a list of two ``(time, time)``
        pairs:

        * interval touches more than one date: ``[(start time, time.max),
          (time.min, end time)]``;
        * otherwise: ``[(start time, end time), (start time, start time)]``,
          where the second pair is an empty placeholder.
    """
    last_day_begin = datetime.combine(end, time.min)
    first_day_end = datetime.combine(start, time.max)
    whole_days = (last_day_begin - first_day_end).days

    if whole_days < 0:
        # End does not reach the midnight that follows the start date.
        opening = start.time()
        return 0, [(opening, end.time()), (opening, opening)]

    return whole_days, [(start.time(), time.max), (time.min, end.time())]

def duration_at_night_fast(start, end, dawn_hour=6, dusk_hour=19):
    """Return the number of night-time minutes contained in ``[start, end]``.

    Night is the part of each day before ``dawn_hour:00`` plus the part at or
    after ``dusk_hour:00``.

    Parameters
    ----------
    start, end : datetime.datetime (or subclass)
        Interval endpoints, forwarded to ``slice_datetimes_interval_fast``.
    dawn_hour : int, default 6
        Hour at which the morning night window ends.
    dusk_hour : int, default 19
        Hour at which the evening night window begins.

    Returns
    -------
    int
        Night minutes in the two partial-day pieces plus a full night
        allowance for every whole day in between.
    """
    full_days, (part1, part2) = slice_datetimes_interval_fast(start, end)

    # Night minutes contributed by the first and last (partial) days.
    partial_minutes = (
        dawn_time(part1, dawn_hour) + dawn_time(part2, dawn_hour)
        + dusk_time(part1, dusk_hour) + dusk_time(part2, dusk_hour)
    )
    # A whole day contributes its complete dawn and dusk windows.
    night_minutes_per_day = (dawn_hour + (24 - dusk_hour)) * 60

    return int(partial_minutes + full_days * night_minutes_per_day)

def clip_stays_date(traj, dates):
    """Clip every stay to a date window and recompute its durations.

    Parameters
    ----------
    traj : pandas.DataFrame
        Stay table with the columns ``'id'``, ``'start_datetime'``,
        ``'duration'`` (interpreted as minutes) and ``'location'``.
    dates : sequence
        Two date strings ``(window start, window end)`` understood by
        ``dateutil.parser.parse``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'id'``, ``'start'`` (clipped start), ``'duration'``
        (clipped length in whole minutes), ``'duration_night'`` (night
        minutes of the clipped stay) and ``'location'``. Stays lying entirely
        outside the window end up with a duration of 0.
    """
    stay_start = pd.to_datetime(traj['start_datetime'])
    minutes = traj['duration']

    # The window bounds take the timezone of the start column so that they
    # can be compared with it.
    zone = stay_start.dt.tz
    window_lo = pd.Timestamp(parse(dates[0]), tz=zone)
    window_hi = pd.Timestamp(parse(dates[1]), tz=zone)

    stay_end = stay_start + pd.to_timedelta(minutes, unit='m')

    # Squeeze both endpoints into the window.
    bounds = {'lower': window_lo, 'upper': window_hi}
    start_in_window = stay_start.clip(**bounds)
    end_in_window = stay_end.clip(**bounds)

    # Durations of what is left after clipping.
    kept_seconds = (end_in_window - start_in_window).dt.total_seconds()
    kept_minutes = (kept_seconds // 60).astype(int)
    night_minutes = list(map(duration_at_night_fast, start_in_window, end_in_window))

    clipped = pd.DataFrame({
        'id': traj['id'].values,
        'start': start_in_window,
        'duration': kept_minutes,
        'duration_night': night_minutes,
        'location': traj['location']
    })
    return clipped

def count_nights(usr_polygon, min_dwell=10, dawn_hour=6, dusk_hour=19):
    """Count the distinct nights and weeks in which a set of stays has night-time presence.

    A night is labelled by the calendar date on which it starts: minutes
    after ``dusk_hour`` on date D and minutes before ``dawn_hour`` on date
    D+1 both belong to night D. A week is labelled by the date D minus its
    ``weekday()`` (Monday = 0).

    Parameters
    ----------
    usr_polygon : pandas.DataFrame
        Stays of one (id, location) pair with the columns ``'id'``,
        ``'location'``, ``'start'`` (datetime-like) and ``'duration'``
        (minutes). Only the first row's ``'id'`` and ``'location'`` are
        reported.
    min_dwell : int, default 10
        Minimum number of night minutes a stay must contribute to a night
        for that night to be counted.
    dawn_hour : int, default 6
        Hour at which the morning night window ends.
    dusk_hour : int, default 19
        Hour at which the evening night window begins.

    Returns
    -------
    pandas.DataFrame
        One row with ``'id'``, ``'location'``, ``'night_count'`` and
        ``'week_count'``.

    Raises
    ------
    IndexError
        If ``usr_polygon`` has no rows.
    """
    nights = set()
    weeks = set()

    def _record(night_date):
        # Register one night (a datetime.date) and the week it belongs to.
        nights.add(night_date)
        weeks.add(night_date - timedelta(days=night_date.weekday()))

    for _, row in usr_polygon.iterrows():
        stay_start = pd.to_datetime(row['start'])
        stay_end = stay_start + pd.to_timedelta(row['duration'], unit='m')
        full_days, (part1, part2) = slice_datetimes_interval_fast(stay_start, stay_end)

        dawn1 = dawn_time(part1, dawn_hour)
        dusk1 = dusk_time(part1, dusk_hour)
        dawn2 = dawn_time(part2, dawn_hour)
        dusk2 = dusk_time(part2, dusk_hour)

        # Work with calendar dates. Adding timedelta(days=k) to a tz-aware
        # pandas Timestamp shifts by k*24 absolute hours, which can land on
        # the wrong date when the stay spans a DST transition.
        first_day = stay_start.date()

        # Early-morning minutes on the first day belong to the previous night.
        if dawn1 >= min_dwell:
            _record(first_day - timedelta(days=1))

        if full_days == 0:
            # Evening of the first day plus (for overnight stays) the
            # following morning make up the night of the first day.
            if (dusk1 + dawn2) >= min_dwell:
                _record(first_day)
            last_day_offset = 1
        else:
            # Every night from the first day through the last whole day is
            # fully bracketed by the stay, so it is counted unconditionally.
            for offset in range(full_days + 1):
                _record(first_day + timedelta(days=offset))
            last_day_offset = full_days + 1

        # Evening minutes on the final day start a further night.
        if dusk2 >= min_dwell:
            _record(first_day + timedelta(days=last_day_offset))

    identifier = usr_polygon['id'].iloc[0]
    location = usr_polygon['location'].iloc[0]

    return pd.DataFrame([{
        'id': identifier,
        'location': location,
        'night_count': len(nights),
        'week_count': len(weeks)
    }])

In [147]:
# Column-name mapping handed to the nomad loader.
traj_cols = dict(uid='uid', x='x', y='y', timestamp='timestamp')

diaries_df = loader.from_file(
    "../nomad/data/diaries",
    format="parquet",
    traj_cols=traj_cols,
    parse_dates=True,
)
sparse_df = loader.from_file(
    "../nomad/data/sparse_traj/",
    format="parquet",
    traj_cols=traj_cols,
    parse_dates=True,
)
poi_table = gpd.read_file('garden_city.geojson')

# Shift and rescale x/y identically in both tables.
# NOTE(review): the offsets look like a projected-coordinate origin and 15 like
# a block size, i.e. this appears to map web-mercator metres onto Garden City
# grid units (resulting values are ~4-17) - confirm the intended direction.
for frame in (sparse_df, diaries_df):
    frame.loc[:, 'x'] = (frame['x'] - 4265699) / 15
    frame.loc[:, 'y'] = (frame['y'] + 4392976) / 15

# Keep the sparse trajectory of a single user: the first uid found in the diaries.
user = diaries_df['uid'].unique()[0]
user_sample = sparse_df.loc[sparse_df['uid'] == user]

user_sample

Unnamed: 0,uid,timestamp,longitude,latitude,x,y,local_timestamp,ha,date
11327,admiring_curie,1704148860,38.321317,-36.666853,14.031005,13.632870,2024-01-02 00:41:00+02:00,11.25,2024-01-02
11328,admiring_curie,1704149640,38.321373,-36.666809,14.449639,14.038498,2024-01-02 00:54:00+02:00,11.25,2024-01-02
11329,admiring_curie,1704150900,38.321450,-36.666825,15.022904,13.891453,2024-01-02 01:15:00+02:00,11.25,2024-01-02
11330,admiring_curie,1704151080,38.321470,-36.666668,15.171014,15.337402,2024-01-02 01:18:00+02:00,11.25,2024-01-02
11331,admiring_curie,1704151380,38.321387,-36.666771,14.551866,14.383345,2024-01-02 01:23:00+02:00,11.25,2024-01-02
...,...,...,...,...,...,...,...,...,...
143046,admiring_curie,1705740660,38.319985,-36.666497,4.144425,16.921279,2024-01-20 10:51:00+02:00,11.25,2024-01-20
143047,admiring_curie,1705741440,38.320014,-36.666660,4.366099,15.416760,2024-01-20 11:04:00+02:00,11.25,2024-01-20
143048,admiring_curie,1705744320,38.320025,-36.666459,4.446864,17.270041,2024-01-20 11:52:00+02:00,11.25,2024-01-20
143049,admiring_curie,1705744500,38.320077,-36.666465,4.828045,17.222850,2024-01-20 11:55:00+02:00,11.25,2024-01-20


In [151]:
# LACHESIS thresholds (units as defined by nomad's stop-detection API - TODO confirm).
DUR_MIN = 5
DT_MAX = 60
DELTA_ROAM = 100

# Column mapping for this stage: time is read from the local datetime column.
traj_cols = {
    'uid': 'uid',
    'x': 'x',
    'y': 'y',
    'datetime': 'local_timestamp',
}

# Arguments shared by the stop-table call and the per-ping label call.
lachesis_shared = dict(
    traj=user_sample,
    dur_min=DUR_MIN,
    dt_max=DT_MAX,
    delta_roam=DELTA_ROAM,
    traj_cols=traj_cols,
)

# One row per detected stop.
stop_table_lachesis = LACHESIS.lachesis(
    **lachesis_shared,
    keep_col_names=False,
    complete_output=True,
    datetime='local_timestamp',
)

# Cluster label for every ping (note: private helper of the lachesis module).
labels_lachesis = LACHESIS._lachesis_labels(
    **lachesis_shared,
    datetime='local_timestamp',
)
labels_lachesis.name = 'cluster'

# Attribute each stop to a POI polygon.
pred_lachesis = va.point_in_polygon(
    traj=user_sample,
    labels=labels_lachesis,
    stop_table=stop_table_lachesis,
    poi_table=poi_table,
    traj_cols=traj_cols,
    is_datetime=True,
    is_long_lat=False,
)

pred_lachesis



Unnamed: 0,start_datetime,end_datetime,duration,x,y,diameter,n_pings,max_gap,location
0,2024-01-02 00:41:00+02:00,2024-01-02 01:26:00+02:00,45,14.551866,14.383345,2.050621,6,21,h-x13-y13
1,2024-01-02 03:17:00+02:00,2024-01-02 06:36:00+02:00,199,14.3593,14.630957,2.713152,20,34,h-x13-y13
2,2024-01-02 19:30:00+02:00,2024-01-02 21:07:00+02:00,97,14.193728,9.58687,2.804123,3,57,p-x13-y11
3,2024-01-03 05:52:00+02:00,2024-01-03 09:39:00+02:00,227,4.794544,10.901612,11.24849,14,47,h-x8-y10
4,2024-01-03 15:10:00+02:00,2024-01-03 15:44:00+02:00,34,4.114724,16.135475,1.451006,3,21,w-x3-y16
5,2024-01-04 20:39:00+02:00,2024-01-05 03:20:00+02:00,401,14.559428,8.326059,2.000332,35,42,h-x13-y8
6,2024-01-05 15:33:00+02:00,2024-01-05 15:58:00+02:00,25,4.753138,6.431516,1.014941,4,17,w-x3-y6
7,2024-01-06 04:19:00+02:00,2024-01-06 04:40:00+02:00,21,11.979128,6.937445,1.104563,3,19,h-x11-y8
8,2024-01-06 13:33:00+02:00,2024-01-06 14:36:00+02:00,63,4.65541,16.576277,6.71263,6,45,w-x3-y13
9,2024-01-06 21:46:00+02:00,2024-01-07 00:02:00+02:00,136,14.359073,8.486195,1.859656,13,24,h-x13-y8


In [152]:
# Stop-detection output: make sure the start column is datetime-typed.
# NOTE(review): clip_stays_date reads a 'location' column from this table; the
# displayed pred_lachesis has one - confirm that stop_table_lachesis carries it
# too on a fresh kernel (or pass pred_lachesis instead).
stop_table_lachesis['start_datetime'] = pd.to_datetime(stop_table_lachesis['start_datetime'])

# The stop table of a single user may lack an id column; fill it with that user.
if 'id' not in stop_table_lachesis.columns:
    stop_table_lachesis['id'] = user

# Date range: `weeks` whole weeks starting at `start_date`.
start_date = "2024-01-02"
weeks = 2
end_date = (parse(start_date) + timedelta(weeks=weeks)).date().isoformat()
dates = (start_date, end_date)

# Clip stays to the window; keep those that overlap it and contain at least
# 15 night-time minutes.
df_clipped = clip_stays_date(stop_table_lachesis, dates)
df_clipped = df_clipped[(df_clipped['duration'] > 0) & (df_clipped['duration_night'] >= 15)]

# One summary row per (id, location). Iterate over the groups explicitly:
# count_nights reads the grouping columns, and groupby.apply operating on
# grouping columns is deprecated in recent pandas.
per_location = [
    count_nights(stays)
    for _, stays in df_clipped.groupby(['id', 'location'])
]
if per_location:
    night_counts = pd.concat(per_location, ignore_index=True)
else:
    # No stay survived the filters: return an empty table with the same columns.
    night_counts = pd.DataFrame(columns=['id', 'location', 'night_count', 'week_count'])

night_counts

  df_clipped.groupby(['id', 'location'], group_keys=False).apply(count_nights).reset_index(drop=True)


Unnamed: 0,id,location,night_count,week_count
0,admiring_curie,h-x11-y8,1,1
1,admiring_curie,h-x13-y12,1,1
2,admiring_curie,h-x13-y13,1,1
3,admiring_curie,h-x13-y8,5,2
4,admiring_curie,h-x15-y13,1,1
5,admiring_curie,p-x13-y11,3,2
