In [1]:
#%pip install git+https://github.com/Watts-Lab/nomad.git@data-gen

In [1]:
import pandas as pd
import numpy as np
import numpy.random as npr
from datetime import datetime, timedelta
import calendar
from pyproj import Transformer
from concurrent.futures import ProcessPoolExecutor
import concurrent.futures
import multiprocessing
from tqdm import tqdm
from itertools import islice
from functools import partial
import copy

import nomad.city_gen as cg
from nomad.city_gen import City, Building, Street
import nomad.traj_gen
from nomad.traj_gen import Agent, Population
from nomad.constants import DEFAULT_SPEEDS, FAST_SPEEDS, SLOW_SPEEDS, DEFAULT_STILL_PROBS
from nomad.constants import FAST_STILL_PROBS, SLOW_STILL_PROBS, ALLOWED_BUILDINGS

import os
os.environ['TZ'] = 'UTC'

### Generate N agents

In [2]:
# Coordinate transformer from Web Mercator (EPSG:3857) to WGS84 (EPSG:4326).
# always_xy=True fixes the axis order to (x, y): transform() takes
# (easting, northing) and returns (longitude, latitude), in that order.
transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)


# Affine map from Garden City grid units to the projected coordinates fed to
# `transformer`: every grid unit is scaled by 15 and shifted by these offsets.
_GRID_SCALE = 15
_X_OFFSET = 4265699
_Y_OFFSET = -4392976


def _to_lat_long(traj):
    """Return a new lat/long frame built from a Garden City trajectory frame.

    The input frame is not modified. It must provide the columns
    'x', 'y', 'identifier', 'unix_timestamp' and 'local_timestamp'
    (the latter with a datetime dtype, since `.dt.date` is used).
    """
    projected_x = _GRID_SCALE * traj['x'] + _X_OFFSET
    projected_y = _GRID_SCALE * traj['y'] + _Y_OFFSET

    # The module-level transformer is created with always_xy=True, so
    # transform() returns (longitude, latitude) -- in that order.
    longitude, latitude = transformer.transform(
        projected_x.values, projected_y.values)

    converted = traj.assign(
        date=traj['local_timestamp'].dt.date,  # for partitioning purposes
        latitude=latitude,
        longitude=longitude,
    )
    converted = converted[
        ['identifier', 'unix_timestamp', 'date', 'latitude', 'longitude']]
    converted = converted.rename(
        columns={'identifier': 'uid', 'unix_timestamp': 'timestamp'})
    return converted.reset_index(drop=True)


def garden_city_to_lat_long(agent,
                            sparse_traj=True,
                            full_traj=True):
    """Replace an agent's trajectories with latitude/longitude versions.

    Parameters
    ----------
    agent : object
        Must expose the DataFrame attributes selected by the flags below
        (`sparse_traj` and/or `trajectory`).
    sparse_traj : bool, default True
        Convert `agent.sparse_traj`.
    full_traj : bool, default True
        Convert `agent.trajectory`.

    Returns
    -------
    None
        The selected attributes are reassigned to new frames with columns
        ['uid', 'timestamp', 'date', 'latitude', 'longitude'].

    Notes
    -----
    The conversion is not repeatable on the same agent: the converted
    frames no longer carry the 'x'/'y' columns the conversion reads.
    """
    if sparse_traj:
        agent.sparse_traj = _to_lat_long(agent.sparse_traj)

    if full_traj:
        agent.trajectory = _to_lat_long(agent.trajectory)

    return None

In [3]:
# Number of synthetic agents to generate.
N = 10000
# Seed NumPy's global RNG; generate_agents below additionally receives its own seed.
npr.seed(100)

# Load the city from a local file.
# NOTE(review): the .pkl extension suggests a pickle -- only load trusted files.
city = cg.load('garden-city.pkl')
population_n = Population(city)
# Create N agents with start_time 2024-01-01 08:00.
# NOTE(review): dt=1 is presumably the simulation time step -- confirm its unit
# against nomad.traj_gen.
population_n.generate_agents(N,
                             start_time=datetime(2024, 1, 1, hour=8, minute=0),
                             dt=1, seed=123)

In [None]:
%%time

def generate_agent_trajectory(agent_id, agent, seed):
    """Simulate one agent through calendar year 2024 and sparsify the result.

    Relies on the module-level `population_n` and `garden_city_to_lat_long`,
    so when run in a worker process those names must be available there.

    Parameters
    ----------
    agent_id : hashable
        Roster key of the agent; returned unchanged so the caller can match
        the result back to the roster.
    agent : Agent
        The agent to simulate; it is modified by the calls below.
    seed : int
        Seeds the per-agent sampling parameters and is forwarded to
        `generate_trajectory` and `sample_traj_hier_nhpp`.

    Returns
    -------
    tuple
        `(agent_id, deep copy of the processed agent)`.
    """
    # Draw the sampling parameters from a generator seeded per agent. The
    # global numpy.random state ignores `seed` and is duplicated into every
    # forked worker, so drawing from it can hand different agents identical
    # parameters and makes runs irreproducible.
    rng = np.random.default_rng(seed)

    # May want to put more thought into the ranges
    beta_start = rng.uniform(60, 1200)
    beta_duration = rng.uniform(15, 180)
    beta_ping = rng.uniform(5, 30)

    param = (beta_start, beta_duration, beta_ping)

    # Advance month by month: T is 23:59 on the last day of each month of 2024,
    # and the sparse sampling step runs after every monthly extension.
    # NOTE(review): reset_traj=True presumably discards the dense trajectory
    # after sampling to bound memory -- confirm in nomad.traj_gen.
    for month in range(1, 13):
        days = calendar.monthrange(2024, month)[1]
        population_n.generate_trajectory(agent,
                                         T=datetime(2024, month, days, hour=23, minute=59),
                                         seed=seed)

        agent.sample_traj_hier_nhpp(*param,
                                    seed=seed,
                                    reset_traj=True)

    # Only the sparse trajectory is converted to latitude/longitude.
    garden_city_to_lat_long(agent,
                            sparse_traj=True,
                            full_traj=False)

    return agent_id, copy.deepcopy(agent)

# Roster positions processed in this run. islice treats the range as half-open,
# so positions start .. end-1 are selected.
# NOTE(review): with start=1 the entry at position 0 is skipped and only
# end-start agents are processed -- confirm this is the intended batch.
start = 1
end = 10

# Working copy of the roster. A plain dict is sufficient: workers receive their
# agent as a pickled argument and only this (parent) process writes results
# back, so a Manager proxy would only add serialization overhead.
shared_roster = dict(population_n.roster)

# Materialize the batch so the progress-bar total matches the real task count.
batch = list(islice(shared_roster.items(), start, end))

with ProcessPoolExecutor() as executor:
    # The position index doubles as the per-agent seed.
    futures = [
        executor.submit(generate_agent_trajectory, agent_id, agent, i)
        for i, (agent_id, agent) in enumerate(batch, start=start)
    ]
    with tqdm(total=len(futures), desc="Processing agents") as pbar:
        # Consume results as they finish so the bar reflects actual progress;
        # result() re-raises any exception raised in the worker.
        for future in concurrent.futures.as_completed(futures):
            agent_id, agent = future.result()
            shared_roster[agent_id] = agent
            pbar.update(1)  # Update progress bar

# Publish the updated roster only after every task has succeeded.
population_n.roster = dict(shared_roster)

Processing agents:   0%|          | 0/10 [01:34<?, ?it/s]Process ForkProcess-194:
Process ForkProcess-192:
Process ForkProcess-193:
Process ForkProcess-189:
Process ForkProcess-190:
Process ForkProcess-188:
Process ForkProcess-191:
Process ForkProcess-184:
Process ForkProcess-185:
Process ForkProcess-177:
Process ForkProcess-179:
Process ForkProcess-181:
Process ForkProcess-182:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkProcess-178:
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkProcess-176:
Process ForkProcess-175:
  File "/home/ec2-user/SageMaker/custom-miniconda/miniconda/envs/py_310_env/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/ec2-user/SageMaker/custom-miniconda/miniconda/envs/py_3

KeyboardInterrupt: 

In [None]:
# Partition columns per saved dataset, handed to save_pop below.
partition_cols = {
    'sparse_traj': ['date'],
    'diaries': ['identifier']
}

# NOTE(review): `roster` is built here but is not passed to save_pop in this
# cell, and islice returns a single-use iterator -- confirm whether the save
# was meant to be restricted to this batch.
roster = islice(population_n.roster.items(), start, end)

# Save the population to the "synthetic-raw-data" bucket under the
# "agents-{start}-{end}" prefix; full trajectories are not saved.
population_n.save_pop(bucket="synthetic-raw-data",
                      prefix=f"agents-{start}-{end}",
                      save_full_traj=False,
                      save_sparse_traj=True,
                      save_homes=True,
                      save_diaries=True,
                      partition_cols=partition_cols)

In [None]:
# Load the Parquet file
# NOTE(review): this reads from the "10k-agents/" prefix, whereas the save cell
# in this notebook writes under "agents-{start}-{end}" -- confirm the path
# points at the dataset you intend to inspect.

s3_path = "s3://synthetic-raw-data/10k-agents/sparse_trajectories.parquet/"
df = pd.read_parquet(s3_path) 

# Display the loaded frame as the cell output.
df

In [None]:
# Inspect the roster (iterated elsewhere in this notebook as (agent_id, agent)
# pairs); with N agents this output can be very large.
population_n.roster

In [None]:
# Lazy, single-use iterator over roster entries at positions start .. end-1.
# NOTE(review): nothing in the visible notebook consumes it -- likely scratch.
roster = islice(population_n.roster.items(), start, end)