In [2]:
import numpy as np
import pandas as pd
from dataclasses import dataclass


@dataclass
class BoundingBox:
    min_lat: float
    max_lat: float
    min_lon: float
    max_lon: float


TIME_GAP = 15 * 60  # 15 minutes in seconds


# function to generate n timestamps randomly over last 15 minutes
def generate_timestamps(n: int) -> pd.Series:
    # generate n random integers between 0 and 15*60
    seconds = np.random.randint(0, TIME_GAP, n)
    # subtract the seconds from the current time
    now = pd.Timestamp.now()
    timestamps = now - pd.to_timedelta(seconds, unit="s")
    return timestamps


# create latitudes and longitudes within a bounding box
# also add a timestamp to each point to simulate a time series
def generate_points(n: int, bbox: BoundingBox) -> pd.DataFrame:
    lats = np.random.uniform(bbox.min_lat, bbox.max_lat, n)
    lons = np.random.uniform(bbox.min_lon, bbox.max_lon, n)
    timestamps = generate_timestamps(n)
    return pd.DataFrame({"lat": lats, "lon": lons, "timestamp": timestamps})


# create a bounding box
bbox = BoundingBox(40.5, 40.9, -74.3, -73.7)

# generate 1000 points within the bounding box
df = generate_points(1000, bbox)
print(df.head())

         lat        lon                  timestamp
0  40.517969 -74.298845 2025-02-16 13:54:43.559362
1  40.596375 -73.765885 2025-02-16 13:52:14.559362
2  40.899151 -73.762868 2025-02-16 13:48:33.559362
3  40.802635 -74.193544 2025-02-16 13:48:20.559362
4  40.630983 -74.155055 2025-02-16 13:49:39.559362


In [3]:
%pwd

'/Users/kzc0l4/arcdemo/notebooks'

In [4]:
import os

os.chdir("../")
%pwd

'/Users/kzc0l4/arcdemo'

In [5]:
df.to_csv("data/call_logs.csv", index=False)

In [5]:
from pathlib import Path


@dataclass(frozen=True)
class DataGenerationConfig:
    number_of_samples: int
    local_data_file: Path

In [7]:
os.chdir("src/")
%pwd

'/Users/kzc0l4/arcdemo/src'

<class 'module'>


In [None]:
from arcdemo.constants import CONFIG_FILE_PATH
from arcdemo.utils.common import read_config


class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_FILE_PATH):
        self.config = read_config(config_filepath)

    def get_data_generation_config(self) -> DataGenerationConfig:
        config = self.config.data_generation

        data_generation_config = DataGenerationConfig(
            number_of_samples=config.number_of_samples, local_data_file=config.local_data_file
        )

        return data_generation_config

In [11]:
os.chdir("../")
config = ConfigurationManager()

print(config.get_data_generation_config())

[2025-02-16 13:56:51,547: INFO: common: Config file loaded: confs/config.yaml]
DataGenerationConfig(number_of_samples=1000, local_data_file='data/call_logs.csv')
