In [None]:
import numpy as np
import pandas as pd
from dataclasses import dataclass


@dataclass
class BoundingBox:
    min_lat: float
    max_lat: float
    min_lon: float
    max_lon: float


# TIME_GAP = 15 * 60  # 15 minutes in seconds


# function to generate n timestamps randomly over last 15 minutes
def generate_timestamps(n: int) -> pd.Series:
    # generate n random integers between 0 and 15*60
    seconds = np.random.randint(0, TIME_GAP, n)
    # subtract the seconds from the current time
    now = pd.Timestamp.now()
    timestamps = now - pd.to_timedelta(seconds, unit="s")
    return timestamps


# create latitudes and longitudes within a bounding box
# also add a timestamp to each point to simulate a time series
def generate_points(n: int, bbox: BoundingBox) -> pd.DataFrame:
    lats = np.random.uniform(bbox.min_lat, bbox.max_lat, n)
    lons = np.random.uniform(bbox.min_lon, bbox.max_lon, n)
    timestamps = generate_timestamps(n)
    return pd.DataFrame({"lat": lats, "lon": lons, "timestamp": timestamps})


# create a bounding box
bbox = BoundingBox(40.5, 40.9, -74.3, -73.7)

# generate 1000 points within the bounding box
df = generate_points(1000, bbox)
print(df.head())

         lat        lon                  timestamp
0  40.517969 -74.298845 2025-02-16 13:54:43.559362
1  40.596375 -73.765885 2025-02-16 13:52:14.559362
2  40.899151 -73.762868 2025-02-16 13:48:33.559362
3  40.802635 -74.193544 2025-02-16 13:48:20.559362
4  40.630983 -74.155055 2025-02-16 13:49:39.559362


In [2]:
%pwd

'/Users/kzc0l4/arcdemo/notebooks'

In [3]:
import os

os.chdir("../")
%pwd

'/Users/kzc0l4/arcdemo'

In [5]:
df.to_csv("data/call_logs.csv", index=False)

In [21]:
from pathlib import Path


@dataclass
class BoundingBox:
    min_lat: float
    max_lat: float
    min_lon: float
    max_lon: float


@dataclass(frozen=True)
class DataGenerationConfig:
    number_of_samples: int
    local_data_file: Path
    bbox: BoundingBox

In [7]:
os.chdir("src/")
%pwd

'/Users/kzc0l4/arcdemo/src'

<class 'module'>


In [None]:
from arcdemo.constants import CONFIG_FILE_PATH
from arcdemo.utils.common import read_config
from arcdemo.entity.config_entity import BoundingBox, DataGenerationConfig  # noqa: F811


class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_FILE_PATH):
        self.config = read_config(config_filepath)

    def get_data_generation_config(self) -> DataGenerationConfig:
        config = self.config.data_generation

        data_generation_config = DataGenerationConfig(
            number_of_samples=config.number_of_samples,
            local_data_file=config.local_data_file,
            bbox=BoundingBox(
                min_lat=config.bbox.min_lat,
                max_lat=config.bbox.max_lat,
                min_lon=config.bbox.min_lon,
                max_lon=config.bbox.max_lon,
            ),
        )

        return data_generation_config

In [25]:
# os.chdir("../")
config = ConfigurationManager()

print(config.get_data_generation_config())

[2025-02-16 14:36:50,340: INFO: common: Config file loaded: confs/config.yaml]
DataGenerationConfig(number_of_samples=1000, local_data_file='data/call_logs.csv', bbox=BoundingBox(min_lat=37.7749, max_lat=37.7755, min_lon=-122.4194, max_lon=-122.4184))


In [5]:
import sys
import os

sys.path.append(os.path.abspath("src"))

In [None]:
# Description: Generate random points within a bounding box.
import pandas as pd

from arcdemo.constants import TIME_GAP
from arcdemo.entity.config_entity import DataGenerationConfig


class DataGeneration:
    """
    Generate random points within a bounding box.
    """

    def __init__(self, config: DataGenerationConfig):
        self.config = config

    def _generate_timestamps(self) -> pd.Series:
        """
        Generate n random timestamps within the last TIME_GAP seconds.
        """
        # generate n random integers between 0 and 15*60
        seconds = np.random.randint(0, TIME_GAP, self.config.number_of_samples)
        # subtract the seconds from the current time
        now = pd.Timestamp.now()
        timestamps = now - pd.to_timedelta(seconds, unit="s")
        return timestamps

    def _generate_points(self) -> pd.DataFrame:
        """
        Generate n random points within the bounding box.
        """
        n = self.config.number_of_samples
        bbox = self.config.bbox
        lats = np.random.uniform(bbox.min_lat, bbox.max_lat, n)
        lons = np.random.uniform(bbox.min_lon, bbox.max_lon, n)
        timestamps = self._generate_timestamps()
        return pd.DataFrame(
            {
                "lat": lats,
                "lon": lons,
                "timestamp": timestamps,
            }
        )

    def save_points(self) -> pd.DataFrame:
        """
        Save the generated points to a local file.
        """
        points = self._generate_points()
        points.to_csv(self.config.local_data_file, index=False)
        return points

In [11]:
config = ConfigurationManager()
data_generation_config = config.get_data_generation_config()
data_generation = DataGeneration(config=data_generation_config)
# bbox = BoundingBox(40.5, 40.9, -74.3, -73.7)
df = data_generation.save_points()

[2025-02-16 15:05:25,745: INFO: common: Config file loaded: confs/config.yaml]


In [12]:
df.count()

lat          1000
lon          1000
timestamp    1000
dtype: int64