In [16]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import random

# Set random seed for reproducibility
np.random.seed(42)
random.seed(42)

# Constants
NUM_ENTITIES = 30
MIN_TIMESTAMPS = 1000
MAX_TIMESTAMPS = 3000
CITY_COUNTRY_MAP = {
    'New York': 'USA',
    'London': 'UK',
    'Tokyo': 'Japan',
    'Paris': 'France',
    'Berlin': 'Germany',
    'Sydney': 'Australia',
    'Singapore': 'Singapore',
    'Dubai': 'UAE',
    'Mumbai': 'India',
    'São Paulo': 'Brazil'
}
CITIES = list(CITY_COUNTRY_MAP.keys())
COUNTRIES = list(CITY_COUNTRY_MAP.values())
DISTRICTS = ['Downtown', 'Suburban', 'Industrial', 'Residential', 'Commercial']
ADMIN_CLASSES = ['national', 'provincial', 'municipal', 'village']

# Generate entity names (roads)
road_names = [f"Road_{i:03d}" for i in range(NUM_ENTITIES)]

# Generate timestamps (hourly data up to now)
end_time = datetime.now()
start_time = end_time - timedelta(hours=MAX_TIMESTAMPS)

# Create the time series data
all_data = []
all_entities = []
all_timestamps = []

for entity in road_names:
    # Random number of timestamps for this entity
    n_timestamps = random.randint(MIN_TIMESTAMPS, MAX_TIMESTAMPS)
    entity_timestamps = pd.date_range(
        end=end_time, periods=n_timestamps, freq='H')

    # Generate traffic data (cars per hour)
    # Base traffic level for this road
    base_traffic = np.random.randint(100, 1000)
    traffic = base_traffic + \
        np.random.normal(0, 100, n_timestamps)  # Add some noise
    traffic = np.maximum(0, traffic)  # Ensure non-negative
    traffic = traffic.astype(int)  # Convert to integer

    # Generate temperature data (in Celsius)
    base_temp = np.random.uniform(10, 25)  # Base temperature for this location
    # Add daily/seasonal variation
    temp = base_temp + np.random.normal(0, 5, n_timestamps)

    # Generate rain data (binary)
    # Different rain probabilities for different locations
    rain_prob = np.random.uniform(0.1, 0.3)
    rain = np.random.binomial(1, rain_prob, n_timestamps)

    # Combine all features
    entity_data = np.column_stack([traffic, temp, rain])

    all_data.append(entity_data)
    all_entities.extend([entity] * n_timestamps)
    all_timestamps.extend(entity_timestamps)

# Create the main time series DataFrame
miya_data = pd.DataFrame(
    np.vstack(all_data),
    index=pd.MultiIndex.from_arrays(
        [all_entities, all_timestamps],
        names=['entity', 'timestamp']
    ),
    columns=['cars_per_hour', 'temperature', 'is_raining']
)

# Create metadata
miya_metadata = pd.DataFrame({
    'city': np.random.choice(CITIES, NUM_ENTITIES),
    'district': np.random.choice(DISTRICTS, NUM_ENTITIES),
    'district_population': np.random.randint(10000, 1000000, NUM_ENTITIES),
    'city_population': np.random.randint(100000, 10000000, NUM_ENTITIES),
    'administrative_class': np.random.choice(ADMIN_CLASSES, NUM_ENTITIES)
}, index=road_names)
miya_metadata.index.name = 'entity'

# Map cities to their correct countries
miya_metadata['country'] = miya_metadata['city'].map(CITY_COUNTRY_MAP)

# Create miya_mapping for administrative_class
miya_mapping = {
    'administrative_class': {i: cls for i, cls in enumerate(ADMIN_CLASSES)}
}

# Convert administrative_class to integers
miya_metadata['administrative_class'] = miya_metadata['administrative_class'].map(
    {v: k for k, v in miya_mapping['administrative_class'].items()}
)

# Generate classification labels (accidents)
accident_labels = []
for entity in road_names:
    n_accidents = random.randint(5, 45)
    entity_data = miya_data.loc[entity]

    for _ in range(n_accidents):
        # Randomly select a timestamp for the accident
        accident_time = random.choice(entity_data.index)
        # Accident duration between 1 and 4 hours
        duration = random.randint(1, 4)

        accident_labels.append({
            'entity': entity,
            'start_timestamp': accident_time,
            'end_timestamp': accident_time + timedelta(hours=duration),
            'label': 1  # 1 indicates accident
        })

miya_labels = pd.DataFrame(accident_labels)

# Generate segmentation labels (traffic congestion levels)
segmentation_labels = []
for entity in road_names:
    entity_data = miya_data.loc[entity]
    traffic = entity_data['cars_per_hour']

    # Define congestion thresholds
    thresholds = {
        0: 0,      # Free flow
        1: 500,    # Mild congestion
        2: 800,    # Heavy congestion
        3: 1000    # Full stop
    }

    # Assign congestion levels based on traffic
    congestion_levels = np.zeros(len(traffic))
    for level, threshold in thresholds.items():
        congestion_levels[traffic >= threshold] = level

    # Create segmentation labels
    entity_segmentation = pd.DataFrame({
        'congestion_level': congestion_levels
    }, index=entity_data.index)
    entity_segmentation.index = pd.MultiIndex.from_arrays(
        [[entity] * len(traffic), entity_data.index],
        names=['entity', 'timestamp']
    )
    segmentation_labels.append(entity_segmentation)

miya_segmentation_labels = pd.concat(segmentation_labels)

  entity_timestamps = pd.date_range(


In [17]:
miya_labels

Unnamed: 0,entity,start_timestamp,end_timestamp,label
0,Road_000,2025-05-25 02:51:46.531564,2025-05-25 04:51:46.531564,1
1,Road_000,2025-05-30 03:51:46.531564,2025-05-30 06:51:46.531564,1
2,Road_000,2025-03-15 14:51:46.531564,2025-03-15 16:51:46.531564,1
3,Road_000,2025-05-25 15:51:46.531564,2025-05-25 18:51:46.531564,1
4,Road_000,2025-04-30 22:51:46.531564,2025-05-01 00:51:46.531564,1
...,...,...,...,...
669,Road_029,2025-05-23 06:51:46.531564,2025-05-23 07:51:46.531564,1
670,Road_029,2025-05-28 05:51:46.531564,2025-05-28 09:51:46.531564,1
671,Road_029,2025-04-29 21:51:46.531564,2025-04-30 00:51:46.531564,1
672,Road_029,2025-06-18 03:51:46.531564,2025-06-18 04:51:46.531564,1


In [18]:
miya_segmentation_labels

Unnamed: 0_level_0,Unnamed: 1_level_0,congestion_level
entity,timestamp,Unnamed: 2_level_1
Road_000,2025-03-14 12:51:46.531564,0.0
Road_000,2025-03-14 13:51:46.531564,0.0
Road_000,2025-03-14 14:51:46.531564,0.0
Road_000,2025-03-14 15:51:46.531564,0.0
Road_000,2025-03-14 16:51:46.531564,0.0
...,...,...
Road_029,2025-06-18 12:51:46.531564,0.0
Road_029,2025-06-18 13:51:46.531564,0.0
Road_029,2025-06-18 14:51:46.531564,0.0
Road_029,2025-06-18 15:51:46.531564,0.0


In [19]:
miya_data

Unnamed: 0_level_0,Unnamed: 1_level_0,cars_per_hour,temperature,is_raining
entity,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Road_000,2025-03-14 12:51:46.531564,146.0,7.787308,0.0
Road_000,2025-03-14 13:51:46.531564,253.0,10.976357,1.0
Road_000,2025-03-14 14:51:46.531564,249.0,1.563970,1.0
Road_000,2025-03-14 15:51:46.531564,338.0,14.186468,0.0
Road_000,2025-03-14 16:51:46.531564,110.0,3.333402,0.0
...,...,...,...,...
Road_029,2025-06-18 12:51:46.531564,395.0,21.340200,0.0
Road_029,2025-06-18 13:51:46.531564,315.0,15.088576,0.0
Road_029,2025-06-18 14:51:46.531564,376.0,11.368977,0.0
Road_029,2025-06-18 15:51:46.531564,343.0,13.896923,1.0


In [20]:
miya_metadata

Unnamed: 0_level_0,city,district,district_population,city_population,administrative_class,country
entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Road_000,New York,Residential,744028,3131809,3,USA
Road_001,Dubai,Industrial,989841,2920730,1,UAE
Road_002,Mumbai,Commercial,153130,5263602,2,India
Road_003,London,Residential,738046,9901576,3,UK
Road_004,São Paulo,Suburban,167742,1946237,3,Brazil
Road_005,Paris,Downtown,822598,2827174,3,France
Road_006,Paris,Industrial,869493,3913093,1,France
Road_007,Dubai,Industrial,72608,4548866,2,UAE
Road_008,Tokyo,Commercial,761871,724075,1,Japan
Road_009,Singapore,Residential,359576,4281636,0,Singapore
