# In [7]:  (Jupyter cell prompt left over from a notebook export)
import json
import random
from copy import deepcopy

def generate_random_route(city_from, city_to):
    """Build one trip between two cities carrying a random load.

    Args:
        city_from: Value stored under the ``'from'`` key.
        city_to: Value stored under the ``'to'`` key.

    Returns:
        A dict with keys ``'from'``, ``'to'`` and ``'merchandise'``. The
        merchandise maps between one and seven distinct product names to an
        integer quantity in the range 1..50.
    """
    catalogue = ['milk', 'honey', 'butter', 'tomatoes', 'pens', 'bread', 'coca-cola']

    # First decide how many distinct products travel, then which ones.
    how_many = random.randint(1, len(catalogue))
    chosen = random.sample(catalogue, how_many)

    load = {}
    for product in chosen:
        load[product] = random.randint(1, 50)

    return {'from': city_from, 'to': city_to, 'merchandise': load}

def generate_random_standard_route(route_id):
    """Create a standard route made of 2 to 4 consecutive trips.

    The trips always follow the fixed order of the city list below, starting
    at its first entry; only the number of trips and each trip's merchandise
    are random.

    Args:
        route_id: Identifier embedded in the route id as ``s<route_id>``.

    Returns:
        A dict with keys ``'id'`` and ``'route'``, the latter being the list
        of trips produced by ``generate_random_route``.
    """
    stops = ['Rome', 'Milan', 'Verona', 'Venezia', 'Bergamo', 'Bolzano', 'Trento']
    leg_count = random.randint(2, 4)

    # Pair each stop with its successor: (stops[0], stops[1]), (stops[1], stops[2]), ...
    legs = []
    for origin, destination in zip(stops[:leg_count], stops[1:leg_count + 1]):
        legs.append(generate_random_route(origin, destination))

    return {'id': f's{route_id}', 'route': legs}

def introduce_variation(route):
    """Return a randomly perturbed deep copy of *route*.

    The input is never modified. For every trip in ``route['route']``:

    * each existing merchandise quantity has a 30% chance of being shifted by
      a random amount in -5..5 (never dropping below 1);
    * with 20% probability one random product is added or removed: a product
      already on the trip is removed (unless it is the only one left, in
      which case its quantity is redrawn), a product not on the trip is added
      with a quantity in 1..50;
    * with 10% probability both endpoints are replaced by two *distinct*
      random cities, so a trip never ends up going from a city to itself.

    Args:
        route: Dict with a ``'route'`` list of trips; each trip is a dict with
            ``'from'``, ``'to'`` and a ``'merchandise'`` dict of quantities.

    Returns:
        The modified deep copy of ``route``.
    """
    merchandise_types = ['milk', 'honey', 'butter', 'tomatoes', 'pens', 'bread', 'coca-cola']
    cities = ['Rome', 'Milan', 'Verona', 'Venezia', 'Bergamo', 'Bolzano', 'Trento']

    modified_route = deepcopy(route)
    for trip in modified_route['route']:
        merchandise = trip['merchandise']

        # Iterate over a snapshot of the keys so the dict is never written to
        # while it is being iterated.
        for item in list(merchandise):
            if random.random() < 0.3:  # 30% chance of modification
                merchandise[item] = max(1, merchandise[item] + random.randint(-5, 5))

        if random.random() < 0.2:  # 20% chance of adding/removing an item
            item_to_change = random.choice(merchandise_types)
            if item_to_change in merchandise and len(merchandise) > 1:
                # Removal: keep at least one product so the trip is not empty.
                del merchandise[item_to_change]
            else:
                merchandise[item_to_change] = random.randint(1, 50)

        if random.random() < 0.1:  # 10% chance of changing cities
            # sample() draws without replacement, guaranteeing from != to.
            trip['from'], trip['to'] = random.sample(cities, 2)

    return modified_route

def generate_actual_route(route_id, driver, standard_route):
    """Derive an "actual" route driven by *driver* from a standard route.

    Only the endpoints of each standard trip are reused: the merchandise of
    every trip is redrawn with ``generate_random_route`` and the result is
    then passed through ``introduce_variation``.

    Args:
        route_id: Identifier embedded in the route id as ``a<route_id>``.
        driver: Value stored under the ``'driver'`` key.
        standard_route: Dict with a ``'route'`` list of trips, each having
            ``'from'`` and ``'to'`` keys. It is not modified.

    Returns:
        A new route dict with the updated ``'id'``, the added ``'driver'``
        and the regenerated, perturbed ``'route'``.
    """
    draft = deepcopy(standard_route)

    # Rebuild every leg from the standard endpoints with a fresh random load.
    fresh_legs = []
    for leg in standard_route['route']:
        fresh_legs.append(generate_random_route(leg['from'], leg['to']))

    draft.update({'id': f'a{route_id}', 'driver': driver, 'route': fresh_legs})
    return introduce_variation(draft)

def generate_synthetic_dataset(num_standard_routes, num_actual_routes_per_standard,
                               standard_path='standard.json', actual_path='actual.json'):
    """Generate standard and actual routes and write them to two JSON files.

    Args:
        num_standard_routes: Number of standard routes to create; they get
            ids ``s1`` .. ``s<num_standard_routes>``.
        num_actual_routes_per_standard: NOTE(review): despite its name, this
            is the *total* number of actual routes generated. Each one is
            derived from a standard route picked at random, so some standard
            routes may get several actual routes and others none.
        standard_path: Output file for the standard routes. Defaults to
            ``'standard.json'`` in the current working directory.
        actual_path: Output file for the actual routes. Defaults to
            ``'actual.json'`` in the current working directory.

    Raises:
        IndexError: If actual routes are requested while
            ``num_standard_routes`` is less than 1, because there is no
            standard route to choose from.
    """
    drivers = ['A', 'B', 'C', 'D', 'E']
    standard_routes = [generate_random_standard_route(i) for i in range(1, num_standard_routes + 1)]

    actual_routes = []
    for route_id in range(1, num_actual_routes_per_standard + 1):
        driver = random.choice(drivers)
        standard_route = random.choice(standard_routes)
        actual_routes.append(generate_actual_route(route_id, driver, standard_route))

    # Explicit encoding keeps the output independent of the platform default.
    with open(standard_path, 'w', encoding='utf-8') as standard_file:
        json.dump(standard_routes, standard_file, indent=2)

    with open(actual_path, 'w', encoding='utf-8') as actual_file:
        json.dump(actual_routes, actual_file, indent=2)

# Script entry point: when run directly, build 100 standard routes and 200
# actual routes and write them to standard.json and actual.json.
if __name__ == "__main__":
    # NOTE: num_actual_routes_per_standard is used as the total number of
    # actual routes (200 here), not a per-standard-route count.
    generate_synthetic_dataset(num_standard_routes=100, num_actual_routes_per_standard=200)
