In [1]:
from copy import deepcopy

In [None]:
def generate_random_route(city_from, city_to):
    merchandise_types = ['milk', 'honey', 'butter', 'tomatoes', 'pens', 'bread', 'coca-cola']
    merchandise = {item: random.randint(1, 50) for item in random.sample(merchandise_types, random.randint(1, len(merchandise_types)))}
    return {'from': city_from, 'to': city_to, 'merchandise': merchandise}

In [ ]:
def generate_random_standard_route(route_id):
    cities = ['Rome', 'Milan', 'Verona', 'Venezia', 'Bergamo', 'Bolzano', 'Trento']
    num_trips = random.randint(2, 4)
    return {'id': f's{route_id}', 'route': [generate_random_route(cities[i], cities[i+1]) for i in range(num_trips)]}

In [ ]:
def introduce_variation(route):
    # Simulate variations by randomly modifying quantities, adding/removing items, and changing cities
    modified_route = deepcopy(route)
    for trip in modified_route['route']:
        for item, quantity in trip['merchandise'].items():
            if random.random() < 0.3:  # 30% chance of modification
                trip['merchandise'][item] = max(1, quantity + random.randint(-5, 5))
        if random.random() < 0.2:  # 20% chance of adding/removing items
            item_to_change = random.choice(['milk', 'honey', 'butter', 'tomatoes', 'pens', 'bread', 'coca-cola'])
            trip['merchandise'][item_to_change] = random.randint(1, 50)
        if random.random() < 0.1:  # 10% chance of changing cities
            trip['from'] = random.choice(['Rome', 'Milan', 'Verona', 'Venezia', 'Bergamo', 'Bolzano', 'Trento'])
            trip['to'] = random.choice(['Rome', 'Milan', 'Verona', 'Venezia', 'Bergamo', 'Bolzano', 'Trento'])
    return modified_route

In [ ]:
def generate_actual_route(route_id, driver, standard_route):
    actual_route = deepcopy(standard_route)
    actual_route['id'] = f'a{route_id}'
    actual_route['driver'] = driver
    actual_route['route'] = [generate_random_route(trip['from'], trip['to']) for trip in standard_route['route']]
    actual_route = introduce_variation(actual_route)
    return actual_route

In [ ]:
def generate_synthetic_dataset(num_standard_routes, num_actual_routes_per_standard):
    standard_routes = [generate_random_standard_route(i) for i in range(1, num_standard_routes + 1)]

    actual_routes = []
    for route_id in range(1, num_actual_routes_per_standard + 1):
        driver = random.choice(['A', 'B', 'C', 'D', 'E'])
        standard_route = random.choice(standard_routes)
        actual_route = generate_actual_route(route_id, driver, standard_route)
        actual_routes.append(actual_route)

    with open('standard.json', 'w') as standard_file:
        json.dump(standard_routes, standard_file, indent=2)

    with open('actual.json', 'w') as actual_file:
        json.dump(actual_routes, actual_file, indent=2)

In [ ]:
if __name__ == "__main__":
    generate_synthetic_dataset(num_standard_routes=100, num_actual_routes_per_standard=200)

## Modifying the synthetic dataset

### Generating connected standard routes

In [8]:
import json

# List of top 50 cities in Italy (from Wikipedia)
cities = [
    "Rome", "Milan", "Naples", "Turin", "Palermo", "Genoa", "Bologna", "Florence",
    "Bari", "Catania", "Verona", "Venice", "Messina", "Padua", "Prato", "Trieste",
    "Brescia", "Parma", "Taranto", "Modena", "Reggio Calabria", "Reggio Emilia",
    "Perugia", "Ravenna", "Livorno", "Rimini", "Cagliari", "Foggia", "Ferrara",
    "Salerno", "Latina", "Giugliano in Campania", "Monza", "Sassari", "Bergamo",
    "Pescara", "Trento", "Forlì", "Syracuse", "Vicenza", "Terni", "Bolzano-Bozen",
    "Piacenza", "Novara", "Ancona", "Udine", "Andria", "Arezzo", "Cesena"
]

# Merchandise types
merchandise_types = ['milk', 'honey', 'butter', 'tomatoes', 'pens', 'bread', 'coca-cola']

In [9]:
def generate_merchandise():
    """Generate a random set of merchandise with quantities."""
    return {
        # merchandise_type: random_quantity for _ in range(random_number_of_items)
        random.choice(merchandise_types): random.randint(1, 50) for _ in range(random.randint(1, 4))
    }

def generate_connected_route(min_trips_, max_trips_):
    """
    Generate a connected route with a random number of trips within the specified constraint.
    
    @param min_trips_: minimum number of trips in the route
    @param max_trips_: maximum number of trips in the route
    
    @return: a connected route with a random number of trips within the specified constraint
    """
    route_length = random.randint(min_trips_, max_trips_)  # Number of trips in the route
    
    # random.sample() returns a list of unique elements, so the route is guaranteed to be connected
    selected_cities = random.sample(cities, route_length + 1)  # One more city for the final destination
    
    # randomize the order of the cities
    random.shuffle(selected_cities)
    
    route_ = []
    for i in range(route_length):  # Generate a trip for each pair of cities
        from_city = selected_cities[i]
        to_city = selected_cities[i + 1]
        route_.append({"from": from_city, "to": to_city, "merchandise": generate_merchandise()})

    return route_

def generate_standard_routes_connected(num_routes, min_trips_, max_trips_):
    """
    Generate a set of standard routes with connected trips and trip number constraints.
    
    @param num_routes: number of routes to generate
    @param min_trips_: minimum number of trips in the route
    @param max_trips_: maximum number of trips in the route
    
    @return: a set of standard routes with connected trips and trip number constraints
    """
    standard_routes = []
    for i in range(num_routes):  # Generate a standard route for each route id
        # Generate a connected route with the specified trip constraints
        route_ = generate_connected_route(min_trips_, max_trips_)
        
        # Add the route to the list of standard routes
        standard_routes.append({"id": f"s{i+1}", "route": route_})  
    
    return standard_routes

In [10]:
# Define minimum and maximum number of trips in a route
min_trips = 3
max_trips = 7

# Generate 10 standard connected routes with the specified trip constraints
standard_connected_routes = generate_standard_routes_connected(10, min_trips, max_trips)

# Print the generated connected routes with constraints
for route in standard_connected_routes:
    print(json.dumps(route, indent=2))

{
  "id": "s1",
  "route": [
    {
      "from": "Venice",
      "to": "Modena",
      "merchandise": {
        "butter": 2
      }
    },
    {
      "from": "Modena",
      "to": "Ravenna",
      "merchandise": {
        "butter": 8,
        "milk": 8
      }
    },
    {
      "from": "Ravenna",
      "to": "Turin",
      "merchandise": {
        "coca-cola": 15,
        "milk": 2
      }
    },
    {
      "from": "Turin",
      "to": "Foggia",
      "merchandise": {
        "pens": 1
      }
    }
  ]
}
{
  "id": "s2",
  "route": [
    {
      "from": "Sassari",
      "to": "Novara",
      "merchandise": {
        "bread": 34,
        "honey": 50,
        "coca-cola": 30
      }
    },
    {
      "from": "Novara",
      "to": "Milan",
      "merchandise": {
        "bread": 32,
        "butter": 12,
        "honey": 5
      }
    },
    {
      "from": "Milan",
      "to": "Cagliari",
      "merchandise": {
        "honey": 16
      }
    },
    {
      "from": "Cagliari",
      

### Generating actual routes with variations

In [None]:
import json
import random

# Constants for the number of drivers and their IDs
NUM_DRIVERS = 20
DRIVERS = [f'D{i}' for i in range(1, NUM_DRIVERS + 1)]

# Function to load the standard routes data
def load_standard_routes(file_path):
    with open(file_path, 'r') as file:
        return json.load(file)

# Function to adjust merchandise quantities
def adjust_merchandise(merchandise):
    adjusted = {}
    for item, quantity in merchandise.items():
        decision = random.choice(['increase', 'decrease', 'omit', 'keep'])
        if decision == 'increase':
            adjusted[item] = min(quantity + random.randint(1, 5), 50)  # Cap at 50 for max quantity
        elif decision == 'decrease' and quantity > 1:
            adjusted[item] = max(quantity - random.randint(1, quantity - 1), 1)  # Ensure at least 1
        elif decision == 'keep':
            adjusted[item] = quantity
    return adjusted

In [ ]:
# Load the standard routes
standard_routes = load_standard_routes('standard.json')

In [14]:
def create_actual_route_variation(standard_route, driver_id):
    """
    Create a variation of the standard route to form an actual route.
    Variations include minor changes in the route and merchandise.

    :param standard_route: The original standard route.
    :param driver_id: The ID of the driver for the actual route.
    :return: A varied actual route.
    """
    actual_route = {
        "id": f"a{random.randint(1, 10000)}",  # Unique ID for the actual route
        "driver": driver_id,
        "sroute": standard_route["id"],
        "route": []
    }

    # Iterate over the trips in the standard route to create variations
    for trip in standard_route["route"]:
        # Randomly decide to make a minor detour
        if random.choice([True, False]):
            # Choose a random nearby city for a detour
            detour_city = random.choice(
                [city for city in cities if city != trip["from"] and city != trip["to"]]
            )
            
            # add a detour city in from of the original trip or after the original trip
            if random.choice([True, False]):
                # Add a detour trip (from the detour city to the original city)
                actual_route["route"].append({
                    "from": detour_city, "to": trip["from"], 
                    "merchandise": generate_merchandise()
                })
            else:
                # Add a detour trip (from the original city to the detour city)
                actual_route["route"].append({
                    "from": trip["from"], "to": detour_city, 
                    "merchandise": adjust_merchandise(trip["merchandise"])
                })
                
                # Add the original trip (from the detour city to the original destination)
                actual_route["route"].append({
                    "from": detour_city, "to": trip["to"],
                    "merchandise": generate_merchandise()
                })
        else:
            # Keep the original trip but adjust the merchandise
            actual_route["route"].append({
                "from": trip["from"], "to": trip["to"], 
                "merchandise": adjust_merchandise(trip["merchandise"])
            })

    return actual_route

# Generate actual routes with variations
actual_routes_with_variations = []
for driver in DRIVERS:
    for standard_route in standard_routes:
        # Generate multiple variations for each standard route
        for _ in range(random.randint(1, 3)):
            varied_route = create_actual_route_variation(standard_route, driver)
            actual_routes_with_variations.append(varied_route)

# Display the first few actual routes for review
print(json.dumps(actual_routes_with_variations[:3], indent=2))

[
  {
    "id": "a2329",
    "driver": "D1",
    "sroute": "s1",
    "route": [
      {
        "from": "Vicenza",
        "to": "Giugliano in Campania",
        "merchandise": {
          "honey": 48,
          "milk": 35
        }
      },
      {
        "from": "Giugliano in Campania",
        "to": "Genoa",
        "merchandise": {
          "pens": 21,
          "milk": 2,
          "honey": 22
        }
      },
      {
        "from": "Genoa",
        "to": "Monza",
        "merchandise": {
          "tomatoes": 6
        }
      },
      {
        "from": "Monza",
        "to": "Ravenna",
        "merchandise": {
          "butter": 47
        }
      },
      {
        "from": "Ravenna",
        "to": "Pescara",
        "merchandise": {
          "bread": 41
        }
      },
      {
        "from": "Pescara",
        "to": "Rome",
        "merchandise": {
          "bread": 40
        }
      },
      {
        "from": "Rome",
        "to": "Turin",
        "merchandise": {

In [19]:
standard_routes[0]

{'id': 's1',
 'route': [{'from': 'Vicenza',
   'to': 'Genoa',
   'merchandise': {'honey': 48, 'milk': 39}},
  {'from': 'Genoa',
   'to': 'Monza',
   'merchandise': {'coca-cola': 42, 'tomatoes': 6}},
  {'from': 'Monza',
   'to': 'Pescara',
   'merchandise': {'pens': 49, 'butter': 44}},
  {'from': 'Pescara',
   'to': 'Turin',
   'merchandise': {'bread': 36, 'pens': 3, 'milk': 49}},
  {'from': 'Turin',
   'to': 'Bari',
   'merchandise': {'milk': 40, 'tomatoes': 20, 'bread': 31}}]}

In [20]:
actual_routes_with_variations[0]

{'id': 'a2329',
 'driver': 'D1',
 'sroute': 's1',
 'route': [{'from': 'Vicenza',
   'to': 'Giugliano in Campania',
   'merchandise': {'honey': 48, 'milk': 35}},
  {'from': 'Giugliano in Campania',
   'to': 'Genoa',
   'merchandise': {'pens': 21, 'milk': 2, 'honey': 22}},
  {'from': 'Genoa', 'to': 'Monza', 'merchandise': {'tomatoes': 6}},
  {'from': 'Monza', 'to': 'Ravenna', 'merchandise': {'butter': 47}},
  {'from': 'Ravenna', 'to': 'Pescara', 'merchandise': {'bread': 41}},
  {'from': 'Pescara', 'to': 'Rome', 'merchandise': {'bread': 40}},
  {'from': 'Rome',
   'to': 'Turin',
   'merchandise': {'milk': 33, 'butter': 6, 'honey': 17}},
  {'from': 'Turin', 'to': 'Modena', 'merchandise': {'milk': 43, 'bread': 36}},
  {'from': 'Modena',
   'to': 'Bari',
   'merchandise': {'tomatoes': 12, 'pens': 42, 'butter': 18, 'bread': 36}}]}