In [8]:
import json
import uuid
from pathlib import Path

import numpy as np
import pandas as pd

In [13]:
# Remote CSV with the client records; it is loaded with pd.read_csv in a later cell
DATA = "https://raw.githubusercontent.com/mgranica/route_optimizer_platform/refs/heads/main/data_sources_generator/synthetic_data/results/clients_geo.csv"
# Upper bound handed to generate_orders for the randomly drawn order weight
max_weight_per_order = 50
# Upper bound handed to generate_orders for the randomly drawn order volume
max_vol_per_order = 100
# Value a client's 'status' column must equal for the client to receive an order
status = "active"

In [14]:
def generate_orders(data, status, max_vol_per_order, max_weight_per_order, seed=None):
    """
    Create one synthetic order for every client whose status matches.

    Parameters:
    - data: pandas.DataFrame with at least the columns 'client_id' and 'status'.
    - status: value a row's 'status' must equal for the client to get an order.
    - max_vol_per_order: upper bound of the uniform draw for 'volume' (lower bound is 1).
    - max_weight_per_order: upper bound of the uniform draw for 'weight' (lower bound is 1).
    - seed: optional int. When given, every random field (including 'order_id')
      is derived from a private generator seeded with it, so repeated calls
      return identical frames. When None (default), values come from NumPy's
      global random state and uuid.uuid4(), exactly as before.

    Returns:
    - pandas.DataFrame with the columns 'client_id', 'order_id', 'volume',
      'weight' and 'n_objects', one row per matching client. The columns are
      present even when no client matches, so the result can always be joined
      on 'client_id'.
    """
    order_columns = ['client_id', 'order_id', 'volume', 'weight', 'n_objects']

    # np.random (module) and RandomState expose the same uniform/randint API,
    # so the unseeded path keeps drawing from the global state as it always did.
    rng = np.random if seed is None else np.random.RandomState(seed)

    def _new_order_id():
        # Random UUID4 by default; derived from the seeded generator otherwise
        if seed is None:
            return str(uuid.uuid4())
        return str(uuid.UUID(bytes=rng.bytes(16), version=4))

    # Filter for clients with the specified status
    filtered_data = data[data['status'] == status]

    orders_list = [
        {
            'client_id': client_id,
            'order_id': _new_order_id(),
            'volume': rng.uniform(1, max_vol_per_order),
            'weight': rng.uniform(1, max_weight_per_order),
            # randint excludes the upper bound, so n_objects is in 1..9
            'n_objects': rng.randint(1, 10),
        }
        for client_id in filtered_data['client_id']
    ]

    # Passing the columns explicitly keeps the schema stable for an empty list
    return pd.DataFrame(orders_list, columns=order_columns)

In [15]:
def transform_to_json(clients):
    """
    Serialize client/order rows into a JSON array of nested objects.

    Each row becomes one object holding 'client_id', a 'location' object
    ('address', 'lat', 'lon'), 'order_id', an 'order' object ('n_objects',
    'volume', 'weight') and 'status'.

    Parameters:
    - clients: pandas.DataFrame containing all of the columns named above.

    Returns:
    - str: the JSON text, indented with four spaces.
    """
    location_fields = ('address', 'lat', 'lon')
    order_fields = ('n_objects', 'volume', 'weight')

    records = []
    for _index, row in clients.iterrows():
        # Keys are inserted in the order they must appear in the output
        record = {}
        record['client_id'] = row['client_id']
        record['location'] = {field: row[field] for field in location_fields}
        record['order_id'] = row['order_id']
        record['order'] = {field: row[field] for field in order_fields}
        record['status'] = row['status']
        records.append(record)

    return json.dumps(records, indent=4)


In [16]:
# Fetch the raw client table from the configured CSV location
users = pd.read_csv(DATA)

# Narrow the table down to the columns of interest
users_clean = users[["client_id", "address", "status", "points", "lat", "lon"]]

In [17]:
# Draw one synthetic order for every client whose status matches the configured value
orders = generate_orders(
    data=users_clean,
    status=status,
    max_vol_per_order=max_vol_per_order,
    max_weight_per_order=max_weight_per_order,
)

# Attach each client's attributes to its order with an inner join on client_id
clients = orders.merge(users_clean, on='client_id', how='inner')

In [18]:
# Serialize the merged client/order rows into a nested JSON string
# (transform_to_json returns text, not a DataFrame)
clients_final = transform_to_json(clients)

# Print only a short preview so the notebook output stays small;
# the complete JSON string is still available in clients_final.
preview_line_count = 40
print("\n".join(clients_final.splitlines()[:preview_line_count]))

[
    {
        "client_id": 298104694689,
        "location": {
            "address": "Bypass Sur",
            "lat": 40.390130799999994,
            "lon": -3.683518876417766
        },
        "order_id": "38404728-3397-47c9-b4dc-7d09e52ff60e",
        "order": {
            "n_objects": 8,
            "volume": 86.55173219249205,
            "weight": 8.31633052025059
        },
        "status": "active"
    },
    {
        "client_id": 484625614518,
        "location": {
            "address": "Calle de Embajadores188",
            "lat": 40.39298055,
            "lon": -3.6942722908057015
        },
        "order_id": "ecb7e80d-1f16-425d-8815-442c3ecc96c0",
        "order": {
            "n_objects": 4,
            "volume": 34.24246090179633,
            "weight": 30.384070692867773
        },
        "status": "active"
    },
    {
        "client_id": 355616727318,
        "location": {
            "address": "Calle de Antonio Nebrija",
            "lat": 40.4036985,
    

In [22]:
# Output directory, relative to the directory the notebook is run from, so the
# cell works on any machine rather than only where one user's home folder exists.
data_path = 'data'

# Make sure the target directory is there before writing into it
output_file = Path(data_path) / 'clients_final.json'
output_file.parent.mkdir(parents=True, exist_ok=True)

# Save the JSON string to a file with an explicit encoding
with output_file.open('w', encoding='utf-8') as file:
    file.write(clients_final)

# Print a confirmation message
print("Se ha guardado 'clients_final.json'")

Se ha guardado 'clients_final.json'
