In [1]:
import os
import random
import uuid
import numpy as np
from faker import Faker
from datetime import datetime
import time
import boto3
import json
from dotenv import load_dotenv

In [2]:
# Pull the variables declared in the .env file into the process environment
load_dotenv()

# AWS connection settings looked up in the environment (None when a variable is unset)
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_SESSION_TOKEN = os.environ.get("AWS_SESSION_TOKEN")
AWS_REGION = os.environ.get("AWS_REGION")

In [3]:
# Kinesis client created with the region and credentials held in the AWS_* settings
kinesis_client = boto3.client(
    "kinesis",
    region_name=AWS_REGION,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    aws_session_token=AWS_SESSION_TOKEN,
)

In [4]:
# Faker instance created for the 'es-ES' locale
fake = Faker("es-ES")

# Latitude / longitude bounds used when drawing random coordinates in Madrid
LAT_MIN = 40.3121
LAT_MAX = 40.6521
LON_MIN = -3.8666
LON_MAX = -3.5673

# Upper bounds for the random total volume and total weight of an order
max_vol_per_order = 500.0
max_weight_per_order = 100.0

# Status written into every order produced for Kinesis
status = "RECEIVED"

# Target Kinesis stream -- update with your Kinesis stream name
stream_name = "OrderStreamForDispatching"

In [5]:
# Madrid neighborhood names that random addresses are labelled with
neighborhoods_madrid = [
    "Chueca", "Lavapiés", "Malasaña",
    "Salamanca", "La Latina", "Chamberí",
    "Retiro", "Arganzuela", "Tetuan",
    "Usera", "Carabanchel", "Vallecas",
    "Moncloa", "Hortaleza", "Barajas",
    "Chamartín", "Villaverde", "Fuencarral-El Pardo",
]

In [6]:
# Function to generate a random address in Madrid
def generate_random_address():
    """Build one synthetic destination address.

    Street, house number, suburb, city district, state and postcode come from
    the module-level ``fake`` instance. The coordinates are drawn uniformly
    inside the LAT_MIN..LAT_MAX / LON_MIN..LON_MAX box, and the neighborhood
    is picked at random from ``neighborhoods_madrid`` independently of the
    coordinates.

    Returns:
        dict: address record; the same latitude/longitude pair appears both
        under "coordinates" (as ``[lat, lon]``) and under "lat" / "lon".
    """
    road = fake.street_name()
    house_number = fake.building_number()
    latitude = random.uniform(LAT_MIN, LAT_MAX)
    longitude = random.uniform(LON_MIN, LON_MAX)
    picked_neighborhood = random.choice(neighborhoods_madrid)

    # Keyword order below fixes the key order of the resulting record
    return dict(
        address_id=str(uuid.uuid4()),
        neighborhood=picked_neighborhood,
        coordinates=[latitude, longitude],
        road=road,
        house_number=house_number,
        suburb=fake.city_suffix(),
        city_district=fake.city(),
        state=fake.state(),
        postcode=fake.postcode(),
        country="Spain",
        lat=latitude,
        lon=longitude,
    )

# Function to generate random payment details
def generate_random_payment_details():
    """Build a synthetic payment record.

    Returns:
        dict: "payment_method" (one of "Credit Card", "PayPal", "Cash"),
        "payment_status" ("PAID" or "PENDING") and a fresh UUID string under
        "transaction_id".
    """
    method = random.choice(["Credit Card", "PayPal", "Cash"])
    payment_state = random.choice(["PAID", "PENDING"])
    transaction = str(uuid.uuid4())
    return dict(
        payment_method=method,
        payment_status=payment_state,
        transaction_id=transaction,
    )

# Function to generate random items for the order
def generate_random_items():
    """Build the list of synthetic line items for one order.

    Between 1 and 4 items are produced. Each item gets a UUID product id, a
    product name from ``fake.word()``, a price in [5.0, 200.0), a quantity of
    1-9 and 1-3 packages. A package's quantity is drawn between 1 and the
    item's quantity, independently for every package.

    Returns:
        list[dict]: item records, each with a nested "packages" list.
    """

    def _make_package(max_units):
        # One package record; the draws happen in key order
        return {
            "package_id": str(uuid.uuid4()),
            "subpackage_id": np.random.randint(1, 100),
            "quantity": np.random.randint(1, max_units + 1),
            "weight": np.random.uniform(0.1, 10.0),
            "volume": np.random.uniform(0.1, 5.0),
        }

    item_count = np.random.randint(1, 5)  # upper bound is exclusive: 1..4 items
    order_items = []
    for _ in range(item_count):
        entry = {
            "product_id": str(uuid.uuid4()),
            "product_name": fake.word(),
            "price": np.random.uniform(5.0, 200.0),
            "quantity": np.random.randint(1, 10),
        }
        package_count = np.random.randint(1, 4)
        entry["packages"] = [
            _make_package(entry["quantity"]) for _ in range(package_count)
        ]
        order_items.append(entry)
    return order_items

# Function to generate a random order for Kinesis
def generate_random_order():
    """Build one synthetic ORDER_CREATED event.

    The event wraps an "order_details" record holding a fresh customer id,
    the random items, a destination address, payment details and random
    totals. The totals (amount, volume, weight) are drawn independently of
    the generated items. The finished event is also printed for debugging.

    The clock is read exactly once, so "event_timestamp", "order_timestamp"
    and "order_date" always describe the same instant. Reading it twice could
    yield an order_date one day ahead of the timestamp when the calls
    straddled midnight.

    Returns:
        dict: event with keys "event_id", "event_type", "event_timestamp",
        "order_id" and "order_details".
    """
    order_id = str(uuid.uuid4())
    volume = np.random.uniform(1, max_vol_per_order)
    weight = np.random.uniform(1, max_weight_per_order)
    total_price = np.random.uniform(10.0, 1000.0)

    # Single clock reading shared by every time field of this order
    now = datetime.now()
    order_timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
    order_date = now.strftime("%Y-%m-%d")

    order_details = {
        "customer_id": f"cus-{uuid.uuid4()}",
        "order_timestamp": order_timestamp,
        "order_date": order_date,
        "items": generate_random_items(),
        "total_amount": total_price,
        "total_volume": volume,
        "total_weight": weight,
        "status": status,
        "destination_address": generate_random_address(),
        "payment_details": generate_random_payment_details()
    }

    # Create the full order structure for Kinesis
    order = {
        "event_id": str(uuid.uuid4()),
        "event_type": "ORDER_CREATED",
        "event_timestamp": order_timestamp,
        "order_id": order_id,
        "order_details": order_details
    }

    print("Generated order:", order)  # Print the generated order for debugging
    return order

In [7]:
# Function to send the generated order to Kinesis
def send_order_to_kinesis(stream_name, order):
    """Publish one order event to a Kinesis stream and print the response.

    Args:
        stream_name: name of the Kinesis stream to write to.
        order: order event; it must be JSON-serialisable and contain
            "order_id" and order["order_details"]["customer_id"].

    The record body is the UTF-8 encoded JSON dump of ``order`` and the
    partition key is the customer id, sent through the module-level
    ``kinesis_client``.
    """
    payload = json.dumps(order).encode('utf-8')
    # Partition based on customer_id
    partition_key = str(order['order_details']['customer_id'])

    response = kinesis_client.put_record(
        StreamName=stream_name,
        Data=payload,
        PartitionKey=partition_key,
    )
    print(f"Order {order['order_id']} sent to Kinesis. Response: {response}")

# Function to generate orders at random intervals and send them to Kinesis
def order_stream_generator(stream_name, min_sleep=5, max_sleep=10, max_orders=None):
    """Generate orders and send them to Kinesis, pausing between sends.

    Args:
        stream_name: name of the Kinesis stream passed to
            ``send_order_to_kinesis``.
        min_sleep: lower bound, in whole seconds, of the random pause
            between two orders (inclusive). Defaults to 5.
        max_sleep: upper bound, in whole seconds, of the random pause
            (inclusive). Defaults to 10.
        max_orders: stop after this many orders. ``None`` (the default)
            keeps the original behaviour of running until interrupted.

    Returns:
        int: number of orders sent (only reachable when ``max_orders`` is set).

    Raises:
        ValueError: from ``random.randint`` when ``min_sleep > max_sleep``.
    """
    sent = 0
    while max_orders is None or sent < max_orders:
        order = generate_random_order()
        send_order_to_kinesis(stream_name, order)
        sent += 1

        # No point pausing after the final order of a bounded run
        if max_orders is not None and sent >= max_orders:
            break

        # Sleep for a random whole number of seconds in [min_sleep, max_sleep]
        sleep_time = random.randint(min_sleep, max_sleep)
        print(f"Sleeping for {sleep_time} seconds")
        time.sleep(sleep_time)
    return sent

In [8]:
# Start the order stream generator.
# NOTE: called like this the generator keeps producing and sending orders in a
# loop, so this cell blocks the kernel until it is interrupted.
order_stream_generator(stream_name)

Generated order: {'event_id': '2a932339-37ac-4efa-b295-2816f10997bf', 'event_type': 'ORDER_CREATED', 'event_timestamp': '2024-10-16 23:15:01', 'order_id': 'acd3d058-ccad-49a6-9d36-a41da9974268', 'order_details': {'customer_id': 'cus-c0da1376-2c09-427f-bfb4-5c7060ebfc40', 'order_timestamp': '2024-10-16 23:15:01', 'order_date': '2024-10-16', 'items': [{'product_id': 'e9f54114-3ba0-4a2f-b315-ed9adea0cae8', 'product_name': 'rem', 'price': 46.659287016488975, 'quantity': 8, 'packages': [{'package_id': '9e597125-81fe-4a79-93a0-a81432a49c25', 'subpackage_id': 72, 'quantity': 8, 'weight': 5.511161132722182, 'volume': 4.4478177976069215}, {'package_id': '0f65b51d-6f7f-4d71-96eb-433dc937e917', 'subpackage_id': 67, 'quantity': 3, 'weight': 2.0042314199301274, 'volume': 2.7783683139050406}, {'package_id': '8fa8917c-10d3-496d-9473-eb03549fbcde', 'subpackage_id': 39, 'quantity': 6, 'weight': 7.845040280059994, 'volume': 4.513802816555738}]}, {'product_id': '132885a6-c534-4388-a4c2-7478eed28bf9', 'pr

KeyboardInterrupt: 

In [5]:
# # Function to generate a random address in Madrid
# def generate_random_address():
#     street_name = fake.street_name()
#     street_number = fake.building_number()
#     lat = random.uniform(LAT_MIN, LAT_MAX)
#     lon = random.uniform(LON_MIN, LON_MAX)
#     address = f"{street_name}, {street_number}, Madrid, Spain"
#     return {
#         "address": address,
#         "lat": lat,
#         "lon": lon
#     }

# # Function to generate a random order
# def generate_random_order(client_id):
#     order_id = str(uuid.uuid4())
#     volume = np.random.uniform(1, max_vol_per_order)
#     weight = np.random.uniform(1, max_weight_per_order)
#     n_objects = np.random.randint(1, 10)
    
#     # Ensure the timestamp is generated in the correct format (Y-M-D H:M:S)
#     order_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    
#     # Ensure the total price is a random float and never 0
#     total_price = np.random.uniform(10.0, 1000.0)
    
#     order = {
#         "order_id": order_id,
#         "customer_id": f"cus-{uuid.uuid4()}",  # Simulating customer_id format
#         "total_weight": weight,
#         "total_volume": volume,
#         "total_price": total_price,
#         "order_timestamp": order_timestamp,  # Formatted timestamp
#         "status": status,
#         "lat": random.uniform(LAT_MIN, LAT_MAX),
#         "lon": random.uniform(LON_MIN, LON_MAX)
#     }
#     print("Generated order:", order)  # Print the generated order for debugging
#     return order

In [6]:
# # Function to send the generated order to Kinesis
# def send_order_to_kinesis(stream_name, order):
#     order_data = json.dumps(order)
    
#     response = kinesis_client.put_record(
#         StreamName=stream_name,
#         Data=order_data.encode('utf-8'),
#         PartitionKey=str(order['customer_id'])  # Partition based on customer_id
#     )
#     print(f"Order {order['order_id']} sent to Kinesis. Response: {response}")

# # Function to generate orders at random intervals and send them to Kinesis
# def order_stream_generator(stream_name):
#     while True:
#         client_id = random.randint(100000000000, 999999999999)
#         order = generate_random_order(client_id)
#         send_order_to_kinesis(stream_name, order)
        
#         # Sleep for a random amount of time between 10 and 50 seconds (can be adjusted)
#         sleep_time = random.randint(1, 2)  # Random sleep time in seconds
#         print(f"Sleeping for {sleep_time / 60} minutes")
#         time.sleep(sleep_time)


In [None]:
# # Start the order stream generator
# order_stream_generator(stream_name)

import time
import random
import uuid
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from IPython.display import display

# Latitude / longitude ranges used by the live-plot order generator below.
# NOTE: LAT_MIN, LAT_MAX, LON_MIN and LON_MAX are also assigned near the top of
# the notebook with a tighter bounding box; running this cell replaces those values.
LAT_MIN = 40.0
LAT_MAX = 41.0
LON_MIN = -4.0
LON_MAX = -3.0

# Function to generate a random order
def generate_random_order():
    """Create one flat synthetic order record for the live plot.

    NOTE(review): this reuses the name of the Kinesis order generator defined
    earlier in the notebook, so whichever definition ran last is the one in
    effect. The record built here is flat and has no "order_details" key,
    which ``send_order_to_kinesis`` reads.

    Returns:
        dict: order id, customer id, random total weight / volume / price,
        a "%Y-%m-%d %H:%M:%S" timestamp, a random status and random
        coordinates inside the LAT_MIN..LAT_MAX / LON_MIN..LON_MAX box.
    """
    new_order_id = str(uuid.uuid4())
    drawn_weight = random.uniform(1, 100)
    drawn_volume = random.uniform(1, 100)
    drawn_price = random.uniform(10, 1000)
    created_at = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
    order_status = random.choice(["RECEIVED", "PROCESSING", "SHIPPED", "DELIVERED"])

    # Keyword order below fixes the key order of the resulting record
    return dict(
        order_id=new_order_id,
        customer_id=f"cus-{uuid.uuid4()}",
        total_weight=drawn_weight,
        total_volume=drawn_volume,
        total_price=drawn_price,
        order_timestamp=created_at,
        status=order_status,
        lat=random.uniform(LAT_MIN, LAT_MAX),
        lon=random.uniform(LON_MIN, LON_MAX),
    )

# Orders accumulated by stream_orders; reset to empty whenever this cell runs
order_data = []

# Three stacked panels with a shared x-axis, one per plotted metric
fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    subplot_titles=("Total Weight", "Total Volume", "Total Price"),
)

# One initially empty lines+markers trace per panel
fig.add_trace(
    go.Scatter(name='Total Weight', mode='lines+markers', x=[], y=[], text=[]),
    row=1, col=1,
)
fig.add_trace(
    go.Scatter(name='Total Volume', mode='lines+markers', x=[], y=[], text=[]),
    row=2, col=1,
)
fig.add_trace(
    go.Scatter(name='Total Price', mode='lines+markers', x=[], y=[], text=[]),
    row=3, col=1,
)

# Enlarge the figure and give it a title
fig.update_layout(title_text="Order Data Over Time", width=1000, height=800)

# Wrap the figure in a widget so later trace updates refresh the displayed plot
fig_widget = go.FigureWidget(fig)
display(fig_widget)

def stream_orders(stream_duration):
    """Generate random orders and redraw the live plot until time runs out.

    Each iteration appends a new order to the module-level ``order_data``
    list, prints it, rebuilds a DataFrame from all orders so far and pushes
    the timestamp / weight / volume / price series into the first three
    traces of ``fig_widget``.

    Args:
        stream_duration: how long to keep streaming, in seconds.

    The pause between orders is drawn from 0.02-0.05 minutes (1.2-3 seconds).
    It is shortened when less time than that remains, so the run does not
    overshoot ``stream_duration`` by a full pause.
    """
    # Column plotted on each trace, in trace order
    metric_columns = ("total_weight", "total_volume", "total_price")
    start_time = time.time()

    while time.time() - start_time < stream_duration:
        # Generate a random order
        order = generate_random_order()
        order_data.append(order)

        # Print the generated order
        print(f"Generated order: {order}")

        # Convert to pandas DataFrame
        df = pd.DataFrame(order_data)

        # Convert 'order_timestamp' to datetime
        df['order_timestamp'] = pd.to_datetime(df['order_timestamp'])

        # Hover text built column-wise instead of with a row-by-row apply
        annotations = (
            "Order ID: " + df['order_id'].astype(str)
            + "<br>Customer ID: " + df['customer_id'].astype(str)
        )

        # Update the live plot with new data
        with fig_widget.batch_update():
            for trace, column in zip(fig_widget.data, metric_columns):
                trace.x = df['order_timestamp']
                trace.y = df[column]
                trace.text = annotations

        # Pause for a random 0.02-0.05 minutes (1.2-3 seconds)
        sleep_duration = random.uniform(2, 5)/100
        sleep_seconds = sleep_duration * 60

        # Never sleep past the requested end of the stream
        remaining = stream_duration - (time.time() - start_time)
        if sleep_seconds > remaining:
            sleep_seconds = max(remaining, 0.0)
            sleep_duration = sleep_seconds / 60

        print(f"Sleeping for {sleep_duration } minutes")
        time.sleep(sleep_seconds)

# Run the stream for 10 minutes (600 seconds).
# The call returns only once that time has elapsed, so the cell keeps the
# kernel busy until then (interrupt the kernel to stop early).
stream_orders(600)