In [1]:
from pyspark.sql import SparkSession
import os

# Initialize Spark session with Delta and S3 settings
spark = SparkSession.builder \
    .appName("KinesisToDeltaLake") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
    .config("spark.jars.packages", "io.delta:delta-core_2.12:2.4.0,org.apache.hadoop:hadoop-aws:3.3.2,com.amazonaws:aws-java-sdk-bundle:1.11.1026") \
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
    .config("spark.hadoop.fs.s3a.access.key", os.getenv("AWS_ACCESS_KEY_ID")) \
    .config("spark.hadoop.fs.s3a.secret.key", os.getenv("AWS_SECRET_ACCESS_KEY")) \
    .config("spark.hadoop.fs.s3a.session.token", os.getenv("AWS_SESSION_TOKEN")) \
    .config("spark.hadoop.fs.s3a.endpoint", "s3.amazonaws.com") \
    .config("spark.sql.files.maxPartitionBytes", "134217728") \
    .getOrCreate()


24/09/30 20:28:11 WARN Utils: Your hostname, Somnium.local resolves to a loopback address: 127.0.0.1; using 172.28.59.194 instead (on interface en0)
24/09/30 20:28:11 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


:: loading settings :: url = jar:file:/Users/borja/Library/Caches/pypoetry/virtualenvs/route-optimizer-AqO2e-Ud-py3.11/lib/python3.11/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /Users/borja/.ivy2/cache
The jars for the packages stored in: /Users/borja/.ivy2/jars
io.delta#delta-core_2.12 added as a dependency
org.apache.hadoop#hadoop-aws added as a dependency
com.amazonaws#aws-java-sdk-bundle added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-bd7cb223-11e7-4c1c-af8b-8e61d450a728;1.0
	confs: [default]
	found io.delta#delta-core_2.12;2.4.0 in central
	found io.delta#delta-storage;2.4.0 in central
	found org.antlr#antlr4-runtime;4.9.3 in central
	found org.apache.hadoop#hadoop-aws;3.3.2 in central
	found com.amazonaws#aws-java-sdk-bundle;1.11.1026 in central
	found org.wildfly.openssl#wildfly-openssl;1.0.7.Final in central
:: resolution report :: resolve 261ms :: artifacts dl 13ms
	:: modules in use:
	com.amazonaws#aws-java-sdk-bundle;1.11.1026 from central in [default]
	io.delta#delta-core_2.12;2.4.0 from central in [default]
	io.delta#delta-storage;2.4.0 from central in [default]
	org.antlr#antlr4-r

In [2]:
import boto3
import json
import time
from dotenv import load_dotenv
from datetime import datetime

# Load environment variables from .env file
load_dotenv()

# Initialize AWS credentials from the .env file
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
AWS_SESSION_TOKEN = os.getenv('AWS_SESSION_TOKEN')
AWS_REGION = os.getenv('AWS_REGION')


In [3]:
# Initialize boto3 client for Kinesis with your credentials
kinesis_client = boto3.client(
    'kinesis',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    aws_session_token=AWS_SESSION_TOKEN,
    region_name=AWS_REGION
)

In [4]:
# Function to update order status to "ready_for_dispatch"
def update_order_status(orders):
    for order in orders:
        order['status'] = 'ready_for_dispatch'
    return orders

# Function to write to Delta table
def write_to_delta(df, delta_table_path):
    df.write.format("delta").mode("append").save(delta_table_path)
    print(f"Written {df.count()} records to Delta table.")

# Function to filter only new orders (with status 'in_progress')
def filter_new_orders(orders):
    return [order for order in orders if order['status'] == 'in_progress']

In [5]:
import boto3
import json
import time


# Function to get the shard iterator
def get_shard_iterator(stream_name, shard_id):
    response = kinesis_client.get_shard_iterator(
        StreamName=stream_name,
        ShardId=shard_id,
        ShardIteratorType='TRIM_HORIZON'  # or 'LATEST' for new records
    )
    return response['ShardIterator']

# Function to read records from the Kinesis stream
def read_kinesis_records(shard_iterator):
    response = kinesis_client.get_records(ShardIterator=shard_iterator, Limit=100)
    records = response['Records']
    return records, response['NextShardIterator']

# Fetch Kinesis records
def fetch_kinesis_records(stream_name, shard_id):
    shard_iterator = get_shard_iterator(stream_name, shard_id)
    
    print("Fetching records from Kinesis stream...")
    records, shard_iterator = read_kinesis_records(shard_iterator)

    # Extract order data from records
    orders = []
    for record in records:
        order_data = record['Data']
        order = json.loads(order_data)
        print("Received order:", order)
        orders.append(order)

    return orders


In [6]:
delta_table_path_ready_for_dispatch = 's3a://orders-for-dispatch/ready_for_dispatching'

In [7]:
# Dispatcher function with fetch_kinesis_records integration
def dispatcher():
    stream_name = 'OrderStreamForDispatching'
    shard_id = 'shardId-000000000000'
    max_weight = 100  # Threshold for total weight
    max_volume = 500  # Threshold for total volume
    dispatch_time = datetime.strptime('12:00', '%H:%M').time()  # Dispatch by noon
    order_buffer = []
    
    accumulated_weight = 0
    accumulated_volume = 0

    print("Starting dispatcher...")

    while True:
        # Fetch records from Kinesis
        orders = fetch_kinesis_records(stream_name, shard_id)

        for order in orders:
            order_buffer.append(order)
            accumulated_weight += order['order']['weight']
            accumulated_volume += order['order']['volume']
            print(f"Accumulated weight: {accumulated_weight}, volume: {accumulated_volume}")

        # Check if any dispatch criteria is met: weight, volume, or time
        if (accumulated_weight >= max_weight or 
            accumulated_volume >= max_volume or 
            datetime.now().time() >= dispatch_time) and order_buffer:
            
            print("Threshold met. Dispatching orders...")

            # Update the order status to "ready_for_dispatch"
            ready_orders = update_order_status(order_buffer)

            # Create a DataFrame for the orders if the buffer is not empty
            if ready_orders:
                df = spark.createDataFrame(ready_orders)

                # Write the orders to the "ready_for_dispatch" Delta table
                write_to_delta(df, delta_table_path_ready_for_dispatch)

                # # Send the batch of orders to the route optimizer (Routific)
                # if send_to_route_optimizer(ready_orders):
                #     print("Orders successfully sent to route optimizer.")

            # Clear buffer and reset accumulators after dispatch
            order_buffer = []
            accumulated_weight = 0
            accumulated_volume = 0

        else:
            print(f"Threshold not met yet. Weight: {accumulated_weight}, Volume: {accumulated_volume}")

        # Sleep for a short time before fetching new records (to avoid throttling)
        time.sleep(5)


In [8]:
dispatcher()

Starting dispatcher...
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c-2fed-42b2-93

24/09/30 20:28:18 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties
24/09/30 20:28:26 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
24/09/30 20:28:30 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

24/09/30 20:29:03 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

24/09/30 20:29:26 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

24/09/30 20:29:39 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

24/09/30 20:29:52 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

24/09/30 20:30:16 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

24/09/30 20:30:32 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

24/09/30 20:30:46 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

24/09/30 20:31:48 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

24/09/30 20:32:04 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
                                                                                

Written 100 records to Delta table.
Fetching records from Kinesis stream...
Received order: {'client_id': 526061949432, 'location': {'address': 'Cuesta de Jacinta Aliaga, 885, Madrid, Spain', 'lat': 40.48663528941934, 'lon': -3.86801853646353}, 'order_id': '66b7045d-821e-43e4-a65a-8010c685090e', 'order': {'n_objects': 8, 'volume': 26.967033881885307, 'weight': 49.06647286407531}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:21'}
Received order: {'client_id': 250134210269, 'location': {'address': 'Vial de Julie Santamaria, 11, Madrid, Spain', 'lat': 40.64034937337452, 'lon': -3.6961708148562606}, 'order_id': '3e0379b8-ffd0-461f-8b9a-df7e4fcdfd4c', 'order': {'n_objects': 8, 'volume': 92.1583121744296, 'weight': 31.08281969586028}, 'status': 'in_progress', 'timestamp': '2024-09-29 20:35:36'}
Received order: {'client_id': 563599983792, 'location': {'address': 'Cuesta de María Morell, 43, Madrid, Spain', 'lat': 40.55202562462943, 'lon': -3.5678584538759948}, 'order_id': '472e454c

24/09/30 20:32:18 WARN MemoryManager: Total allocation exceeds 95.00% (1,020,054,720 bytes) of heap memory
Scaling row group sizes to 95.00% for 8 writers
ERROR:root:KeyboardInterrupt while sending command.                             
Traceback (most recent call last):
  File "/Users/borja/Library/Caches/pypoetry/virtualenvs/route-optimizer-AqO2e-Ud-py3.11/lib/python3.11/site-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/borja/Library/Caches/pypoetry/virtualenvs/route-optimizer-AqO2e-Ud-py3.11/lib/python3.11/site-packages/py4j/clientserver.py", line 511, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
                          ^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/borja/.pyenv/versions/3.11.8/lib/python3.11/socket.py", line 706, in readinto
    return self._sock.recv_into(b)
           ^^^^^^^^^^^^^^^^^^^^^^^
KeyboardInterrupt


KeyboardInterrupt: 

                                                                                