In [None]:
!pip install fastavro faker

Collecting fastavro
  Downloading fastavro-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)
Collecting faker
  Downloading faker-37.0.0-py3-none-any.whl.metadata (15 kB)
Downloading fastavro-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading faker-37.0.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fastavro, faker
Successfully installed faker-37.0.0 fastavro-1.10.0


In [None]:
import json
import random
import fastavro
import datetime
from faker import Faker

# Shared Faker instance; the generator functions below call it for UUIDs
# (fake.uuid4) and city names (fake.city).
fake = Faker()

# AVRO schemas ANNA & ANITA
# Avro record schema for one ride-request event. The timestamp is carried as a
# plain string, and the price field declares a default of 0.0.
ride_request_schema = {
    "type": "record",
    "name": "RideRequest",
    "fields": [
        {"name": "user_id", "type": "string"},
        {"name": "pickup_location", "type": "string"},
        {"name": "destination", "type": "string"},
        {"name": "timestamp", "type": "string"},
        {
            "name": "status",
            "type": {
                "type": "enum",
                "name": "Status",
                "symbols": [
                    "requested",
                    "cancelled",
                    "accepted",
                    "ongoing",
                    "completed",
                ],
            },
        },
        {"name": "price", "type": "double", "default": 0.0},
    ],
}

# Avro record schema for one driver-availability update. The timestamp is
# carried as a plain string here as well.
driver_availability_schema = {
    "type": "record",
    "name": "DriverAvailability",
    "fields": [
        {"name": "driver_id", "type": "string"},
        {"name": "location", "type": "string"},
        {
            "name": "status",
            "type": {
                "type": "enum",
                "name": "DriverStatus",
                "symbols": ["available", "busy"],
            },
        },
        {"name": "timestamp", "type": "string"},
    ],
}

# Function to generate ride requests MARIA
def generate_ride_request(n=100):
    """Generate ``n`` synthetic ride-request records.

    Each record is a dict with the keys ``user_id``, ``pickup_location``,
    ``destination``, ``timestamp``, ``status`` and ``price`` -- the same field
    names declared in ``ride_request_schema``.

    Args:
        n: Number of records to generate (default 100). ``0`` yields ``[]``.

    Returns:
        list[dict]: ``n`` records, in generation order.
    """
    statuses = ["requested", "cancelled", "accepted", "ongoing", "completed"]

    requests = []
    for _ in range(n):
        status = random.choice(statuses)

        # Naive UTC "now". This is the documented replacement for the deprecated
        # datetime.utcnow(); dropping tzinfo keeps the ISO string free of a
        # "+00:00" suffix, so the timestamp format is unchanged.
        start_time = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

        requests.append({
            "user_id": fake.uuid4(),
            "pickup_location": fake.city(),
            "destination": fake.city(),
            "timestamp": start_time.isoformat(),
            "status": status,
            # Price drawn uniformly from [5, 50], rounded to 2 decimals.
            "price": round(random.uniform(5, 50), 2),
        })
    return requests

# Function to generate driver availability updates NORA
def generate_driver_availability(n=50):
    """Generate ``n`` synthetic driver-availability records.

    Each record is a dict with the keys ``driver_id``, ``location``, ``status``
    and ``timestamp`` -- the same field names declared in
    ``driver_availability_schema``.

    Args:
        n: Number of records to generate (default 50). ``0`` yields ``[]``.

    Returns:
        list[dict]: ``n`` records, in generation order.
    """
    statuses = ["available", "busy"]

    availabilities = []
    for _ in range(n):
        availabilities.append({
            "driver_id": fake.uuid4(),
            "location": fake.city(),
            "status": random.choice(statuses),
            # Naive UTC "now". This is the documented replacement for the
            # deprecated datetime.utcnow(); dropping tzinfo keeps the ISO string
            # free of a "+00:00" suffix, so the timestamp format is unchanged.
            "timestamp": datetime.datetime.now(datetime.timezone.utc)
            .replace(tzinfo=None)
            .isoformat(),
        })
    return availabilities

# Save a dataset in both JSON and Avro formats CRIS
def save_data(data, schema, filename):
    """Persist ``data`` twice: as ``<filename>.json`` and as ``<filename>.avro``.

    Args:
        data: The records to write; passed to both ``json.dump`` and
            ``fastavro.writer``.
        schema: Avro schema handed to ``fastavro.writer`` for the Avro file.
        filename: Output path without extension; ``.json`` and ``.avro`` are
            appended to it.

    Prints a confirmation line naming both files once they are written.
    """
    json_path, avro_path = f"{filename}.json", f"{filename}.avro"

    # Text copy, pretty-printed with a 4-space indent.
    with open(json_path, "w") as text_out:
        json.dump(data, text_out, indent=4)

    # Binary Avro copy, written by fastavro with the supplied schema.
    with open(avro_path, "wb") as binary_out:
        fastavro.writer(binary_out, schema, data)

    print(f"Data saved: {json_path}, {avro_path}")

# Build both synthetic datasets first, then write each one out as JSON + Avro.
ride_requests = generate_ride_request(n=200)
driver_availabilities = generate_driver_availability(n=100)

save_data(
    data=ride_requests,
    schema=ride_request_schema,
    filename="ride_requests",
)
save_data(
    data=driver_availabilities,
    schema=driver_availability_schema,
    filename="driver_availability",
)


Data saved: ride_requests.json, ride_requests.avro
Data saved: driver_availability.json, driver_availability.avro


In [None]:
# Read the ride-request Avro file back and print the first ten decoded records.
file_path = "ride_requests.avro"

records = []
with open(file_path, "rb") as avro_file:
    reader = fastavro.reader(avro_file)
    # Drain the reader while the file is still open.
    records.extend(reader)

for record in records[:10]:
    print(record)

{'user_id': '77d96da2-46cd-43c7-86e4-8b14c3900c0a', 'pickup_location': 'Jameschester', 'destination': 'Lake Christopher', 'timestamp': '2025-03-16T15:51:11.230331', 'status': 'cancelled', 'price': 48.18}
{'user_id': '3b7d2387-9bae-4283-8a6f-3c6bcbddc2f8', 'pickup_location': 'Port Derekbury', 'destination': 'Sarahhaven', 'timestamp': '2025-03-16T15:51:11.230671', 'status': 'completed', 'price': 41.69}
{'user_id': 'd4eb647d-7fcb-46cb-a2f5-a9afe9a3803b', 'pickup_location': 'Elizabethfurt', 'destination': 'West Chloe', 'timestamp': '2025-03-16T15:51:11.230870', 'status': 'completed', 'price': 13.13}
{'user_id': 'b10d6fea-65be-4a64-96cc-d32a1ee5397e', 'pickup_location': 'New Lisa', 'destination': 'South Sheenastad', 'timestamp': '2025-03-16T15:51:11.231055', 'status': 'ongoing', 'price': 10.78}
{'user_id': '2c5e76ff-a31c-4047-9f24-8caea97b933c', 'pickup_location': 'Catherinehaven', 'destination': 'West Dianeberg', 'timestamp': '2025-03-16T15:51:11.231235', 'status': 'ongoing', 'price': 23.4

In [None]:
import fastavro

# Read the driver-availability Avro file back and print the first ten decoded records.
file_path = "driver_availability.avro"

records = []
with open(file_path, "rb") as avro_file:
    reader = fastavro.reader(avro_file)
    # Drain the reader while the file is still open.
    records.extend(reader)

for record in records[:10]:
    print(record)

{'driver_id': '3b35012e-b414-41a5-af2f-3824136f4bd0', 'location': 'New Emily', 'status': 'busy', 'timestamp': '2025-03-16T15:51:11.264718'}
{'driver_id': 'd78606b5-b50f-47ce-9067-d1478ff751e8', 'location': 'Christophermouth', 'status': 'available', 'timestamp': '2025-03-16T15:51:11.264798'}
{'driver_id': 'f2fe4925-f84d-459d-ac0f-b0bad24e415d', 'location': 'Melendezside', 'status': 'busy', 'timestamp': '2025-03-16T15:51:11.264902'}
{'driver_id': 'bbc8fd47-5ca0-42fd-9d7b-23d931fb57bf', 'location': 'North Brandon', 'status': 'busy', 'timestamp': '2025-03-16T15:51:11.264975'}
{'driver_id': '9cb987c2-da7b-4812-962d-18090cace863', 'location': 'Brandyport', 'status': 'available', 'timestamp': '2025-03-16T15:51:11.265065'}
{'driver_id': 'af213e2a-c823-49b5-9480-31dcacecbf5b', 'location': 'Meyerhaven', 'status': 'busy', 'timestamp': '2025-03-16T15:51:11.265171'}
{'driver_id': 'fb97940c-8f4a-4c9b-8b25-b44a1965f0ac', 'location': 'South David', 'status': 'available', 'timestamp': '2025-03-16T15:51