# In [1]:
import json
from datetime import datetime
from faker import Faker
import time
import random
from kafka import KafkaProducer
import pytz

# Timezone object for the 'Asia/Kolkata' zone.
tz = pytz.timezone('Asia/Kolkata')

# Kafka connection settings: broker address and destination topic.
bootstrap_servers = "localhost:9092"
topic_name = "clickstream"

# Producer whose values are dumped to JSON text and encoded as UTF-8 bytes.
producer = KafkaProducer(
    bootstrap_servers=bootstrap_servers,
    value_serializer=lambda payload: json.dumps(payload).encode('utf-8'),
)

# Faker instance used to fabricate field values for the generated events.
fake = Faker()

# Event types a generated event can be tagged with.
event_types = [
    'page_view',
    'button_click',
    'form_submission',
    'search_query',
    'add_to_cart',
    'purchase',
    'video_watch',
    'login',
    'logout',
    'scroll',
    'rating',
    'checkout_start',
    'checkout_complete',
    'search_filter',
]

# Candidate values for the page, device and location fields of an event.
pages = [
    'home',
    'product',
    'checkout',
    'cart',
    'login',
    'search',
    'profile',
    'category',
    'video',
    'contact',
]
devices = ['desktop', 'mobile', 'tablet']
locations = ['New York', 'London', 'Delhi', 'Paris', 'Sydney', 'Tokyo']

def generate_event():
    """Build one synthetic clickstream event as a JSON-serializable dict.

    Every event carries the same base fields (user and session identifiers,
    timestamp, page, event type, referrer, device, location, user agent,
    IP address and a duration). Some event types add extra fields that
    describe that particular interaction; the remaining types carry only
    the base fields.

    Returns:
        dict: The event, whose ``event_type`` is picked at random from the
        module-level ``event_types`` list.
    """
    event_type = random.choice(event_types)

    # Take a single timezone-aware "now" so that every time field in the
    # event uses the same zone and format. Previously 'logout_time' was a
    # naive local time while 'timestamp' carried the Asia/Kolkata offset.
    now = datetime.now(tz=tz).isoformat()

    event = {
        'user_id': fake.uuid4(),
        'session_id': random.randint(10000, 99999),
        'timestamp': now,
        'page_url': random.choice(pages),
        'event_type': event_type,
        'referrer_url': fake.url(),
        'device': random.choice(devices),
        'location': random.choice(locations),
        'user_agent': fake.user_agent(),
        'ip_address': fake.ipv4(),
        'event_duration': random.randint(5, 300),
    }

    # Event-type-specific fields.
    if event_type == 'search_query':
        event['search_query'] = fake.word()  # Random search term
    elif event_type == 'add_to_cart':
        event['product_id'] = random.randint(1000, 5000)
        event['product_name'] = fake.word().capitalize()  # Random product name
    elif event_type == 'purchase':
        event['order_id'] = fake.uuid4()
        event['total_amount'] = round(random.uniform(20.0, 500.0), 2)  # Random purchase total
    elif event_type == 'rating':
        event['product_id'] = random.randint(1000, 5000)
        event['rating'] = random.randint(1, 5)  # Random rating from 1 to 5
    elif event_type in ('checkout_start', 'checkout_complete'):
        event['cart_value'] = round(random.uniform(50.0, 1000.0), 2)
        # Only a completed checkout records how it was paid for.
        if event_type == 'checkout_complete':
            event['payment_method'] = random.choice(['credit_card', 'paypal', 'debit_card'])
    elif event_type == 'scroll':
        event['scroll_percentage'] = random.randint(10, 100)  # Percentage of page scrolled
    elif event_type == 'login':
        event['login_method'] = random.choice(['email', 'facebook', 'google'])
    elif event_type == 'logout':
        event['logout_time'] = now
    elif event_type == 'search_filter':
        event['filter_applied'] = random.choice(['price', 'brand', 'rating', 'color'])

    return event


# Keep generating events and handing them to the producer until the user
# interrupts (Ctrl+C); the producer is closed on any exit path.
try:
    while True:
        event = generate_event()
        producer.send(topic_name, value=event)
        print(f"Event sent -- {event}")

        # Wait a random whole number of seconds (1 to 5) before the next event.
        pause_seconds = random.randint(1, 5)
        time.sleep(pause_seconds)
except KeyboardInterrupt:
    print("Stopping producer...")
finally:
    producer.close()
    print("Producer closed.")
