In [2]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
from kafka import KafkaProducer
import json

# Function to generate sample mobility footfall data in Bangalore
def generate_bangalore_sample_data(num_records=1000):
    """Build a DataFrame of synthetic footfall visits around central Bangalore.

    Every record is derived from ONE randomly chosen visit instant inside the
    last 30 days, expressed in the UTC+5:30 offset that ``timezone_visit``
    advertises. As a result ``time_stamp``, ``data_visit``, ``time_visit`` and
    ``day_of_week_visit`` always describe the same moment (previously each of
    them was drawn independently, so a row could claim e.g. a "Monday" visit
    on a date that was actually a Thursday).

    Args:
        num_records: Number of rows to generate. ``0`` yields an empty frame
            that still carries all the columns.

    Returns:
        pandas.DataFrame with the columns, in order:
        ``hashed_device_id`` (str), ``timezone_visit`` (str),
        ``day_of_week_visit`` (English day name), ``time_stamp`` (epoch
        seconds, float), ``lat_visit`` (float, 6 decimals), ``data_visit``
        (ISO date string), ``time_visit`` (``HH:MM:SS`` string) and
        ``lon_visit`` (float, 6 decimals).
    """
    # Local import so this function does not depend on the module header,
    # which only imports ``datetime`` and ``timedelta``.
    from datetime import timezone

    # Fixed reference location for Bangalore and the maximum random offset
    # (in degrees) applied to each coordinate.
    bangalore_lat = 12.9716
    bangalore_lon = 77.5946
    max_jitter = 0.01

    # Visits are spread over the 30 days preceding "now".
    lookback_seconds = 30 * 24 * 60 * 60

    # Ordered to match datetime.weekday() (Monday == 0); indexing this list
    # keeps the day names English regardless of the process locale.
    days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    # Use the offset the records are labelled with, so the wall-clock fields
    # do not depend on the host machine's local time zone. Microseconds are
    # dropped so the epoch value and the HH:MM:SS string agree exactly.
    ist = timezone(timedelta(hours=5, minutes=30))
    now = datetime.now(ist).replace(microsecond=0)

    # One visit instant per record; every time-related column derives from it.
    visits = [
        now - timedelta(seconds=random.randint(0, lookback_seconds))
        for _ in range(num_records)
    ]

    data = {
        'hashed_device_id': [f'device_{i}' for i in range(len(visits))],
        'timezone_visit': ['UTC+5:30'] * len(visits),  # Bangalore time zone
        'day_of_week_visit': [days_of_week[v.weekday()] for v in visits],
        'time_stamp': [v.timestamp() for v in visits],
        'lat_visit': [round(bangalore_lat + random.uniform(-max_jitter, max_jitter), 6) for _ in visits],
        # NOTE(review): key looks like a typo for 'date_visit'; kept unchanged
        # because downstream consumers may already rely on this name.
        'data_visit': [v.date().isoformat() for v in visits],
        'time_visit': [v.strftime('%H:%M:%S') for v in visits],
        'lon_visit': [round(bangalore_lon + random.uniform(-max_jitter, max_jitter), 6) for _ in visits],
    }

    return pd.DataFrame(data)

# Kafka producer configuration
def create_kafka_producer(bootstrap_servers=None):
    """Create a KafkaProducer whose message values are serialized as JSON.

    Args:
        bootstrap_servers: Broker address(es) handed to ``KafkaProducer``
            unchanged (e.g. ``['10.0.0.5:9092']`` for a remote VM). When
            omitted, ``['localhost:9092']`` is used, which matches the
            previously hard-coded behaviour.

    Returns:
        A ``KafkaProducer`` configured with a value serializer that turns
        each message into UTF-8 encoded JSON bytes.
    """
    if bootstrap_servers is None:
        # Build the default per call so no mutable default is shared.
        bootstrap_servers = ['localhost:9092']

    def _to_json_bytes(payload):
        # Serialize the message to JSON, then to bytes for the wire.
        return json.dumps(payload).encode('utf-8')

    return KafkaProducer(
        bootstrap_servers=bootstrap_servers,
        value_serializer=_to_json_bytes,
    )

# Function to push DataFrame rows to Kafka
def push_data_to_kafka(df, topic):
    """Publish every row of ``df`` to a Kafka topic, one message per row.

    Args:
        df: pandas DataFrame; each row is sent as a dict keyed by column name.
        topic: Name of the Kafka topic to publish to.

    The producer is created via ``create_kafka_producer()`` and is always
    closed before returning, including when sending a row raises.
    """
    producer = create_kafka_producer()
    try:
        # to_dict(orient='records') yields one plain dict per row without
        # building an intermediate Series for each row as iterrows() does.
        for record in df.to_dict(orient='records'):
            producer.send(topic, value=record)

        # Flush once after queuing everything: this still blocks until all
        # pending messages are sent, but lets the producer batch them instead
        # of forcing a round-trip per row.
        producer.flush()
    finally:
        # Release the connection even if serialization or sending failed.
        producer.close()

# Script entry point: build a small Bangalore sample and publish it to Kafka
if __name__ == "__main__":
    kafka_topic = 'topic'  # Replace with your topic name

    # Ten synthetic visit records are generated for this run.
    bangalore_sample_data = generate_bangalore_sample_data(num_records=10)

    # Send each generated row to the configured topic.
    push_data_to_kafka(df=bangalore_sample_data, topic=kafka_topic)
