In [None]:
from kafka import KafkaConsumer
import json
import matplotlib.pyplot as plt
from collections import defaultdict, Counter
from IPython.display import clear_output, display
import time

# --------------------------------------------------
# Kafka consumer configuration
# --------------------------------------------------
# Subscribes to the 'uber_trips' topic as a member of the
# 'grupo_visualizacion' consumer group. Each message value is decoded from
# UTF-8 bytes and parsed as JSON before it is handed to the main loop.
consumer = KafkaConsumer(
    'uber_trips',
    # Broker address as a plain "host:port" string (no embedded quote
    # characters, which would make the host name unresolvable).
    bootstrap_servers='100.68.89.127:9092',
    auto_offset_reset='latest',
    group_id='grupo_visualizacion',
    value_deserializer=lambda m: json.loads(m.decode('utf-8'))
)

# --------------------------------------------------
# Running metrics, updated as trips are consumed
# --------------------------------------------------
# Occurrence counts keyed by zone id (stored as strings by the main loop).
pickup_zones, dropoff_zones = Counter(), Counter()

# Per-hour accumulators; missing hours start at 0 / 0.0 on first access.
trips_by_hour = defaultdict(int)
sales_by_hour = defaultdict(float)

# --------------------------------------------------
# Matplotlib figure and batching settings
# --------------------------------------------------
# Incoming trips are buffered in `batch` until `batch_size` of them are
# available; the dashboard is then redrawn.
batch = []
batch_size = 10
refresh_time = 5  # seconds passed to plt.pause() after each redraw

# Interactive mode, with a single 2x2 grid of axes reused for every redraw.
plt.ion()
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))
fig.suptitle("Visualización en tiempo real - Uber NYC")

# --------------------------------------------------
# Main consume-and-visualize loop
# --------------------------------------------------
def _to_float(value, default=0.0):
    """Return *value* as a float, or *default* if it is None or non-numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _record_trip(trip):
    """Fold one decoded trip into the running metrics.

    Returns True when the trip was counted, and False when it was skipped
    because it is not a dict or its "hour" field cannot be converted to int.
    Missing, null or non-numeric money fields are treated as 0.0.
    """
    if not isinstance(trip, dict):
        return False
    try:
        hour = int(trip.get("hour", 0))
    except (TypeError, ValueError, OverflowError):
        return False

    fare = _to_float(trip.get("base_passenger_fare"))
    tips = _to_float(trip.get("tips"))
    pay = _to_float(trip.get("driver_pay"))

    # Amount attributed to the platform: fare plus tips minus driver pay.
    sales_by_hour[hour] += fare + tips - pay
    trips_by_hour[hour] += 1
    pickup_zones[str(trip.get("PULocationID", "NA"))] += 1
    dropoff_zones[str(trip.get("DOLocationID", "NA"))] += 1
    return True


def _draw_top_zones(ax, zone_counts, title):
    """Redraw *ax* as a horizontal bar chart of the 5 most common zones."""
    ax.clear()
    top = zone_counts.most_common(5)
    if top:  # zip(*[]) cannot be unpacked into two names
        zones, counts = zip(*top)
        ax.barh(zones, counts)
    ax.set_title(title)
    ax.set_xlabel("Cantidad de viajes")


def _render_dashboard(skipped=0):
    """Redraw the four charts from the current metrics and display them.

    *skipped* is the number of trips dropped from the batch just processed;
    when non-zero it is reported above the figure.
    """
    clear_output(wait=True)
    if skipped:
        print("Registros omitidos por datos inválidos:", skipped)

    # Both per-hour dicts are updated together, so they share the same keys.
    hours = sorted(sales_by_hour)

    # Chart 1: accumulated earnings per hour
    ax = axes[0, 0]
    ax.clear()
    ax.bar(hours, [sales_by_hour[h] for h in hours])
    ax.set_title("Ganancias acumuladas por hora")
    ax.set_xlabel("Hora")
    ax.set_ylabel("Ganancia (USD)")
    ax.grid(True)

    # Chart 2: number of trips per hour
    ax = axes[0, 1]
    ax.clear()
    ax.plot(hours, [trips_by_hour[h] for h in hours], marker='o')
    ax.set_title("Cantidad de viajes por hora")
    ax.set_xlabel("Hora")
    ax.set_ylabel("Viajes")
    ax.grid(True)

    # Charts 3 and 4: top pickup / drop-off zones
    _draw_top_zones(axes[1, 0], pickup_zones, "Top 5 zonas de origen")
    _draw_top_zones(axes[1, 1], dropoff_zones, "Top 5 zonas de destino")

    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    display(fig)
    plt.pause(refresh_time)


try:
    while True:
        try:
            for msg in consumer:
                batch.append(msg.value)
                if len(batch) < batch_size:
                    continue

                # Detach the batch before processing it: if anything below
                # raises, these trips must not be counted a second time when
                # the loop resumes.
                pending = batch[:]
                batch.clear()

                skipped = sum(1 for trip in pending if not _record_trip(trip))
                _render_dashboard(skipped)

        except Exception as e:
            # Best-effort recovery: report, back off, then resume consuming.
            print("Error:", e)
            time.sleep(5)

except KeyboardInterrupt:
    # Also reached when the interrupt arrives during the back-off sleep.
    print("Proceso detenido por el usuario.")

finally:
    # Always close the consumer on exit.
    consumer.close()
