In [30]:
import os
import requests
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import pandas as pd
import json
import numpy as np



In [31]:
def extraccion_actual(fecha, CLIENT_ID, manana, timeout=10):
    """Fetch SeatGeek events held in New York between two local dates.

    Sends a single GET request to the SeatGeek ``/2/events`` endpoint, filtered
    by ``venue.city == "New York"``, sorted by descending score and limited to
    one page of 100 results (no pagination is performed).

    Args:
        fecha: Lower bound, sent as the ``datetime_local.gte`` filter.
        CLIENT_ID: SeatGeek client id, sent as ``client_id``.
        manana: Upper bound, sent as the ``datetime_local.lte`` filter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        AssertionError: If the response status code is not 200.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    url = "https://api.seatgeek.com/2/events"
    params = {
        "client_id": CLIENT_ID,
        "venue.city": "New York",
        "sort": "score.desc",
        "per_page": 100,
        "datetime_local.gte": fecha,
        "datetime_local.lte": manana,
    }

    # Without a timeout, requests can block indefinitely on an unresponsive server.
    response = requests.get(url, params=params, timeout=timeout)

    # Raised explicitly (instead of via `assert`) so the check is not stripped
    # under `python -O`; AssertionError is kept so callers see the same type.
    if response.status_code != 200:
        raise AssertionError("Error en la extracción de eventos")

    return response.json()

    
    

In [32]:
# Query window: from now (clock of the machine running the notebook) to the
# same time tomorrow, both rendered as plain YYYY-MM-DD dates for the API.
fecha_hoy_obj = datetime.now()
manana_obj = fecha_hoy_obj + timedelta(days=1)

fecha_hoy_str, manana_str = (
    instante.strftime('%Y-%m-%d') for instante in (fecha_hoy_obj, manana_obj)
)

# The SeatGeek client id is read from the environment rather than hard-coded.
API_KEY = os.environ.get('CLIENT_ID_SEATGEEK')
assert API_KEY is not None, "Falta la variable de entorno CLIENT_ID_SEATGEEK"

# Raw JSON payload for the window above.
data = extraccion_actual(fecha=fecha_hoy_str, CLIENT_ID=API_KEY, manana=manana_str)


In [33]:
# Inspect the raw JSON payload returned by the API (a dict with an 'events' list).
data

{'events': [{'access_method': None,
   'announce_date': '2025-10-10T00:00:00',
   'announcements': {},
   'conditional': False,
   'contingent': False,
   'created_at': '2025-10-10T13:45:37',
   'date_tbd': False,
   'datetime_local': '2026-02-18T19:00:00',
   'datetime_tbd': False,
   'datetime_utc': '2026-02-19T00:00:00',
   'description': '',
   'onsale': None,
   'enddatetime_utc': '2026-02-19T01:30:00',
   'event_promotion': None,
   'game_number': 0,
   'home_game_number': 0,
   'id': 17811284,
   'integrated': None,
   'is_open': False,
   'is_visible': True,
   'is_visible_override': 'UNSET',
   'links': [{'id': '159593342',
     'url': '',
     'provider': 'stubhub',
     'primary': False,
     'display_name': '',
     'logos': {},
     'sales': [],
     'link_type': 'ID'},
    {'id': '3613739',
     'url': '',
     'provider': 'rapidseats',
     'primary': False,
     'display_name': '',
     'logos': {},
     'sales': [],
     'link_type': 'ID'},
    {'id': '00006349AD0256B0

In [34]:
# Flatten every raw SeatGeek event into one flat record per event.
eventos_limpios = []

for e in data['events']:
    # `venue` and `venue.location` may be absent or null. Fall back to empty
    # dicts so an incomplete event yields missing values instead of aborting
    # the whole extraction with a KeyError/TypeError.
    venue = e.get('venue') or {}
    location = venue.get('location') or {}

    info = {
        'nombre_evento': e.get('title'),
        'tipo': e.get('type'),
        'hora_inicio': e.get('datetime_local'),
        'lugar': venue.get('name'),
        'direccion': venue.get('address', 'Dirección no disponible'),
        'latitud': location.get('lat'),
        'longitud': location.get('lon'),
        'capacidad': venue.get('capacity'),
        'popularidad_score': e.get('score'),
        'venue_score': venue.get('score'),
    }
    eventos_limpios.append(info)

# One row per event, one column per extracted field.
df = pd.DataFrame(eventos_limpios)

In [35]:
# Capacities equal to 0 become NaN (presumably 0 means "not reported" -- confirm).
df = df.assign(capacidad=df['capacidad'].replace(0, np.nan))

In [36]:
# Keep only the time of day ("HH:MM") of each start; the calendar date is dropped here.
df['hora_inicio'] = pd.to_datetime(df['hora_inicio']).dt.strftime('%H:%M')

In [37]:
# Estimated event length per event type; `calcular_salida` reads these values
# as hours when it adds them to the start time.
tiempos_salida = {
    # Sports
    'nba': 2.5,
    'nfl': 3.5,
    'mlb': 3.0,
    'nhl': 2.5,
    'mls': 2.0,
    'ncaa_basketball': 2.5,
    'ncaa_football': 3.5,
    'sports': 2.5,
    'tennis': 4.0,
    'wwe': 3.0,
    'boxing': 3.5,
    'mma': 3.5,
    # Music
    'concert': 3.0,
    'music_festival': 8.0,
    'classical': 2.5,
    'opera': 3.0,
    # Stage and family shows
    'theater': 2.5,
    'broadway_tickets_national': 2.5,
    'comedy': 2.0,
    'family': 2.0,
    'ballet': 2.5,
    'cirque_du_soleil': 2.0,
}

In [38]:
def calcular_salida(fila, duraciones=None, duracion_por_defecto=2.5):
    """Estimate the "HH:MM" end time of an event from its type and start time.

    Args:
        fila: Mapping-like row exposing ``'tipo'`` and ``'hora_inicio'`` (for
            example a DataFrame row received through ``df.apply(..., axis=1)``).
        duraciones: Optional mapping of event type -> duration in hours.
            Defaults to the module-level ``tiempos_salida`` table.
        duracion_por_defecto: Hours assumed for event types missing from the
            table.

    Returns:
        The estimated end time formatted as ``'%H:%M'``, or ``None`` when the
        start time is missing. Only the time of day is returned, so an event
        running past midnight wraps around (23:00 + 3.5 h -> "02:30").
    """
    if duraciones is None:
        duraciones = tiempos_salida

    inicio = fila['hora_inicio']
    # A missing start (None/NaN/NaT) can be neither shifted nor formatted;
    # report "unknown" instead of raising in the middle of a DataFrame.apply.
    if pd.isna(inicio):
        return None

    horas_duracion = duraciones.get(fila['tipo'], duracion_por_defecto)
    hora_fin = pd.to_datetime(inicio) + timedelta(hours=horas_duracion)

    return hora_fin.strftime('%H:%M')

In [39]:
# Row-wise estimate of when each event lets out, stored as a new column.
df = df.assign(hora_salida_estimada=df.apply(calcular_salida, axis=1))


In [40]:
# One [longitude, latitude] pair per event, longitude first.
df["coordinates"] = [
    [lon, lat] for lon, lat in zip(df['longitud'], df['latitud'])
]

# These columns are not used by the remaining cells.
df = df.drop(columns=['longitud', 'latitud', 'lugar', 'direccion', 'tipo'])

In [41]:
# Display the cleaned events table (pandas elides the middle rows of long frames).
df

Unnamed: 0,nombre_evento,hora_inicio,capacidad,popularidad_score,venue_score,hora_salida_estimada,coordinates
0,The Runarounds,19:00,,0.517920,0.66,22:00,"[-73.9884, 40.7349]"
1,Hell's Kitchen - New York,14:00,,0.508596,0.65,16:30,"[-73.9875, 40.7579]"
2,Pen Pals - New York,19:00,,0.455623,0.56,21:30,"[-73.9894, 40.7353]"
3,New York City Ballet - The Sleeping Beauty,19:30,,0.472249,0.74,22:00,"[-73.9833, 40.772]"
4,The Outsiders - New York,19:00,,0.435139,0.72,21:30,"[-73.9878, 40.7586]"
...,...,...,...,...,...,...,...
67,SZN4 with Elle Baez,19:00,,0.281672,0.60,22:00,"[-73.9829, 40.738]"
68,Chess The Musical - New York,14:00,,0.257698,0.64,16:30,"[-73.9873, 40.7588]"
69,Operation Mincemeat - New York,19:30,,0.250956,0.66,22:00,"[-73.9878, 40.7589]"
70,Chess The Musical - New York,19:30,,0.245312,0.64,22:00,"[-73.9873, 40.7588]"


In [42]:
from pymongo import MongoClient
from pymongo.errors import PyMongoError

url_servidor = 'mongodb://127.0.0.1:27017/'

# Give up after 5 s instead of the driver's 30 s default when no server answers.
client = MongoClient(url_servidor, serverSelectionTimeoutMS=5000)

# MongoClient connects lazily, so force a round trip to verify the connection.
try:
    s = client.server_info()  # raises if the server cannot be reached
except PyMongoError:
    print("Error de conexión ¿está arrancado el servidor?")
    # Re-raise: the following cells need `db`, and swallowing the error here
    # would only resurface later as a confusing NameError.
    raise
else:
    print("Conectado a MongoDB, versión", s["version"])
    db = client["PD1"]

Conectado a MongoDB, versión 8.2.3


In [None]:
def cursor_paradas_afectedas(coordinates, max_distancia=500, coleccion=None):
    """Query the subway stops located near a point.

    Args:
        coordinates: ``[longitude, latitude]`` pair. GeoJSON points list the
            longitude first, which is also the order the caller builds.
        max_distancia: Search radius sent as ``$maxDistance`` (metres for a
            GeoJSON point, per the MongoDB ``$near`` reference).
        coleccion: Collection to query. Defaults to ``db.subway`` from the
            module-level ``db`` handle.

    Returns:
        The cursor returned by ``find`` for the ``$near`` query on the
        ``ubicacion`` field.
    """
    if coleccion is None:
        coleccion = db.subway

    consulta = {
        "ubicacion": {
            "$near": {
                "$geometry": {"type": "Point", "coordinates": coordinates},
                "$maxDistance": max_distancia,
            }
        }
    }
    return coleccion.find(consulta)

In [44]:
def extraccion_paradas(cursor):
    """Collect ``(nombre, lineas)`` tuples from an iterable of stop documents.

    Args:
        cursor: Iterable of mappings, each exposing ``"nombre"`` and ``"lineas"``.

    Returns:
        A list with one ``(doc["nombre"], doc["lineas"])`` tuple per document,
        in iteration order.
    """
    return [(parada["nombre"], parada["lineas"]) for parada in cursor]


In [45]:
# One geospatial query per event: the stops found around its coordinates.
df["paradas_afectadas"] = df["coordinates"].map(
    lambda punto: extraccion_paradas(cursor_paradas_afectedas(punto))
)

In [46]:
from collections import defaultdict


def fusionar_lista_estaciones(lista_tuplas):
    """Merge entries that share a station name, combining their lines.

    Args:
        lista_tuplas: List of ``(nombre, lineas)`` tuples where ``lineas`` is a
            whitespace-separated string of line names. Any non-list value is
            returned unchanged.

    Returns:
        A list of ``(nombre, lineas)`` tuples with one entry per distinct name,
        in order of first appearance; ``lineas`` holds the de-duplicated line
        names sorted and joined by single spaces.
    """
    # Anything that is not a list is passed through untouched.
    if not isinstance(lista_tuplas, list):
        return lista_tuplas

    lineas_por_estacion = {}
    for estacion, texto_lineas in lista_tuplas:
        lineas_por_estacion.setdefault(estacion, set()).update(texto_lineas.split())

    return [
        (estacion, " ".join(sorted(conjunto)))
        for estacion, conjunto in lineas_por_estacion.items()
    ]

In [47]:
# Collapse repeated station names within each event's list of stops.
df['paradas_afectadas'] = df['paradas_afectadas'].map(fusionar_lista_estaciones)

In [48]:
# The coordinates were only needed for the stop lookup.
df = df.drop(columns=["coordinates"])

In [50]:
# Preview the first rows of the final table, including the stops found per event.
df.head()

Unnamed: 0,nombre_evento,hora_inicio,capacidad,popularidad_score,venue_score,hora_salida_estimada,paradas_afectadas
0,The Runarounds,19:00,,0.51792,0.66,22:00,"[(14 St-Union Sq, 4 5 6 L N Q R W), (3 Av, L)]"
1,Hell's Kitchen - New York,14:00,,0.508596,0.65,16:30,"[(42 St-Port Authority Bus Terminal, A C E), (..."
2,Pen Pals - New York,19:00,,0.455623,0.56,21:30,"[(14 St-Union Sq, 4 5 6 L N Q R W), (3 Av, L)]"
3,New York City Ballet - The Sleeping Beauty,19:30,,0.472249,0.74,22:00,"[(66 St-Lincoln Center, 1), (59 St-Columbus Ci..."
4,The Outsiders - New York,19:00,,0.435139,0.72,21:30,"[(42 St-Port Authority Bus Terminal, A C E), (..."
