In [84]:
import os
import requests
from datetime import datetime, timedelta
import pandas as pd
import json
import numpy as np
from dotenv import load_dotenv
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [85]:
urlbase = "https://data.cityofnewyork.us/resource/"

In [86]:
def desde_fecha(fecha_str):
       return f'{fecha_str}T00:00:00.000'

In [87]:
def hasta_fecha(fecha_str):
   return f'{fecha_str}T23:59:59.000'

In [88]:
def extraccion_actual(ini, fin, token):
    url_eventos = f"{urlbase}tvpp-9vvx.json"
    
    param = {
        "$where": f"start_date_time >= '{ini}' AND start_date_time <= '{fin}'",    
    }

    header = {"X-App-Token": token}
    eventos = requests.get(url=url_eventos, params=param, headers=header)
    

    if eventos.status_code != 200:
        print(f"Error Parques: {eventos.text}")
 
    assert eventos.status_code == 200, "Error en la extracción de eventos"
    return eventos.json()




In [89]:
load_dotenv()
token = os.getenv('NYC_OPEN_DATA_TOKEN')
assert token is not None, "Falta la variable de entorno NYC_OPEN_DATA_TOKEN"


fecha_hoy_str = datetime.now().strftime('%Y-%m-%d')
fecha_actual = desde_fecha(fecha_hoy_str)
fecha_fin = hasta_fecha(fecha_hoy_str)


json_eventos = extraccion_actual(fecha_actual, fecha_fin, token)

In [90]:
df = pd.DataFrame(json_eventos)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175 entries, 0 to 174
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   event_id             175 non-null    object
 1   event_name           174 non-null    object
 2   start_date_time      175 non-null    object
 3   end_date_time        175 non-null    object
 4   event_agency         175 non-null    object
 5   event_type           175 non-null    object
 6   event_borough        175 non-null    object
 7   event_location       175 non-null    object
 8   street_closure_type  175 non-null    object
 9   community_board      175 non-null    object
 10  police_precinct      175 non-null    object
 11  cemsid               169 non-null    object
 12  event_street_side    9 non-null      object
dtypes: object(13)
memory usage: 17.9+ KB


In [91]:
df

Unnamed: 0,event_id,event_name,start_date_time,end_date_time,event_agency,event_type,event_borough,event_location,street_closure_type,community_board,police_precinct,cemsid,event_street_side
0,889263,Dana Discovery Center Lawn,2026-02-18T00:00:00.000,2026-02-18T23:59:00.000,Parks Department,Special Event,Manhattan,Central Park: Dana Discovery Center Lawn,,64,22,2734,
1,871499,Veterans Lawn Closure,2026-02-18T00:00:00.000,2026-02-18T23:59:00.000,Parks Department,Special Event,Manhattan,Madison Square Park: Veterans Lawn,,5,13,11918,
2,906226,Cherry Lawn Closure,2026-02-18T00:00:00.000,2026-02-18T14:00:00.000,Parks Department,Special Event,Manhattan,Madison Square Park: Cherry Lawn,,5,13,11920,
3,895641,construction,2026-02-18T00:00:00.000,2026-02-18T12:59:00.000,Parks Department,Special Event,Manhattan,Fort Tryon Park: Billings Lawn,,12,34,3994,
4,889688,Lawn Closure - Bowling Green Lawn,2026-02-18T00:00:00.000,2026-02-18T23:59:00.000,Parks Department,Special Event,Manhattan,Central Park: Bowling Green Lawn,,64,22,2756,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,901428,Soccer - Non Regulation,2026-02-18T20:00:00.000,2026-02-18T22:00:00.000,Parks Department,Sport - Youth,Staten Island,Fairview Park: Multi-Purpose Field-01,,3,123,12181,
171,892041,Football - Adults,2026-02-18T20:00:00.000,2026-02-18T18:30:00.000,Parks Department,Sport - Adult,Staten Island,Gen. Douglas MacArthur Park: Multi-Use Field-01,,2,122,10697,
172,890709,Soccer - Non Regulation,2026-02-18T20:00:00.000,2026-02-18T22:00:00.000,Parks Department,Sport - Adult,Brooklyn,McCarren Park: Soccer-01,,01,94,5122,
173,894723,Football - Adults,2026-02-18T20:00:00.000,2026-02-18T23:00:00.000,Parks Department,Sport - Adult,Brooklyn,Sternberg Park (Lindsey Park): Soccer/Football-01,,01,90,9933,


In [92]:

df['start_date_time'] = pd.to_datetime(df['start_date_time'], format='%Y-%m-%dT%H:%M:%S.%f', errors='coerce')
df["start_date_time"] = df["start_date_time"].dt.strftime('%H:%M:%S')
df['end_date_time'] = pd.to_datetime(df['end_date_time'], errors='coerce',  format='%Y-%m-%dT%H:%M:%S.%f')
df["end_date_time"] = df["end_date_time"].dt.strftime('%H:%M:%S')



In [93]:
df.columns
df = df.drop(["event_id", "event_agency", "street_closure_type", 'community_board','police_precinct', 'cemsid',
       'event_street_side' ], axis = 1)


In [94]:

riesgo_map = {
    'Parade': 10,
    'Athletic Race / Tour': 10,
    'Street Event': 8,
    'Special Event': 7,
    'Plaza Event': 6,
    'Plaza Partner Event': 6,
    'Theater Load in and Load Outs': 5,
    'Religious Event': 3,
    'Farmers Market': 2,
    'Sidewalk Sale': 2,
    'Production Event': 1,
    'Sport - Adult': 1,
    'Sport - Youth': 1,
    'Miscellaneous': 1,
    'Open Street Partner Event': 2
}


df['nivel_riesgo_tipo'] = df['event_type'].map(riesgo_map)

In [95]:
df = df.sort_values(by = "nivel_riesgo_tipo", ascending= False)
df = df[df.nivel_riesgo_tipo > 6]

In [96]:
df

Unnamed: 0,event_name,start_date_time,end_date_time,event_type,event_borough,event_location,nivel_riesgo_tipo
10,58 BOWERY EVENT,00:00:00,23:30:00,Street Event,Manhattan,"BOWERY between CANAL STREET and BAYARD STREET,...",8
37,Street Health Outreach Wellness Program,07:30:00,17:00:00,Street Event,Manhattan,WEST 39 STREET between 10 AVENUE and 11 AVENUE,8
61,BIO BUS,10:00:00,15:30:00,Street Event,Manhattan,WEST 176 STREET between AUDUBON AVENUE and SA...,8
3,construction,00:00:00,12:59:00,Special Event,Manhattan,Fort Tryon Park: Billings Lawn,7
4,Lawn Closure - Bowling Green Lawn,00:00:00,23:59:00,Special Event,Manhattan,Central Park: Bowling Green Lawn,7
5,Lawn Closure - Pilgrim Hill,00:00:00,23:59:00,Special Event,Manhattan,Central Park: Pilgrim Hill,7
1,Veterans Lawn Closure,00:00:00,23:59:00,Special Event,Manhattan,Madison Square Park: Veterans Lawn,7
6,East Green -East 72nd Playground Lawn,00:00:00,23:59:00,Special Event,Manhattan,Central Park: East 72nd Street Playground Lawns,7
7,Redbud Lawn Closure,00:00:00,13:00:00,Special Event,Manhattan,Madison Square Park: Redbud Lawn,7
8,Redbud Lawn Closure,00:00:00,03:00:00,Special Event,Manhattan,Madison Square Park: Redbud Lawn,7


In [97]:
geolocator = Nominatim(user_agent="nyc_events_geocoder")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1, max_retries=2)

In [98]:
def extraer_intersecciones(localizacion, barrio):
    """
    Extrae las intersecciones de las calles del evento
    """
    intersecciones = []
    
    segmentos = localizacion.split(",")
    
    for segmento in segmentos:
        segmento = segmento.strip()
        if " between " in segmento:
            partes = segmento.split(" between ")
            calle_principal = partes[0].strip()

            cruces = partes[1].split(" and ")
            for cruce in cruces:
                cruce = cruce.strip()
                if cruce:
                    intersecciones.append(f"{calle_principal} & {cruce}, {barrio}, New York")
    
    print(intersecciones)
    
    return intersecciones if intersecciones else [localizacion + f", {barrio}, New York"]

In [99]:
def extraer_coord(localizacion, barrio):
    """
    Devuelve las coordenadas del centro de las ubicaciones (calles que cruzan), o la coordenada del parque
    Devuelve longitud-latitud
    """
    if pd.isna(localizacion):
        return 0, 0
    
    if ":" in localizacion:
        resultado = geocode(localizacion.split(":")[0].strip() + f", {barrio}, New York")
        if resultado:
            return resultado.longitude, resultado.latitude
        return 0, 0

    intersections = extraer_intersecciones(localizacion, barrio)
    
    coords = []
    for intersection in intersections:
        try:
            resultado = geocode(intersection)
            if resultado:
                coords.append((resultado.latitude, resultado.longitude))
        except:
            continue
    
    if coords:
        lat = np.mean([c[0] for c in coords])
        lon = np.mean([c[1] for c in coords])
        return lon, lat
    
    return 0, 0

In [100]:
df["coordenadas"] = df.apply(
    lambda row: list(extraer_coord(row["event_location"], row["event_borough"])), axis=1
)

['BOWERY & CANAL STREET, Manhattan, New York', 'BOWERY & BAYARD STREET, Manhattan, New York', 'BOWERY & CANAL STREET, Manhattan, New York', 'BOWERY & BAYARD STREET, Manhattan, New York', 'CANAL STREET & BOWERY, Manhattan, New York', 'CANAL STREET & ELIZABETH STREET, Manhattan, New York']
['WEST   39 STREET & 10 AVENUE, Manhattan, New York', 'WEST   39 STREET & 11 AVENUE, Manhattan, New York']
['WEST  176 STREET & AUDUBON AVENUE, Manhattan, New York', 'WEST  176 STREET & SAINT NICHOLAS AVENUE, Manhattan, New York']


In [101]:
df

Unnamed: 0,event_name,start_date_time,end_date_time,event_type,event_borough,event_location,nivel_riesgo_tipo,coordenadas
10,58 BOWERY EVENT,00:00:00,23:30:00,Street Event,Manhattan,"BOWERY between CANAL STREET and BAYARD STREET,...",8,"[-73.9964032, 40.7159532]"
37,Street Health Outreach Wellness Program,07:30:00,17:00:00,Street Event,Manhattan,WEST 39 STREET between 10 AVENUE and 11 AVENUE,8,"[0, 0]"
61,BIO BUS,10:00:00,15:30:00,Street Event,Manhattan,WEST 176 STREET between AUDUBON AVENUE and SA...,8,"[0, 0]"
3,construction,00:00:00,12:59:00,Special Event,Manhattan,Fort Tryon Park: Billings Lawn,7,"[0, 0]"
4,Lawn Closure - Bowling Green Lawn,00:00:00,23:59:00,Special Event,Manhattan,Central Park: Bowling Green Lawn,7,"[-73.9323872, 40.8618103]"
5,Lawn Closure - Pilgrim Hill,00:00:00,23:59:00,Special Event,Manhattan,Central Park: Pilgrim Hill,7,"[-73.9653627, 40.7827725]"
1,Veterans Lawn Closure,00:00:00,23:59:00,Special Event,Manhattan,Madison Square Park: Veterans Lawn,7,"[-73.9653627, 40.7827725]"
6,East Green -East 72nd Playground Lawn,00:00:00,23:59:00,Special Event,Manhattan,Central Park: East 72nd Street Playground Lawns,7,"[-73.9879655, 40.742203]"
7,Redbud Lawn Closure,00:00:00,13:00:00,Special Event,Manhattan,Madison Square Park: Redbud Lawn,7,"[-73.9653627, 40.7827725]"
8,Redbud Lawn Closure,00:00:00,03:00:00,Special Event,Manhattan,Madison Square Park: Redbud Lawn,7,"[-73.9879655, 40.742203]"


In [102]:
from pymongo import MongoClient

url_servidor = 'mongodb://127.0.0.1:27017/'


client = MongoClient(url_servidor)

# código para ver si se ha conectado bien
try:
    s = client.server_info() # si hay error tendremos una excepción
    print("Conectado a MongoDB, versión",s["version"])
    db = client["PD1"]
except:
    print ("Error de conexión ¿está arrancado el servidor?")

Conectado a MongoDB, versión 8.2.3


In [None]:
def cursor_paradas_afectedas(coordinates): #coordinates de esta forma [longitud, latitud]
    cursor = db.subway.find(
   {
     "ubicacion":
       { "$near" :
          {
            "$geometry": { "type": "Point",  "coordinates": coordinates },
            "$maxDistance": 500
          }
       }
   }
)
    return cursor

In [104]:
def extraccion_paradas(cursor):
    afectadas = []
    for doc in cursor:
        afectadas.append((doc["nombre"], doc["lineas"]))

    return afectadas


In [105]:
df["paradas_afectadas"] = df["coordenadas"].apply(lambda cor: extraccion_paradas(cursor_paradas_afectedas(cor)))

In [106]:
from collections import defaultdict


def fusionar_lista_estaciones(lista_tuplas):
    '''fusiona lineas con el mismo nombre'''
    if not isinstance(lista_tuplas, list):
        return lista_tuplas
        
    estaciones_fusionadas = defaultdict(set)
    
    
    for nombre, lineas in lista_tuplas:
        estaciones_fusionadas[nombre].update(lineas.split())
        
    
    resultado = []
    for nombre, lineas_set in estaciones_fusionadas.items():
        lineas_ordenadas = " ".join(sorted(lineas_set))
        resultado.append((nombre, lineas_ordenadas))
        
    return resultado

In [107]:
df['paradas_afectadas'] = df['paradas_afectadas'].apply(fusionar_lista_estaciones)

In [108]:
df = df.drop(columns=["coordenadas",], axis = 1)

In [109]:
df

Unnamed: 0,event_name,start_date_time,end_date_time,event_type,event_borough,event_location,nivel_riesgo_tipo,paradas_afectadas
10,58 BOWERY EVENT,00:00:00,23:30:00,Street Event,Manhattan,"BOWERY between CANAL STREET and BAYARD STREET,...",8,"[(Grand St, B D L), (Canal St, 1 6 A C E J N Q..."
37,Street Health Outreach Wellness Program,07:30:00,17:00:00,Street Event,Manhattan,WEST 39 STREET between 10 AVENUE and 11 AVENUE,8,[]
61,BIO BUS,10:00:00,15:30:00,Street Event,Manhattan,WEST 176 STREET between AUDUBON AVENUE and SA...,8,[]
3,construction,00:00:00,12:59:00,Special Event,Manhattan,Fort Tryon Park: Billings Lawn,7,[]
4,Lawn Closure - Bowling Green Lawn,00:00:00,23:59:00,Special Event,Manhattan,Central Park: Bowling Green Lawn,7,"[(190 St, A), (Dyckman St, 1 A), (191 St, 1), ..."
5,Lawn Closure - Pilgrim Hill,00:00:00,23:59:00,Special Event,Manhattan,Central Park: Pilgrim Hill,7,"[(86 St, 1 4 5 6 B C Q), (81 St-Museum of Natu..."
1,Veterans Lawn Closure,00:00:00,23:59:00,Special Event,Manhattan,Madison Square Park: Veterans Lawn,7,"[(86 St, 1 4 5 6 B C Q), (81 St-Museum of Natu..."
6,East Green -East 72nd Playground Lawn,00:00:00,23:59:00,Special Event,Manhattan,Central Park: East 72nd Street Playground Lawns,7,"[(23 St, 1 C E F M R W), (23 St-Baruch College..."
7,Redbud Lawn Closure,00:00:00,13:00:00,Special Event,Manhattan,Madison Square Park: Redbud Lawn,7,"[(86 St, 1 4 5 6 B C Q), (81 St-Museum of Natu..."
8,Redbud Lawn Closure,00:00:00,03:00:00,Special Event,Manhattan,Madison Square Park: Redbud Lawn,7,"[(23 St, 1 C E F M R W), (23 St-Baruch College..."
