In [6]:
from nyrr import get_runner_info, get_runner_races, get_all_runner_races
import pandas as pd
import numpy as np
import os 
from datetime import timedelta
import requests


def get_hours(timestring):
    """Convert a clock-style duration string into a whole number of hours.

    Parameters
    ----------
    timestring : str
        Colon-separated duration: ``H:MM:SS``, ``MM:SS`` or ``SS``.
        If more than three fields are present only the first three are
        used (read as hours, minutes, seconds).

    Returns
    -------
    int
        The duration rounded to the nearest hour with ``np.round`` (exact
        halves round to the even hour). A result of 0 is bumped to 1 so
        that short races still span one hour.

    Raises
    ------
    ValueError
        If any field cannot be parsed as an integer.
    """
    fields = [int(part) for part in timestring.split(':')][:3]
    # Pair the multipliers from the right-hand side so the last field is
    # always seconds; pairing from the left would read "48:17" as 48 hours.
    seconds_per_unit = (1, 60, 3600)
    total_seconds = sum(
        unit * value for unit, value in zip(seconds_per_unit, reversed(fields))
    )
    # int(...) gives callers a plain Python int on every path.
    total_hours = int(np.round(total_seconds / 3600, 0))
    if total_hours == 0:
        return 1
    return total_hours

def read_and_prep_data(filename):
    """Load a race-results CSV and attach a whole-hour duration column.

    Reads ``filename`` with ``pd.read_csv`` and adds a ``numHours`` column
    obtained by applying ``get_hours`` to each value of ``actualTime``.
    Returns the resulting DataFrame.
    """
    races = pd.read_csv(filename)
    hours_per_race = races["actualTime"].apply(get_hours)
    return races.assign(numHours=hours_per_race)

def get_weather_data(df, location='New%20York,NY', output_path='weather_data.csv', timeout=30):
    """Fetch hourly weather for each race in ``df`` and append it to a CSV file.

    For every row whose ``venue`` is not missing, one request is sent to the
    Visual Crossing ``weatherdata/history`` endpoint covering
    ``startDateTime`` through ``startDateTime + numHours`` hours. The decoded
    response body is appended verbatim to ``output_path``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``venue``, ``startDateTime`` and ``numHours``.
    location : str, optional
        Value for the ``location`` query parameter, already URL-encoded.
        The default is New York, so races held elsewhere (e.g. NJ) get
        New York weather unless this is overridden.
    output_path : str, optional
        File the responses are appended to. It is opened in append mode, so
        repeated calls keep adding to the same file.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If the ``VC_TOKEN`` environment variable is not set, or if the
        service answers with an HTTP error status.

    Notes
    -----
    NOTE(review): each response is written as received; if the service
    sends a CSV header with every response the output file will contain
    repeated header rows - confirm how downstream code reads it.
    """
    token = os.environ.get('VC_TOKEN')
    if not token:
        # Fail early with a readable message instead of the TypeError that
        # string concatenation with None would produce.
        raise RuntimeError(
            "VC_TOKEN environment variable is not set; "
            "it must hold the Visual Crossing API key."
        )

    URL_ROOT = 'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/'
    QUERY_TYPE = 'weatherdata/history?&aggregateHours=1'
    EXTRA_PARAMS = '&collectStationContributions=true&unitGroup=us&contentType=csv'
    URL_KEY = '&key=' + token
    URL_LOC = '&location=' + location

    for venue, start_raw, num_hours in zip(df['venue'], df['startDateTime'], df['numHours']):
        # Rows without a venue are skipped (in the sample data these are
        # the virtual races).
        if pd.isna(venue):
            continue

        start = pd.to_datetime(start_raw)
        end = start + timedelta(hours=int(num_hours))
        start_datetime = start.isoformat()
        end_datetime = end.isoformat()

        DATES = f'&startDateTime={start_datetime}&endDateTime={end_datetime}'
        URL = URL_ROOT + QUERY_TYPE + DATES + EXTRA_PARAMS + URL_LOC + URL_KEY
        response = requests.get(URL, timeout=timeout)
        if not response.ok:
            # Do not append an error body to the data file. The message
            # deliberately omits the URL because it contains the API key.
            raise RuntimeError(
                f'Weather request for {start_datetime} to {end_datetime} '
                f'failed with HTTP status {response.status_code}'
            )

        # newline='\n' disables newline translation so the body is stored
        # as received; the with-block closes the file.
        with open(output_path, 'a', newline='\n', encoding='utf-8') as csvfile:
            csvfile.write(response.content.decode())

In [7]:
# Load Elizabeth's race results; read_and_prep_data adds a numHours column
# derived from actualTime.
elizabeth_races = read_and_prep_data("./data/elizabeth_races.csv")

In [8]:
# Preview the first rows to check the load and the derived numHours column.
elizabeth_races.head()

Unnamed: 0,index,runnerId,bib,eventCode,eventName,venue,distanceName,startDateTime,actualTime,actualPace,numHours
0,0,38295218,14687,24BHM5K,2024 Virtual NYRR Black History Month 5K,,5 kilometers,2024-02-17T12:00:00,0:24:44,07:58,1
1,1,38217731,5718,24MAN10K,2024 NYRR Manhattan 10K,"Central Park, NYC",10 kilometers,2024-02-04T08:00:00,0:48:17,07:47,1
2,2,38203270,5040,24FLHALF,2024 NYRR Fred Lebow Half Marathon,Central Park (NY23002JRG),Half-Marathon,2024-01-28T08:00:00,1:50:55,08:28,2
3,3,38080269,4478,24JK,2024 NYRR Joe Kleinerman 10K,"Central Park, NYC (NY23001JRG)",10 kilometers,2024-01-06T08:00:00,0:47:41,07:41,1
4,4,37961679,10792,24RR5K,2024 Virtual NYRR Resolution Run 5K,,5 kilometers,2024-01-01T12:00:00,0:23:41,07:38,1


In [9]:
# Fetch hourly weather for every race that has a venue (one HTTP request per
# race) and append the responses to weather_data.csv.
# Requires the VC_TOKEN environment variable to hold the API key.
get_weather_data(elizabeth_races)

In [10]:
# Load Jeff's race results with the same numHours preparation step.
jeff_data = read_and_prep_data("./data/jeff_races.csv")

In [11]:
# Fetch weather for Jeff's races. The output is appended to the same
# weather_data.csv written by the earlier get_weather_data call, and
# re-running either cell appends the same data again.
get_weather_data(jeff_data)