In [1]:
## TODO: REMINDER TO CHANGE THE INDEX OF THE ROWS YOU WANT TO PULL WEATHER DATA FOR!!!!!

In [2]:
import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import re

from datetime import datetime
import timezonefinder, pytz

import yaml

from tqdm import tqdm
tqdm.pandas()

In [3]:
def _convert(tude):
    '''
    
    '''
    
    ## If tude is a str, convert the string to decimal degrees
    if isinstance(tude, str):
        ## If the latiTUDE or longiTUDE are in the form [Degrees°Minutes′Secounds″Direction]
        if '″' in tude:

            ## Multiplier is 1 for North/East & -1 for South/West
            multiplier = 1 if tude[-1] in ['N', 'E'] else -1

            ## Split data with Degree/Minute/Seconds symbols
            deg, minutes, seconds, direction = re.split('[°\′″]', tude)

            ## convert above split data into a +/- decimal representation of latiTUDE or longiTUDE
            decimal_degrees = round(multiplier * (float(deg) + float(minutes)/60 + float(seconds)/(60*60)),4)

            #print(f"{tude} converted to {decimal_degrees}")
            return decimal_degrees
        
        ## If the latiTUDE or longiTUDE are in the form [Degrees°]
        else:
            
            ## Split data with Degree symbols
            decimal_degrees, direction = re.split('[°]', tude)

            ## Multiplier is 1 for North/East & -1 for South/West
            multiplier = 1 if direction in ['N', 'E'] else -1

            ## convert above split data into a +/- decimal representation of latiTUDE or longiTUDE
            decimal_degrees = round(multiplier * float(decimal_degrees),4)

            #print(f"{tude} converted to {decimal_degrees}")
            return decimal_degrees
        
    ## If tude is already a float, just return that value rounded to 4 decimals
    elif isinstance(tude, float):
        return round(tude, 4)
    
def _convert_12hour_to_24hour( time_12hour):
    in_time  = datetime.strptime(time_12hour, "%I:%M%p" )
    out_time = datetime.strftime(in_time,     "%H:%M:%S")

    return out_time

def isTimeFormat(input, format):
    try:
        datetime.strptime(input, format)
        return True
    except ValueError:
        return False 

def pull_openWeatherMap_data(row, tf, session, OWM_API_KEY):

    lat      = _convert(row.LATITUDE)
    lon      = _convert(row.LONGITUDE)
    local    = pytz.timezone(tf.certain_timezone_at(lat=lat, lng=lon))
    date = f'{row.GAME_DATE.year}-{row.GAME_DATE.month}-{row.GAME_DATE.day}'
    if isTimeFormat(f"{date} {_convert_12hour_to_24hour(row.GAMETIME)}", "%Y-%m-%d %H:%M:%S"):
        naive    = datetime.strptime(f"{date} {_convert_12hour_to_24hour(row.GAMETIME)}", "%Y-%m-%d %H:%M:%S")
    elif isTimeFormat(f"{date} {_convert_12hour_to_24hour(row.GAMETIME)}", "%m-%d-%Y %H:%M:%S"):
        naive    = datetime.strptime(f"{date} {_convert_12hour_to_24hour(row.GAMETIME)}", "%m-%d-%Y %H:%M:%S")
    else:
        naive    = datetime.strptime(f"{date} {_convert_12hour_to_24hour(row.GAMETIME)}", "%m/%d/%Y %H:%M:%S")
    local_dt = local.localize(naive, is_dst=None)
    utc_dt   = local_dt.astimezone(pytz.utc)
    time     = int(utc_dt.timestamp())
    API_key  = OWM_API_KEY 

    url = f"https://api.openweathermap.org/data/3.0/onecall/timemachine?lat={lat}&lon={lon}&dt={time}&appid={API_key}"
    
    owm_response = session.get(url).json()

    row['TEMP']         = owm_response['data'][0].get('temp',       'NULL_VALUE')
    row['FEELS_LIKE']   = owm_response['data'][0].get('feels_like', 'NULL_VALUE')
    row['PRESSURE']     = owm_response['data'][0].get('pressure',   'NULL_VALUE')
    row['HUMIDITY']     = owm_response['data'][0].get('humidity',   'NULL_VALUE')
    row['DEW_POINT']    = owm_response['data'][0].get('dew_point',  'NULL_VALUE')
    row['CLOUDS']       = owm_response['data'][0].get('clouds',     'NULL_VALUE')
    row['VISIBILITY']   = owm_response['data'][0].get('visibility', 'NULL_VALUE')
    row['WIND_SPEED']   = owm_response['data'][0].get('wind_speed', 'NULL_VALUE')
    row['WIND_DEG']     = owm_response['data'][0].get('wind_deg',   'NULL_VALUE')
    row['WEATHER_MAIN'] = owm_response['data'][0]['weather'][0].get('main',        'NULL_VALUE')
    row['WEATHER_DESC'] = owm_response['data'][0]['weather'][0].get('description', 'NULL_VALUE')

# #   print(temp, feels_like, pressure, humidity, dew_point, uvi, clouds, visibility, wind_speed, wind_deg, weather_main, weather_desc)

    return row

In [4]:
with open('../../configs/config.yaml') as file:
    config_dict = yaml.load(file, Loader=yaml.FullLoader)


df = pd.read_csv('../../data/cleaned/NFL_wk_by_wk_cleaned.csv')
df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'])
df.sort_values(['GAME_DATE','GAMETIME', 'GAME_ID'], ascending=True, inplace=True)

df_stadium = pd.read_csv('../../data/cleaned/NFL_stadium_reference.csv')

temp_df = df.loc[:,['GAME_ID','GAME_DATE','GAMETIME', 'YEAR','STADIUM']].copy()
temp_df = temp_df.merge(df_stadium.loc[:,['YEAR','STADIUM','LATITUDE', 'LONGITUDE']],how='left', on=['YEAR','STADIUM']).drop_duplicates()
temp_df.sort_values(['GAME_DATE','GAMETIME', 'GAME_ID'], ascending=True, inplace=True, ignore_index=True)

temp_df = temp_df.iloc[df.loc[(df['YEAR']=='2023-2024')&(df['WEEK_NUM']==df.iloc[-1,:]['WEEK_NUM']),:].index,:]

tzf = timezonefinder.TimezoneFinder()

sess = requests.Session()
retry = Retry(connect=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)
sess.mount('http://', adapter)
sess.mount('https://', adapter)

In [5]:
temp_df

Unnamed: 0,GAME_ID,GAME_DATE,GAMETIME,YEAR,STADIUM,LATITUDE,LONGITUDE
8412,2023_04_LAC_MIN,2023-09-24,1:00PM,2023-2024,U.S. Bank Stadium,44.974,-93.258
8413,2023_04_NE_NYJ,2023-09-24,1:00PM,2023-2024,MetLife Stadium,40.8135,-74.0744
8414,2023_04_NO_GB,2023-09-24,1:00PM,2023-2024,Lambeau Field,44.5014,-88.0622
8415,2023_04_TEN_CLE,2023-09-24,1:00PM,2023-2024,Cleveland Browns Stadium,41.5061,-81.6994
8416,2023_03_CAR_SEA,2023-09-24,4:05PM,2023-2024,Lumen Field,47.5952,-122.3316
8417,2023_04_CAR_SEA,2023-09-24,4:05PM,2023-2024,Lumen Field,47.5952,-122.3316
8418,2023_03_CHI_KAN,2023-09-24,4:25PM,2023-2024,GEHA Field at Arrowhead Stadium,39.0489,-94.4839
8419,2023_03_DAL_ARI,2023-09-24,4:25PM,2023-2024,State Farm Stadium,33.528,-112.263
8420,2023_04_CHI_KAN,2023-09-24,4:25PM,2023-2024,GEHA Field at Arrowhead Stadium,39.0489,-94.4839
8421,2023_04_DAL_ARI,2023-09-24,4:25PM,2023-2024,State Farm Stadium,33.528,-112.263


In [6]:
temp_df = temp_df.progress_apply(lambda x: pull_openWeatherMap_data(row = x, tf=tzf, session=sess, OWM_API_KEY=config_dict['OWM_API_KEY']),axis=1)

100%|██████████| 16/16 [00:12<00:00,  1.28it/s]


In [7]:
print(temp_df.shape)
display(temp_df.head(16))
# display(temp_df.tail())

(16, 18)


Unnamed: 0,GAME_ID,GAME_DATE,GAMETIME,YEAR,STADIUM,LATITUDE,LONGITUDE,TEMP,FEELS_LIKE,PRESSURE,HUMIDITY,DEW_POINT,CLOUDS,VISIBILITY,WIND_SPEED,WIND_DEG,WEATHER_MAIN,WEATHER_DESC
8412,2023_04_LAC_MIN,2023-09-24,1:00PM,2023-2024,U.S. Bank Stadium,44.974,-93.258,293.96,294.08,1012,76,289.58,75,10000,4.63,110,Rain,light rain
8413,2023_04_NE_NYJ,2023-09-24,1:00PM,2023-2024,MetLife Stadium,40.8135,-74.0744,290.87,290.89,1016,84,288.13,100,4828,7.72,50,Mist,mist
8414,2023_04_NO_GB,2023-09-24,1:00PM,2023-2024,Lambeau Field,44.5014,-88.0622,293.41,293.29,1018,69,287.54,75,10000,6.17,150,Clouds,broken clouds
8415,2023_04_TEN_CLE,2023-09-24,1:00PM,2023-2024,Cleveland Browns Stadium,41.5061,-81.6994,293.19,293.21,1015,75,288.63,100,10000,5.14,30,Clouds,overcast clouds
8416,2023_03_CAR_SEA,2023-09-24,4:05PM,2023-2024,Lumen Field,47.5952,-122.3316,289.58,289.5,1011,85,287.05,100,10000,2.57,140,Rain,light rain
8417,2023_04_CAR_SEA,2023-09-24,4:05PM,2023-2024,Lumen Field,47.5952,-122.3316,289.58,289.5,1011,85,287.05,100,10000,2.57,140,Rain,light rain
8418,2023_03_CHI_KAN,2023-09-24,4:25PM,2023-2024,GEHA Field at Arrowhead Stadium,39.0489,-94.4839,301.25,300.77,1013,38,285.61,0,10000,2.57,330,Clear,clear sky
8419,2023_03_DAL_ARI,2023-09-24,4:25PM,2023-2024,State Farm Stadium,33.528,-112.263,309.59,307.43,1010,17,280.63,0,10000,1.34,312,Clear,clear sky
8420,2023_04_CHI_KAN,2023-09-24,4:25PM,2023-2024,GEHA Field at Arrowhead Stadium,39.0489,-94.4839,301.25,300.77,1013,38,285.61,0,10000,2.57,330,Clear,clear sky
8421,2023_04_DAL_ARI,2023-09-24,4:25PM,2023-2024,State Farm Stadium,33.528,-112.263,309.59,307.43,1010,17,280.63,0,10000,1.34,312,Clear,clear sky


In [6]:
temp_df.drop(['GAME_DATE','GAMETIME','YEAR','STADIUM','LATITUDE', 'LONGITUDE'],axis=1).to_csv('../../data/cleaned/NFL_wk_by_wk_w_weather.csv', mode='a',index=False,header=False)