In [1]:
from datetime import datetime, date
from pathlib import Path

import meteostat as ms
import numpy as np
import pandas as pd

NFL teams have official, commonly used 2- or 3-letter abbreviation codes, which we will use to associate each team with its weather data.

In [2]:
# Gather home stadium details - this is a lot of point and click on google maps
# Each entry maps the team's official abbreviation (2 or 3 letters) to a tuple of:
#   (latitude, longitude, closed)
# latitude / longitude locate the home stadium in decimal degrees.
# closed is True when games are played under a roof; retractable-roof
# stadiums are counted as closed.
# Teams that share a stadium (LAC/LAR, NYG/NYJ) carry identical coordinates.
home_stadium_details = {
    "ARI": (33.53, -112.26, True),
    "ATL": (33.75, -84.40, True),
    "BAL": (39.28, -76.62, False),
    "BUF": (42.77, -78.78, False),
    "CAR": (35.22, -80.85, False),
    "CHI": (41.86, -87.62, False),
    "CIN": (39.09, -84.51, False),
    "CLE": (41.50, -81.70, False),
    "DAL": (32.75, -97.09, True),
    "DEN": (39.74, -105.02, False),
    "DET": (42.34, -83.04, True),
    "GB": (44.50, -88.06, False),
    "HOU": (29.68, -95.41, True),
    "IND": (39.76, -86.16, True),
    "JAX": (30.32, -81.64, False),
    # Arrowhead Stadium sits at roughly 94.48 W (the previous -91.48 was ~260 km too far east)
    "KC": (39.05, -94.48, False),
    "LV": (36.09, -115.18, True),
    # SoFi Stadium in Inglewood, CA is at roughly 33.95 N (the previous 39.95 pointed into Nevada)
    "LAC": (33.95, -118.34, True),
    "LAR": (33.95, -118.34, True),
    "MIA": (25.95, -80.24, False),
    "MIN": (44.97, -93.26, True),
    "NE": (42.09, -71.26, False),
    "NO": (29.95, -90.08, True),
    "NYG": (40.81, -74.07, False),
    "NYJ": (40.81, -74.07, False),
    "PHI": (39.90, -75.17, False),
    "PIT": (40.44, -80.01, False),
    "SF": (37.40, -121.97, False),
    "SEA": (47.59, -122.33, False),
    "TB": (27.98, -82.50, False),
    "TEN": (36.17, -86.77, False),
    "WAS": (38.91, -76.86, False),
}
    

NFL games are played from early September to early February, limiting the dates that we need weather data from.

In addition, some teams play in "closed" stadiums, meaning that games are temperature controlled and do not depend on outside temperatures.
For this analysis, we will be looking at temperature averages in the last 6 years, because the last NFL team stadium changes occurred in 2020, with the Raiders moving to Las Vegas and the Rams and the Chargers moving to a shared stadium in Inglewood, California.

The workflow for using meteostat to get weather information works like this:

In [3]:
# Example location, given as latitude / longitude in decimal degrees
# (the same coordinates as the ARI entry in home_stadium_details)
lat = 33.53
lon = -112.26
# Wrap the coordinates in a Meteostat point
point = ms.Point(lat, lon)
# Look up the 3 closest stations so that values can be averaged over the area
# (a single station could be thrown off by daily outliers)
stations = ms.stations.nearby(point, limit=3)
print(stations)

                                            name country region  latitude  \
id                                                                          
KGEU0  Glendale / Country Greens At Villa De Paz      US     AZ   33.5269   
KLUF0                  Phoenix / Glendale / Webb      US     AZ   33.5350   
KGYR0              Phoenix / Litchfield Junction      US     AZ   33.4237   

       longitude  elevation         timezone  distance  
id                                                      
KGEU0  -112.2951        326  America/Phoenix    3271.7  
KLUF0  -112.3832        331  America/Phoenix   11432.8  
KGYR0  -112.3745        296  America/Phoenix   15890.0  


This shows the individual nearby weather stations, their id, name, latitude, longitude, and distance away from lat/lon point

This distance is in meters so KGEU0 is 3,271 meters away (or ~2 miles). The furthest one away in this case is about 10 miles away.

We can then sample daily weather data between a start date and an end date; the query returns a dataframe with one row per station and day:

In [4]:
# Short sample window: four days in early July 2024
start_date, end_date = date(2024, 7, 4), date(2024, 7, 7)
# Build the daily query for the stations found above, then download the data
daily_query = ms.daily(stations, start_date, end_date)
df = daily_query.fetch()
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,temp,tmin,tmax,rhum,prcp,snwd,wspd,wpgt,pres,tsun,cldc
station,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
KGEU0,2024-07-04,37.4,30.4,44.0,29,0.0,,19.2,,1008.6,,0
KGEU0,2024-07-05,38.6,31.0,47.0,22,0.0,,15.8,,1005.7,,1
KGEU0,2024-07-06,38.1,29.9,44.0,22,0.0,,15.9,,1007.0,,0
KGEU0,2024-07-07,37.9,29.9,44.0,24,0.0,,21.3,,1006.3,,0
KGYR0,2024-07-04,37.4,30.7,44.0,32,0.0,,21.5,,1009.2,,0


These are generally in metric units, so temperature is in Celsius, wind speed is in km/h, etc.

To work nicely with the nflreadr dataset, we will create some helper functions to apply to our dataframe.

In [5]:
# There are many different ways to calculate week numbers; this method assumes Jan 1 to Jan 7 is always "Week 1".
# Other methods anchor week 1 to a particular weekday (Sunday, Monday, etc.), which makes comparing
# week numbers across years difficult.
def convert_to_weeknum(x):
    """Return the 1-based week-of-year number for a date, counting from Jan 1.

    Days 1-7 of the year are week 1, days 8-14 are week 2, and so on, so the
    result does not depend on which weekday the year starts on.

    Parameters
    ----------
    x : pandas.Timestamp, datetime.datetime or datetime.date
        The calendar date to convert. Any time-of-day component is ignored.

    Returns
    -------
    int
        Week number in the range 1 to 53.
    """
    # tm_yday is the 1-based day of the year; timetuple() is available on
    # date, datetime and pandas.Timestamp, so all three are accepted.
    day_of_year = x.timetuple().tm_yday
    return (day_of_year - 1) // 7 + 1

This will allow us to compare the game-day temperature to the average weekly temperature that the team experiences.

We figured that temperature can vary wildly over a month in certain places (like Michigan), so we wanted finer-grained, week-level data.

In [6]:
# Celsius -> Fahrenheit, so values line up with the imperial units used in the nfl dataset
def convert_to_f(x):
    """Convert a temperature from degrees Celsius to degrees Fahrenheit."""
    scale = 9 / 5
    offset = 32
    return x * scale + offset

Now we want to develop the data for average weekly temperature for each team, looking over the past few years.

Wrapping all that up, this looks for the temperatures around each stadium for the dates between Sept 1 and Feb 10 (NFL Season)

In [7]:
years = [2020, 2021, 2022, 2023, 2024, 2025]
start = (9, 1)
end = (2, 10)

# Maps team code -> {week number -> mean temperature in Fahrenheit}
weekly_weather_details = {}
for home_stadium, (lat, lon, closed_stadium) in home_stadium_details.items():
    # Meteostat point for the stadium, plus its 3 nearest stations so the
    # average covers the surrounding area (don't want to be influenced by outliers)
    point = ms.Point(lat, lon)
    stations = ms.stations.nearby(point, limit=3)

    # Every daily reading seen for each week number, pooled across all seasons
    week_details = {}
    # Pair each year with the following one: a season starts in September of
    # the first year and ends in February of the second
    for season_year, following_year in zip(years[:-1], years[1:]):
        start_date = date(season_year, *start)
        end_date = date(following_year, *end)
        df = ms.daily(stations, start_date, end_date).fetch().reset_index()
        df["week_num"] = df["time"].apply(convert_to_weeknum)
        if closed_stadium:
            # For closed stadiums, we assume the temperature is 70 degrees
            df["temp_f"] = 70
        else:
            df["temp_f"] = df["temp"].apply(convert_to_f)
        for weeknum, weekdf in df.groupby("week_num"):
            # Keep only rows that actually have a temperature reading
            valid_temps = weekdf.loc[weekdf["temp_f"].notna(), "temp_f"]
            week_details.setdefault(weeknum, []).extend(valid_temps.tolist())

    # Collapse each week's pooled readings into a single mean value
    weekly_weather_details[home_stadium] = {
        weeknum: float(np.sum(temps) / len(temps))
        for weeknum, temps in week_details.items()
    }

With this data, we can now form a dataframe for easy use in joining the nflreadr data

In [8]:
# Flatten the nested {team: {week: avg_temp}} mapping into one row per team and week
rows = [
    (home_stadium, week_num, temp)
    for home_stadium, weather_details in weekly_weather_details.items()
    for week_num, temp in weather_details.items()
]
df = pd.DataFrame(rows, columns=["team", "week_num", "avg_temp"])
df.head()

Unnamed: 0,team,week_num,avg_temp
0,ARI,1,70.0
1,ARI,2,70.0
2,ARI,3,70.0
3,ARI,4,70.0
4,ARI,5,70.0


Final step is converting to csv for use in joining the data sets

In [9]:
# Create the output folder first so the export also works on a fresh checkout
# where "data/" does not exist yet (to_csv does not create missing directories)
Path("data").mkdir(parents=True, exist_ok=True)
df.to_csv("data/team_avg_temp.csv", index=False)