# Fandom Radius for all NFL Teams from 2011-2014

In [1]:
import pandas as pd
from data_mountain_query.query import get_ambient_tweets
from data_mountain_query.connection import get_connection
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from datetime import timedelta
import plotly.express as px
from shapely import Point
import os
import warnings
warnings.filterwarnings("ignore")

Dictionary of teams with their name, home city, lat/lon pair, and popular anchors (#TeamName, #Mascot)

In [2]:
# Per-team configuration, keyed by the team abbreviation that is matched against
# the `home_team` / `away_team` columns of the games table.
#
# Each entry holds:
#   name    - full franchise name as of the 2011-2014 seasons (used in plot titles)
#   city    - home city label (used as the legend label of the home marker)
#   lat/lon - decimal-degree coordinates of the home city (WGS84), the centre
#             from which tweet distances and the fandom circle are measured
#   anchors - base hashtags queried for every game of that team
#
# All 32 franchises that played in 2011-2014 are listed, including the
# San Diego Chargers ("SD") and St. Louis Rams ("STL").
# NOTE(review): "SD" / "STL" assume the games table uses those abbreviations
# for these seasons - confirm against games.csv.
TEAM_CONFIG = {
    "ARI": {
        "name": "Arizona Cardinals",
        "city": "Phoenix",
        "lat": 33.4483,
        "lon": -112.0725,
        "anchors": ["#ArizonaCardinals", "#Cardinals"],
    },
    "ATL": {
        "name": "Atlanta Falcons",
        "city": "Atlanta",
        "lat": 33.7501,
        "lon": -84.3885,
        "anchors": ["#Falcons", "#AtlantaFalcons"],
    },
    "BAL": {
        "name": "Baltimore Ravens",
        "city": "Baltimore",
        "lat": 39.2904,
        "lon": -76.6104,
        "anchors": ["#Ravens", "#BaltimoreRavens"],
    },
    "BUF": {
        "name": "Buffalo Bills",
        "city": "Buffalo",
        "lat": 42.8869,
        "lon": -78.8789,
        "anchors": ["#Bills", "#BuffaloBills"],
    },
    "CAR": {
        "name": "Carolina Panthers",
        "city": "Charlotte",
        "lat": 35.2271,
        "lon": -80.8409,
        "anchors": ["#Panthers", "#CarolinaPanthers"],
    },
    "CHI": {
        "name": "Chicago Bears",
        "city": "Chicago",
        "lat": 41.8832,
        "lon": -87.6324,
        "anchors": ["#Bears", "#ChicagoBears"],
    },
    "CIN": {
        "name": "Cincinnati Bengals",
        "city": "Cincinnati",
        "lat": 39.1031,
        "lon": -84.5120,
        "anchors": ["#Bengals", "#CincinnatiBengals"],
    },
    "CLE": {
        "name": "Cleveland Browns",
        "city": "Cleveland",
        "lat": 41.4993,
        "lon": -81.6944,
        "anchors": ["#Browns", "#ClevelandBrowns"],
    },
    "DAL": {
        "name": "Dallas Cowboys",
        "city": "Dallas",
        "lat": 32.7767,
        "lon": -96.7970,
        "anchors": ["#Cowboys", "#DallasCowboys"],
    },
    "DEN": {
        "name": "Denver Broncos",
        "city": "Denver",
        "lat": 39.7392,
        "lon": -104.9903,
        "anchors": ["#Broncos", "#DenverBroncos"],
    },
    "DET": {
        "name": "Detroit Lions",
        "city": "Detroit",
        "lat": 42.3297,
        "lon": -83.0425,
        "anchors": ["#Lions", "#DetroitLions"],
    },
    "GB": {
        "name": "Green Bay Packers",
        "city": "Green Bay",
        "lat": 44.5133,
        "lon": -88.0133,
        "anchors": ["#Packers", "#GreenBayPackers"],
    },
    "HOU": {
        "name": "Houston Texans",
        "city": "Houston",
        "lat": 29.7601,
        "lon": -95.3701,
        "anchors": ["#Texans", "#HoustonTexans"],
    },
    "IND": {
        "name": "Indianapolis Colts",
        "city": "Indianapolis",
        "lat": 39.7691,
        "lon": -86.1580,
        "anchors": ["#Colts", "#IndianapolisColts"],
    },
    "JAX": {
        "name": "Jacksonville Jaguars",
        "city": "Jacksonville",
        "lat": 30.3298,
        "lon": -81.6592,
        "anchors": ["#Jaguars", "#JacksonvilleJaguars"],
    },
    "KC": {
        "name": "Kansas City Chiefs",
        "city": "Kansas City",
        "lat": 39.0997,
        "lon": -94.5786,
        "anchors": ["#Chiefs", "#KansasCityChiefs"],
    },
    "MIA": {
        "name": "Miami Dolphins",
        "city": "Miami",
        "lat": 25.7617,
        "lon": -80.1918,
        "anchors": ["#Dolphins", "#MiamiDolphins"],
    },
    "MIN": {
        "name": "Minnesota Vikings",
        "city": "Minneapolis",
        "lat": 44.9778,
        "lon": -93.2650,
        "anchors": ["#Vikings", "#MinnesotaVikings"],
    },
    "NE": {
        "name": "New England Patriots",
        "city": "Boston",
        "lat": 42.3601,
        "lon": -71.0589,
        "anchors": ["#Patriots", "#NewEnglandPatriots"],
    },
    "NO": {
        "name": "New Orleans Saints",
        "city": "New Orleans",
        "lat": 29.9509,
        "lon": -90.0758,
        "anchors": ["#Saints", "#NewOrleansSaints"],
    },
    "NYG": {
        "name": "New York Giants",
        "city": "New York",
        "lat": 40.7128,
        "lon": -74.0060,
        "anchors": ["#Giants", "#NewYorkGiants"],
    },
    "NYJ": {
        "name": "New York Jets",
        "city": "New York",
        "lat": 40.7128,
        "lon": -74.0060,
        "anchors": ["#Jets", "#NewYorkJets"],
    },
    "OAK": {
        "name": "Oakland Raiders",
        "city": "Oakland",
        "lat": 37.8044,
        "lon": -122.2712,
        "anchors": ["#Raiders", "#OaklandRaiders"],
    },
    "PHI": {
        "name": "Philadelphia Eagles",
        "city": "Philadelphia",
        "lat": 39.9526,
        "lon": -75.1652,
        "anchors": ["#Eagles", "#PhiladelphiaEagles"],
    },
    "PIT": {
        "name": "Pittsburgh Steelers",
        "city": "Pittsburgh",
        "lat": 40.4406,
        "lon": -79.9959,
        "anchors": ["#Steelers", "#PittsburghSteelers"],
    },
    "SD": {
        "name": "San Diego Chargers",
        "city": "San Diego",
        "lat": 32.7157,
        "lon": -117.1611,
        "anchors": ["#Chargers", "#SanDiegoChargers"],
    },
    "SEA": {
        "name": "Seattle Seahawks",
        "city": "Seattle",
        "lat": 47.6062,
        "lon": -122.3321,
        "anchors": ["#Seahawks", "#SeattleSeahawks"],
    },
    "SF": {
        "name": "San Francisco 49ers",
        "city": "San Francisco",
        "lat": 37.7749,
        "lon": -122.4194,
        "anchors": ["#49ers", "#Niners"],
    },
    "STL": {
        "name": "St. Louis Rams",
        "city": "St. Louis",
        "lat": 38.6270,
        "lon": -90.1994,
        "anchors": ["#Rams", "#StLouisRams"],
    },
    "TB": {
        "name": "Tampa Bay Buccaneers",
        "city": "Tampa",
        "lat": 27.9517,
        "lon": -82.4588,
        "anchors": ["#Buccaneers", "#TampaBayBuccaneers"],
    },
    "TEN": {
        "name": "Tennessee Titans",
        "city": "Nashville",
        "lat": 36.1627,
        "lon": -86.7816,
        "anchors": ["#Titans", "#TennesseeTitans"],
    },
    "WAS": {
        "name": "Washington Redskins",
        "city": "Washington, D.C.",
        "lat": 38.9073,
        "lon": -77.0369,
        "anchors": ["#Redskins", "#WashingtonRedskins"],
    },
}


Loads NFL game data and converts game dates to datetime format. Also loads CBSA shapefiles and population estimates, filters the population data to metropolitan statistical areas only, and renames yearly population columns. An average population for each metro area (2011–2014) is computed and merged into the CBSA geospatial dataset using CBSA codes.

In [3]:
# Root folder holding every input file. Defined once so the three inputs cannot
# drift apart (the games path previously used a differently-cased folder name,
# which only resolves on a case-insensitive filesystem). Set the EK_THESIS_DIR
# environment variable to run the notebook on another machine.
DATA_DIR = os.environ.get("EK_THESIS_DIR", "/Users/elisabethkollrack/Thesis/EK-Thesis")

# NFL schedule; `gameday` is parsed from two-digit-year m/d/yy strings.
games = pd.read_csv(os.path.join(DATA_DIR, "games.csv"))
games['gameday'] = pd.to_datetime(games['gameday'], format='%m/%d/%y')

# Load CBSA shapefile
cities = gpd.read_file(
    os.path.join(DATA_DIR, "tl_2014_us_cbsa", "tl_2014_us_cbsa.shp")
)

# Load population CSV. CBSA codes are read as strings so they compare equal to
# the shapefile's CBSAFP codes during the merge below.
pop_data = pd.read_csv(
    os.path.join(DATA_DIR, "cbsa_population.csv"),
    encoding="latin1",
    dtype={"CBSA": str}
)

# Keep only the metropolitan-statistical-area rows and the four yearly estimates.
pop_metro = pop_data.loc[
    pop_data["LSAD"] == "Metropolitan Statistical Area",
    ["CBSA", "POPESTIMATE2011", "POPESTIMATE2012", "POPESTIMATE2013", "POPESTIMATE2014"]
].copy()

# population_<year> is the column naming the per-season radius code looks up.
pop_metro = pop_metro.rename(columns={
    "POPESTIMATE2011": "population_2011",
    "POPESTIMATE2012": "population_2012",
    "POPESTIMATE2013": "population_2013",
    "POPESTIMATE2014": "population_2014",
})

# Row-wise mean of the four yearly estimates, used for the overall radius.
pop_metro["population_avg_2011_2014"] = pop_metro[
    ["population_2011", "population_2012", "population_2013", "population_2014"]
].mean(axis=1)

# Attach populations to the CBSA polygons. `validate` makes a duplicated CBSA
# code in the population table raise instead of silently duplicating polygons
# (which would double-count tweets in the later spatial join).
cities = cities.merge(
    pop_metro, left_on="CBSAFP", right_on="CBSA", how="left", validate="many_to_one"
)

# Keep only the polygons the shapefile labels "M1"; rows without a population
# match retain NaN in the population columns.
cities = cities[cities["LSAD"] == "M1"].copy()


Generate latitude and longitude points for a geodesic circle on the Earth's surface centered at (lat, lon) with given radius. Uses spherical trigonometry to account for Earth's curvature.

In [4]:
def geodesic_circle(lat, lon, radius_km, n_points=360):
    """Trace a circle of constant great-circle distance around a centre point.

    The Earth is modelled as a sphere of radius 6371 km and each vertex is
    obtained with the spherical destination-point formula.

    Parameters
    ----------
    lat, lon : float
        Centre of the circle in decimal degrees.
    radius_km : float
        Great-circle radius of the circle in kilometres.
    n_points : int, default 360
        Number of vertices. Bearings run from 0 to 2*pi inclusive, so the
        first and last vertices coincide and the outline is closed.

    Returns
    -------
    tuple of numpy.ndarray
        ``(latitudes, longitudes)`` of the vertices in decimal degrees, each of
        length ``n_points``. Longitudes are not wrapped into [-180, 180].
    """
    earth_radius_km = 6371.0

    # Angular radius of the circle as seen from the Earth's centre.
    delta = radius_km / earth_radius_km
    sin_delta = np.sin(delta)
    cos_delta = np.cos(delta)

    # Centre point in radians, with its trig terms computed once.
    phi0 = np.radians(lat)
    lam0 = np.radians(lon)
    sin_phi0 = np.sin(phi0)
    cos_phi0 = np.cos(phi0)

    # Bearings around the centre; bearing 0 yields the northernmost vertex.
    bearings = np.linspace(0, 2 * np.pi, n_points)

    # Latitude of each vertex.
    phi = np.arcsin(
        sin_phi0 * cos_delta + cos_phi0 * sin_delta * np.cos(bearings)
    )

    # Longitude offset of each vertex relative to the centre.
    dlam = np.arctan2(
        np.sin(bearings) * sin_delta * cos_phi0,
        cos_delta - sin_phi0 * np.sin(phi)
    )

    return np.degrees(phi), np.degrees(lam0 + dlam)


Estimates the geographic extent of each NFL team’s fan base by collecting geotagged tweets posted within 3 days of each regular-season game from 2011–2014 using team- and matchup-specific hashtags. Tweets are spatially joined to U.S. metropolitan areas (CBSAs), and distances from the team’s home city are computed using projected coordinates.

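For each season, metro areas are aggregated to calculate tweet activity per 100,000 residents and average distance from the home city. Metro areas are sorted by distance and their populations are accumulated outward from the home city; the metros lying at or beyond the distance where this cumulative population reaches 70% of the total (`POP_BASELINE_FRAC = 0.7`) form the baseline region, and the baseline activity level is the mean tweets-per-100k across those metros. The fandom radius is defined as the minimum distance at which a metro's tweet activity is at or below this baseline level.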

An overall fandom radius is also computed across all seasons using average metro populations from 2011–2014. Results are stored for each team and season, and an interactive U.S. map is generated showing metro-level activity, the home city, and a geodesic circle representing the estimated fandom radius.


In [5]:
%%capture

POP_BASELINE_FRAC = 0.7
SEASON_MIN = 2011
SEASON_MAX = 2014

collection, client = get_connection(geotweets=True)

all_results = []

# Loop through each team configuration
for TEAM_ABBR, cfg in TEAM_CONFIG.items():

    TEAM_NAME = cfg["name"]
    CITY_NAME = cfg["city"]

    # Team home city coordinates
    CITY_CENTER_LAT = cfg["lat"]
    CITY_CENTER_LON = cfg["lon"]

    # Base hashtag anchors for team
    base_anchors = cfg["anchors"]

    # Filter regular season games for this team and season range
    team_games = games[
        (games['season'] >= SEASON_MIN) & (games['season'] <= SEASON_MAX) &
        ((games['home_team'] == TEAM_ABBR) | (games['away_team'] == TEAM_ABBR))
    ].sort_values(['season', 'gameday'])

    all_tweets = []

    # Collect tweets around each game
    for _, game in team_games.iterrows():
        gameday = game['gameday']

        # Identify opponent
        opponent = (
            game['away_team'] if game['home_team'] == TEAM_ABBR else game['home_team']
        )

        # Build hashtag anchors including matchup hashtags
        anchors = base_anchors.copy()
        anchors.extend([
            f"#{TEAM_ABBR}vs{opponent}",
            f"#{opponent}vs{TEAM_ABBR}",
        ])

        # 7-day window centered on game day
        start_date = gameday - timedelta(days=3)
        end_date = gameday + timedelta(days=3)
        dates = pd.date_range(start_date, end_date, freq='D')

        # Query tweets for each anchor and tag with season
        for anchor in anchors:
            tweets_list = list(get_ambient_tweets(anchor, dates, collection))
            for t in tweets_list:
                t["season"] = game["season"]
            all_tweets.extend(tweets_list)

    # Convert to DataFrame and clean
    geo_df = pd.DataFrame(all_tweets)
    geo_df = geo_df.drop_duplicates(subset="_id")
    geo_df = geo_df.dropna(subset=["tweet_created_at", "geo"])

    geo_df["tweet_created_at"] = pd.to_datetime(geo_df["tweet_created_at"], errors="coerce")

    # Extract coordinates from geo field
    geo_df["lon"] = geo_df["geo"].apply(lambda x: x["coordinates"][0] if isinstance(x, dict) else None)
    geo_df["lat"] = geo_df["geo"].apply(lambda x: x["coordinates"][1] if isinstance(x, dict) else None)
    geo_df = geo_df.dropna(subset=["lon", "lat"])

    # Convert to GeoDataFrame
    tweets_gdf = gpd.GeoDataFrame(
        geo_df,
        geometry=gpd.points_from_xy(geo_df.lon, geo_df.lat),
        crs="EPSG:4326"
    )

    # Match projection to city polygons
    if tweets_gdf.crs != cities.crs:
        tweets_gdf = tweets_gdf.to_crs(cities.crs)

    # Project team home city for distance calculations
    city_proj = gpd.GeoSeries(
        [Point(CITY_CENTER_LON, CITY_CENTER_LAT)],
        crs="EPSG:4326"
    ).to_crs(epsg=5070).iloc[0]

    # Season radius calculations
    for season in sorted(tweets_gdf["season"].dropna().unique()):

        pop_col = f"population_{int(season)}"

        season_tweets = tweets_gdf[tweets_gdf["season"] == season].copy()

        # Assign tweets to metro areas
        tweets_with_city = gpd.sjoin(
            season_tweets, cities, how="inner", predicate="within"
        )

        # Project and compute distance from home city
        tweets_proj = tweets_with_city.to_crs(epsg=5070)
        tweets_proj["distance_km"] = tweets_proj.geometry.distance(city_proj) / 1000

        # Aggregate by metro area
        metro_dist = (
            tweets_proj.groupby(["CBSAFP", "NAME"])
            .agg(
                tweet_count=("geometry", "count"),
                mean_distance_km=("distance_km", "mean"),
                population=(pop_col, "first")
            )
            .reset_index()
            .dropna(subset=["population"])
        )

        # Normalize tweet activity by population
        metro_dist["tweets_per_100k"] = metro_dist["tweet_count"] / metro_dist["population"] * 100_000
        metro_dist = metro_dist.sort_values("mean_distance_km")
        metro_dist["cum_population"] = metro_dist["population"].cumsum()

        # Determine baseline population threshold
        total_pop = metro_dist["population"].sum()
        pop_baseline = POP_BASELINE_FRAC * total_pop

        # Find distance where baseline population begins
        baseline_start_dist = metro_dist.loc[
            metro_dist["cum_population"] >= pop_baseline, "mean_distance_km"
        ].min()

        # Average activity in baseline region
        baseline_activity = metro_dist.loc[
            metro_dist["mean_distance_km"] >= baseline_start_dist, "tweets_per_100k"
        ].mean()

        # Radius where activity drops to baseline
        R_km = metro_dist.loc[
            metro_dist["tweets_per_100k"] <= baseline_activity, "mean_distance_km"
        ].min()

        # Fallback if no cutoff found
        if pd.isna(R_km):
            R_km = metro_dist["mean_distance_km"].max()

        # Store season result
        all_results.append({
            "team_name": TEAM_NAME,
            "city": CITY_NAME,
            "season": int(season),
            "lat": CITY_CENTER_LAT,
            "lon": CITY_CENTER_LON,
            "radius_km": R_km,
            "n_tweets": len(season_tweets),
            "n_metros": len(metro_dist)
        })

    
    # Overall radius calculations for 2011-2014 (same logic as above)
    tweets_overall = tweets_gdf.copy()

    tweets_with_city = gpd.sjoin(
        tweets_overall, cities, how="inner", predicate="within"
    )

    tweets_proj = tweets_with_city.to_crs(epsg=5070)
    tweets_proj["distance_km"] = tweets_proj.geometry.distance(city_proj) / 1000

    metro_dist = (
        tweets_proj.groupby(["CBSAFP", "NAME"])
        .agg(
            tweet_count=("geometry", "count"),
            mean_distance_km=("distance_km", "mean"),
            population=("population_avg_2011_2014", "first")
        )
        .reset_index()
        .dropna(subset=["population"])
    )

    metro_dist["tweets_per_100k"] = metro_dist["tweet_count"] / metro_dist["population"] * 100_000
    metro_dist = metro_dist.sort_values("mean_distance_km")
    metro_dist["cum_population"] = metro_dist["population"].cumsum()

    total_pop = metro_dist["population"].sum()
    pop_baseline = POP_BASELINE_FRAC * total_pop

    baseline_start_dist = metro_dist.loc[
        metro_dist["cum_population"] >= pop_baseline, "mean_distance_km"
    ].min()

    baseline_activity = metro_dist.loc[
        metro_dist["mean_distance_km"] >= baseline_start_dist, "tweets_per_100k"
    ].mean()

    R_km_overall = metro_dist.loc[
        metro_dist["tweets_per_100k"] <= baseline_activity, "mean_distance_km"
    ].min()

    if pd.isna(R_km_overall):
        R_km_overall = metro_dist["mean_distance_km"].max()

    # Save overall result
    all_results.append({
        "team_name": TEAM_NAME,
        "city": CITY_NAME,
        "season": "overall",
        "lat": CITY_CENTER_LAT,
        "lon": CITY_CENTER_LON,
        "radius_km": R_km_overall,
        "n_tweets": len(tweets_overall),
        "n_metros": len(metro_dist)
    })

    # Graphing
    
    # Generate circle for fandom radius
    circle_lat, circle_lon = geodesic_circle(
        CITY_CENTER_LAT, CITY_CENTER_LON, R_km_overall
    )

    # Average tweet coordinates per metro (for display)
    metro_coords = (
        tweets_with_city.groupby(["CBSAFP", "NAME"])
        .agg(lat=("lat", "mean"), lon=("lon", "mean"))
        .reset_index()
    )

    # Merge stats and coordinates
    metro_summary = metro_dist.merge(metro_coords, on=["CBSAFP", "NAME"])

    # Plot interactive map
    fig = px.scatter_geo(
        metro_summary,
        lat="lat",
        lon="lon",
        size="tweets_per_100k",
        hover_name="NAME",
        title=f"Geographic Radius of {TEAM_NAME} Fandom (2011–2014 Overall)",
        scope="usa"
    )

    # Add home city marker
    fig.add_trace(px.scatter_geo(
        lat=[CITY_CENTER_LAT],
        lon=[CITY_CENTER_LON]
    ).data[0])

    fig.data[-1].update(
        marker=dict(size=8, color="red"),
        name=CITY_NAME,
        showlegend=True
    )

    # Add fandom radius circle
    fig.add_trace(px.line_geo(lat=circle_lat, lon=circle_lon).data[0])
    fig.data[-1].update(
        name=f"Fandom radius ≈ {int(R_km_overall)} km",
        showlegend=True
    )

    # Format title
    fig.update_layout(title_x=0.5, title_font_size=20)

    # Save interactive HTML
    out_dir = "/Users/elisabethkollrack/Thesis/EK-Thesis/Fandom Radii/Interactive Graphs"
    os.makedirs(out_dir, exist_ok=True)

    outfile = os.path.join(out_dir, f"{TEAM_ABBR}_overall_fandom_radius.html")
    fig.write_html(outfile)


Export results

In [6]:
# One row per (team, season) plus one "overall" row per team, in the order the
# results were appended; written without the DataFrame index.
results_csv_path = "/Users/elisabethkollrack/Thesis/EK-Thesis/Fandom Radii/fandom_radius_all_teams.csv"

results_df = pd.DataFrame(all_results)
results_df.to_csv(results_csv_path, index=False)