In [45]:
import pandas as pd
from data_mountain_query.query import get_ambient_tweets
from data_mountain_query.connection import get_connection
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from datetime import timedelta
import plotly.express as px
from shapely import Point

In [46]:
games = pd.read_csv("/Users/elisabethkollrack/Thesis/EK-thesis/games.csv")
games = games[games['game_type'] == 'REG']
games['gameday'] = pd.to_datetime(games['gameday'], format='%m/%d/%y')

ne_games = games[
    (games['season'] >= 2013) & (games['season'] <= 2017) &
    ((games['home_team'] == 'NE') | (games['away_team'] == 'NE'))
].sort_values(['season', 'gameday'])


In [47]:
collection, client = get_connection(geotweets=True)

Connecting on mgmt1.vacc.uvm.edu


In [48]:
%%capture
all_tweets = []
for index, game in ne_games.iterrows():
    gameday = game['gameday']
    
    # Determine opponent
    if game['home_team'] == 'NE':
        opponent = game['away_team']
    else:
        opponent = game['home_team']

    anchors = [
        "#newenglandpatriots", 
        "#patriots",
        f"#NEvs{opponent}",
        f"#{opponent}vsNE"
    ]

    start_date = gameday - timedelta(days=3)
    end_date = gameday + timedelta(days=3)
    dates = pd.date_range(start_date, end_date, freq='D')

    for anchor in anchors:
        tweets_list1 = [t for t in get_ambient_tweets(anchor, dates, collection)]
        all_tweets.extend(tweets_list1)



In [49]:
geo_df = pd.DataFrame(all_tweets)

# Extract lon and lat from the 'geo' column
geo_df['lon'] = geo_df['geo'].apply(lambda x: x['coordinates'][0] if isinstance(x, dict) else None)
geo_df['lat'] = geo_df['geo'].apply(lambda x: x['coordinates'][1] if isinstance(x, dict) else None)

geo_df.head()

Unnamed: 0,_id,created_at,id,id_str,text,source,truncated,in_reply_to_status_id,in_reply_to_status_id_str,in_reply_to_user_id,...,twitter_entities,twitter_filter_level,twitter_lang,retweetCount,gnip,twitter_extended_entities,inReplyTo,long_object,lon,lat
0,5e447d12f765e1127af9bbb3,Thu Sep 05 15:22:32 +0000 2013,375640096269017100,375640096269017089,use my patriots jersey on game days \n#SoIKnow...,"<a href=""http://twitter.com/download/android"" ...",False,,,,...,,,,,,,,,-97.451755,25.900049
1,5e448d4df765e1127a1c1ca3,Sun Sep 08 14:30:46 +0000 2013,376714232500588540,376714232500588544,#nflgameday apparel #patriots #newenglandpatri...,"<a href=""http://instagram.com"" rel=""nofollow"">...",False,,,,...,,,,,,,,,-71.269603,41.675478
2,5e4488dbf765e1127a129951,Sun Sep 08 17:05:37 +0000 2013,376753202013560800,376753202013560833,Lets go Patriots!!!!!!!! #GameDay #Patriots @P...,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,...,,,,,,,,,-117.827185,33.698094
3,5e448d5ef765e1127a1c5c83,Sun Sep 08 14:56:08 +0000 2013,376720616390291460,376720616390291456,Lets Go Patriots!!#PatsNation #Patriots #brady,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,...,,,,,,,,,-78.879703,43.04775
4,5e448cedf765e1127a1b4b7c,Sun Sep 08 13:33:23 +0000 2013,376699791750594560,376699791750594561,Then later patriots vs bills. #Patriots #letsgo,"<a href=""http://twitter.com/download/iphone"" r...",False,,,,...,,,,,,,,,-75.265707,43.024112


In [50]:
cities = gpd.read_file(
    "/Users/elisabethkollrack/Thesis/EK-Thesis/tl_2024_us_cbsa/tl_2024_us_cbsa.shp"
)

pop_data = pd.read_csv(
    "/Users/elisabethkollrack/Thesis/EK-Thesis/cbsa_population.csv",
    encoding="latin1"
)

# Keep metro-level population only
pop_metro = pop_data.loc[
    pop_data["LSAD"] == "Metropolitan Statistical Area",
    ["CBSA", "POPESTIMATE2024"]
].rename(columns={
    "POPESTIMATE2024": "population_2024"
})

# Ensure consistent CBSA identifier types
cities["GEOID"] = cities["GEOID"].astype(str)
pop_metro["CBSA"] = pop_metro["CBSA"].astype(str)

# Merge population into CBSA geometries
cities = cities.merge(
    pop_metro,
    left_on="GEOID",
    right_on="CBSA",
    how="left"
)

# (Optional) Keep only major metros
cities = cities[cities["LSAD"] == "M1"]

# Tweets → GeoDataFrame
tweets_gdf = gpd.GeoDataFrame(
    geo_df,
    geometry=gpd.points_from_xy(geo_df.lon, geo_df.lat),
    crs="EPSG:4326"
)

# Spatial join tweets to metros
tweets_with_city = gpd.sjoin(
    tweets_gdf,
    cities,
    how="left",
    predicate="within"
)



CRS mismatch between the CRS of left geometries and the CRS of right geometries.
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: EPSG:4269




In [51]:
# Boston center
boston_lat = 42.3601
boston_lon = -71.0589
boston_center = Point(boston_lon, boston_lat)

# Project tweets to meters
tweets_proj = tweets_with_city.to_crs(epsg=5070)
boston_proj = gpd.GeoSeries([boston_center], crs="EPSG:4326").to_crs(epsg=5070).iloc[0]

tweets_proj["distance_km"] = tweets_proj.geometry.distance(boston_proj) / 1000

# Collapse tweets to metro level
metro_dist = (
    tweets_proj
    .groupby(["GEOID", "NAME"])
    .agg(
        tweet_count=("geometry", "count"),
        mean_distance_km=("distance_km", "mean"),
        population_2024=("population_2024", "first")
    )
    .reset_index()
)

# Minimum population cutoff
MIN_POP = 50_000
metro_dist = metro_dist.dropna(subset=["population_2024"])
metro_dist = metro_dist[metro_dist["population_2024"] >= MIN_POP].copy()

# Normalize tweets per 100k
metro_dist["tweets_per_100k"] = metro_dist["tweet_count"] / metro_dist["population_2024"] * 100_000

# Sort metros and compute cumulative population
metro_dist = metro_dist.sort_values("mean_distance_km")
metro_dist["cum_population"] = metro_dist["population_2024"].cumsum()

# Population-adjusted baseline
POP_BASELINE = 50_000_000
baseline_start_dist = metro_dist.loc[metro_dist["cum_population"] >= POP_BASELINE, "mean_distance_km"].iloc[0]
baseline_activity = metro_dist.loc[metro_dist["mean_distance_km"] >= baseline_start_dist, "tweets_per_100k"].mean()

# Fandom radius
R_km = metro_dist.loc[metro_dist["tweets_per_100k"] <= baseline_activity, "mean_distance_km"].min()

# Geodesic circle
def geodesic_circle(lat, lon, radius_km, n_points=360):
    earth_radius_km = 6371.0
    angles = np.linspace(0, 2*np.pi, n_points)

    lat_rad = np.radians(lat)
    lon_rad = np.radians(lon)

    circle_lats = np.arcsin(
        np.sin(lat_rad) * np.cos(radius_km / earth_radius_km) +
        np.cos(lat_rad) * np.sin(radius_km / earth_radius_km) * np.cos(angles)
    )

    circle_lons = lon_rad + np.arctan2(
        np.sin(angles) * np.sin(radius_km / earth_radius_km) * np.cos(lat_rad),
        np.cos(radius_km / earth_radius_km) - np.sin(lat_rad) * np.sin(circle_lats)
    )

    return np.degrees(circle_lats), np.degrees(circle_lons)

circle_lat, circle_lon = geodesic_circle(boston_lat, boston_lon, R_km)

# Metro centroids for plotting
metro_coords = (
    tweets_with_city.groupby(["GEOID", "NAME"])
    .agg(lat=("lat", "mean"), lon=("lon", "mean"))
    .reset_index()
)

metro_summary = metro_dist.merge(metro_coords, on=["GEOID", "NAME"])

# Filter for valid metros
metro_summary = metro_summary[
    (metro_summary["tweet_count"] >= 10) &
    (metro_summary["tweets_per_100k"].notna())
].copy()

# Plotly map
fig = px.scatter_geo(
    metro_summary,
    lat="lat",
    lon="lon",
    size="tweets_per_100k",
    hover_name="NAME",
    hover_data={"tweets_per_100k": True},
    title="Geographic Radius of Patriots Fandom (2013–2017, population-adjusted baseline)",
    scope="usa"
)

# Add geodesic fandom circle
fig.add_trace(px.line_geo(lat=circle_lat, lon=circle_lon).data[0])
fig.data[-1].update(line=dict(width=3, dash="dash"), name=f"Fandom radius ≈ {int(R_km)} km", showlegend=True)

# Add Boston marker
fig.add_trace(px.scatter_geo(lat=[boston_lat], lon=[boston_lon]).data[0])
fig.data[-1].update(marker=dict(size=12), name="Boston")

# Map styling
fig.update_geos(
    scope="usa",
    showland=True,
    landcolor="lightgray",
    showocean=True,
    oceancolor="azure",
    showlakes=True,
    lakecolor="azure",
    showrivers=True,
    rivercolor="azure"
)

fig.update_layout(
    title_x=0.5,
    title_font_size=20,
    geo=dict(bgcolor='rgba(0,0,0,0)', landcolor='lightgray', lakecolor='azure')
)

fig.show()

In [52]:
# save as html
fig.write_html("/Users/elisabethkollrack/Thesis/EK-thesis/patriots_fandom_radius.html")