In [1]:
import json
import csv
import shapely

from pandas import DataFrame, read_csv

# csa_data_frame = read_csv('./csa-est2024-alldata.csv')

# print(csa_data_frame.query("STCOU == 29510")["POPESTIMATE2020"])
def load_counties_data():
    """Load county boundaries and population estimates, attaching a centroid per county.

    Reads ``./counties.geojson`` and ``./co-est2024-alldata.csv`` (decoded as
    ISO-8859-1) from the working directory.  Each GeoJSON feature's geometry
    is converted to a shapely object and its centroid is written to the
    ``centroid`` column of every row whose ``STATE``/``COUNTY`` values equal
    the feature's ``STATEFP``/``COUNTYFP`` properties.

    Features that cannot be processed are reported on stdout and skipped;
    they never abort the load.

    Returns:
        tuple: ``(counties_geojson, co_data_frame)`` - the parsed GeoJSON
        dict and the population DataFrame.  ``centroid`` is ``None`` for rows
        that no feature matched.
    """
    with open("./counties.geojson") as f:
        counties_geojson = json.load(f)

    # The CSV does not depend on the GeoJSON file handle, so read it after closing.
    co_data_frame = read_csv('./co-est2024-alldata.csv', encoding='iso-8859-1')

    # Create the column up front so it always exists, even if no feature matches.
    co_data_frame["centroid"] = None

    for feature in counties_geojson["features"]:
        properties = feature["properties"]
        try:
            # shape() understands Polygon and MultiPolygon alike, so the centroid
            # reflects every part of the county rather than only its first ring.
            geometry = shapely.geometry.shape(feature["geometry"])

            # Compare as integers: the codes may be zero-padded strings such as
            # "01" (TODO confirm against the GeoJSON), which cannot be spliced
            # into a query() expression.
            state = int(properties["STATEFP"])
            county = int(properties["COUNTYFP"])
            matches = (co_data_frame["STATE"] == state) & (co_data_frame["COUNTY"] == county)
            co_data_frame.loc[matches, "centroid"] = geometry.centroid
        except Exception as e:
            # Best effort by design: report the feature and carry on with the rest.
            print(e)
            print("Invalid coordinates:", properties.get("Name"))
    return (counties_geojson, co_data_frame)

# Load once at cell level; the later cells read both of these module-level names.
(counties_geojson, co_data_frame) = load_counties_data()


In [2]:
import numpy as np
import pandas as pd
import json

def haversine_miles(centroid, lat2, lon2):
    """Great-circle distance in miles between ``centroid`` and a lat/lon pair.

    Args:
        centroid: object exposing ``.y`` (latitude) and ``.x`` (longitude),
            both in degrees.
        lat2: latitude of the second location, in degrees.
        lon2: longitude of the second location, in degrees.

    Returns:
        The haversine distance using an earth radius of 3958.8 miles.
    """
    earth_radius_miles = 3958.8

    lat_from = np.radians(centroid.y)
    lat_to = np.radians(lat2)
    delta_lat = lat_to - lat_from
    delta_lon = np.radians(lon2 - centroid.x)

    # Haversine term: squared half-chord length between the two points.
    hav = (
        np.sin(delta_lat / 2.0) ** 2
        + np.cos(lat_from) * np.cos(lat_to) * np.sin(delta_lon / 2.0) ** 2
    )
    return 2 * earth_radius_miles * np.arcsin(np.sqrt(hav))

# Per-league registry keyed by league code.  Each entry starts with only a
# "weight"; the statements that follow attach further per-league data to
# these same dicts.
leagues = {
    code: {"weight": weight}
    for code, weight in (
        ('NFL', 0.9),
        ('NBA', 0.6),
        ('MLB', 0.5),
        ('NHL', 0.25),
        ('MLS', 0.15),
    )
}


# For every league: read its team file, seed the per-team factors, and
# tabulate the county-to-team distances in miles.
for league in leagues:
    league_entry = leagues[league]

    with open(f"./teams_{league}.json") as teams_file:
        league_entry["json"] = json.load(teams_file)

    # `teams` deliberately stays a module-level name: a later cell reads it.
    teams = pd.DataFrame(league_entry["json"]["teams"])

    # Per-team factors consumed by the scoring cell.
    # NOTE(review): presumably short-term success (S) and long-term
    # allegiance (L) - confirm the intended meaning.
    teams["S"] = 0.0
    teams["L"] = 0.5

    # Rows are counties, columns are teams.  The iterrows() labels are used
    # as positions, which relies on both frames having a default 0..n-1 index.
    distances = np.zeros((len(co_data_frame), len(teams)))
    for county_pos, county in co_data_frame.iterrows():
        if pd.isna(county["centroid"]):
            # No centroid for this row: its distances stay at the initial 0.0.
            continue
        for team_pos, team in teams.iterrows():
            team_location = team["coordinates"]
            distances[county_pos, team_pos] = haversine_miles(
                county["centroid"], team_location['lat'], team_location['lon']
            )

    league_entry["teams"] = teams
    league_entry["distances"] = distances

In [3]:
import numpy as np
import pandas as pd

# Model parameters.
beta = 0.005  # distance decay: D = exp(-beta * effective distance in miles)
alpha = 1.0  # long-term allegiance weight
T_softmax = 0.05  # competition temperature
not_nearest_multiplier = 2.0  # scales the part of a team's distance that exceeds the nearest team's distance
not_same_state_multiplier = 1.5  # TODO: distance is multiplied by this when the team's state differs from the county's

# For each league compute, per county (row) and team (column):
#   R      = weight * exp(-beta * effective_d) * (1 + S) * (alpha * L)
#   shares = softmax of R / T_softmax across that league's teams
for league in leagues.keys():
    # Always use this league's own team table.  The previous version looped
    # over a leftover global `teams` (whichever league was loaded last), which
    # mis-computed the nearest distance whenever team counts differed.
    league_teams = leagues[league]["teams"]
    d = leagues[league]["distances"]
    P_l = leagues[league]["weight"]

    # Distance from each county to its closest team in this league.
    nearest_key = "nearest"
    if d.shape[1] > 0:
        nearest_d = d.min(axis=1)
    else:
        # No teams at all: keep the column as NaN, as before.
        nearest_d = np.full(d.shape[0], np.nan)
    # Kept as a column on co_data_frame for compatibility; overwritten per league.
    co_data_frame[nearest_key] = nearest_d

    # Penalise every team that is farther away than the nearest one by
    # stretching the excess distance.
    nearest_col = nearest_d[:, np.newaxis]
    effective_d = np.where(
        d > nearest_col,
        nearest_col + ((d - nearest_col) * not_nearest_multiplier),
        d,
    )

    # Penalise teams whose "state" value differs from the county's STNAME.
    county_states = co_data_frame["STNAME"].to_numpy(dtype=object)[:, np.newaxis]
    team_states = league_teams["state"].to_numpy(dtype=object)[np.newaxis, :]
    effective_d = np.where(
        county_states != team_states,
        effective_d * not_same_state_multiplier,
        effective_d,
    )

    # Raw score R_{c,t}; operand order matches the original scalar formula.
    D = np.exp(-beta * effective_d)
    team_S = league_teams["S"].to_numpy(dtype=float)
    team_L = league_teams["L"].to_numpy(dtype=float)
    R = P_l * D * (1.0 + team_S) * (alpha * team_L)

    # Softmax across teams (axis 1) so each county's shares sum to 1.
    expR = np.exp(R / T_softmax)
    leagues[league]["shares"] = expR / expR.sum(axis=1, keepdims=True)
    leagues[league]["Rs"] = R
  

In [94]:
# Flatten each league's county x team matrices into one table of
# (county, team) rows, keeping only pairs whose share exceeds 5%.
for league_code, league_entry in leagues.items():
    dist_matrix = league_entry["distances"]
    score_matrix = league_entry["Rs"]
    share_matrix = league_entry["shares"]

    # Estimated fan head-count: share times county population, truncated to int.
    population_column = co_data_frame['POPESTIMATE2020'].values.reshape(-1,1)
    fan_counts = (share_matrix * population_column).astype(int)

    # iterrows() labels double as matrix positions (default 0..n-1 index assumed).
    records = [
        {
            'county': county['CTYNAME'],
            'countyfp': county['COUNTY'],
            'state': county['STNAME'],
            'statefp': county['STATE'],
            'team_name': team['name'],
            'distance_miles': round(dist_matrix[county_pos, team_pos],1),
            'raw_R': score_matrix[county_pos, team_pos],
            'share': f"{round(share_matrix[county_pos, team_pos] * 100, 1)}%",
            'fans': fan_counts[county_pos, team_pos],
        }
        for county_pos, county in co_data_frame.iterrows()
        for team_pos, team in league_entry["teams"].iterrows()
        if share_matrix[county_pos, team_pos] > 0.05
    ]
    league_entry["dfs"] = pd.DataFrame(records)

In [100]:
# Baseline look for every county polygon: thin grey outline, faint grey fill.
default_style = dict(
    color="grey",
    weight=1,
    fillColor="grey",
    fillOpacity=0.1,
)

# Every feature references this one dict object; no per-feature copy is made.
for county_feature in counties_geojson["features"]:
    county_feature["properties"]["style"] = default_style

def highlight(league, teamname, color):
    """Fill every county whose top team in ``league`` is ``teamname``.

    The top team of a county is the row with the most ``fans`` in
    ``leagues[league]["dfs"]``; on a tie the first such row wins.  Matching
    features in ``counties_geojson`` get their ``style`` property replaced
    with a grey outline and a ``color`` fill; all other features are left
    untouched.

    Args:
        league: key into ``leagues`` (for example ``"NHL"``).
        teamname: value to match against the ``team_name`` column.
        color: fill colour written to the feature's ``fillColor``.
    """
    league_rows = leagues[league]["dfs"]
    if league_rows.empty:
        # No (county, team) rows for this league, so nothing can be highlighted.
        return

    # Work out each county's top team once, rather than querying per feature.
    top_row_labels = league_rows.groupby(["statefp", "countyfp"])["fans"].idxmax()
    top_rows = league_rows.loc[top_row_labels]
    won = top_rows[top_rows["team_name"] == teamname]
    won_counties = set(
        zip(won["statefp"].astype(int).tolist(), won["countyfp"].astype(int).tolist())
    )

    for feature in counties_geojson["features"]:
        properties = feature["properties"]
        # Compare as integers so codes given as zero-padded strings still match.
        county_key = (int(properties["STATEFP"]), int(properties["COUNTYFP"]))
        if county_key in won_counties:
            properties["style"] = {
                "color": "grey",
                "weight": 1,
                "fillColor": color,
                "fillOpacity": 0.3,
            }

# highlight("NHL", "St. Louis Blues", "blue")
# highlight("NHL", "Chicago Blackhawks", "red")
# highlight("NHL", "Minnesota Wild", "green")
# highlight("NHL", "Nashville Predators", "yellow")

In [101]:
from ipyleaflet import Map, Marker, MarkerCluster, GeoJSON, Popup, FullScreenControl
from shapely.geometry import Polygon, Point
from shapely.ops import transform
import ipywidgets as widgets
import pandas as pd
import shapely

# Text area that debug() appends its messages to; starts out empty.
output = widgets.Textarea(value="")

# Holds the markers that show_nearests() swaps in on each call.
marker_cluster = MarkerCluster()

# Interactive map centred on the hard-coded lat/lon below.
# NOTE: the name `map` shadows the Python builtin, but other cells refer to it.
map = Map(
    center=[38.533899, -90.50261],
    zoom=8,
    scroll_wheel_zoom=True,
)
map.add(marker_cluster)

def debug(message):
    """Append ``str(message)`` plus a newline to the ``output`` text area."""
    line = str(message) + "\n"
    output.value = output.value + line

def nearest_teams(centroid):
    """Find the closest team in every league to ``centroid``.

    Args:
        centroid: object exposing ``.y`` (latitude) and ``.x`` (longitude),
            as expected by ``haversine_miles``.

    Returns:
        dict mapping league code to ``(team_name, distance_miles, (lat, lon))``.
        A league without any team closer than the sentinel distance keeps
        ``("", 1000000000, None)``.
    """
    league_distances = {}
    # The team files are stored under leagues[<code>]["json"]; the previously
    # referenced `league_jsons` is not defined anywhere in this notebook.
    for league_name, league_entry in leagues.items():
        name_and_distance = ("", 1000000000, None)
        for team in league_entry["json"]["teams"]:
            coordinates = team["coordinates"]
            # haversine_miles takes (centroid, lat2, lon2): latitude first.
            distance = haversine_miles(centroid, coordinates["lat"], coordinates["lon"])
            if distance < name_and_distance[1]:
                name_and_distance = (
                    team["name"],
                    distance,
                    (coordinates["lat"], coordinates["lon"]),
                )
        league_distances[league_name] = name_and_distance
    return league_distances

def show_nearests(event, feature, **kwargs):
    """Click handler: show a popup listing the nearest team per league for a county.

    Looks the clicked feature up in ``co_data_frame`` by its
    ``STATEFP``/``COUNTYFP`` properties, replaces the markers in
    ``marker_cluster`` with one marker per nearest team, and adds a popup at
    the county centroid with the distances and the county population.
    Does nothing when the county has no matching row or no centroid.

    Args:
        event: accepted for the click-callback signature; not used.
        feature: the clicked GeoJSON feature dict.
        **kwargs: extra callback arguments; ignored.
    """
    properties = feature["properties"]
    # Compare as integers so codes given as zero-padded strings still match.
    state = int(properties["STATEFP"])
    county = int(properties["COUNTYFP"])
    row = co_data_frame[(co_data_frame["STATE"] == state) & (co_data_frame["COUNTY"] == county)]
    if row.empty:
        return

    centroid = row["centroid"].iloc[0]
    if pd.isna(centroid):
        # Without a centroid there is nothing to measure from or anchor the popup to.
        return
    population = row["POPESTIMATE2020"].iloc[0]

    nearests = nearest_teams(centroid)

    table_rows = []
    markers = []
    for league_name, (team_name, distance, location) in nearests.items():
        table_rows.append(
            f'<tr><td>Nearest {league_name}: {team_name}</td><td>{distance:.2f}</td></tr>'
        )
        if location is not None:
            # The sentinel entry for a league without teams has no location to mark.
            markers.append(Marker(location=location, draggable=False))
    leagues_table = "".join(table_rows)

    marker_cluster.markers = markers

    popup = Popup(
        location=(centroid.y, centroid.x),
        child=widgets.HTML(
            "<table border='1'>"
            + f'<caption>{properties["Name"]}</caption>'
            + leagues_table
            + f'<tr><td>Population:</td><td>{population}</td></tr>'
            + "</table>"
        ),
    )
    map.add(popup)

def show_teams(event, feature, **kwargs):
    """Click handler: show a popup with every team's share and fans for a county.

    Collects the clicked county's rows from each league's
    ``leagues[league]["dfs"]`` table, orders them by ``fans`` (descending)
    and renders them as an HTML table in a popup at the county centroid.
    Does nothing when the county has no matching row or no centroid; the
    popup shows only the caption when no league has rows for the county.

    Args:
        event: accepted for the click-callback signature; not used.
        feature: the clicked GeoJSON feature dict.
        **kwargs: extra callback arguments; ignored.
    """
    properties = feature["properties"]
    # Compare as integers so codes given as zero-padded strings still match.
    statefp = int(properties["STATEFP"])
    countyfp = int(properties["COUNTYFP"])
    row = co_data_frame[(co_data_frame["STATE"] == statefp) & (co_data_frame["COUNTY"] == countyfp)]
    if row.empty:
        return
    centroid = row["centroid"].iloc[0]
    if pd.isna(centroid):
        return

    # Gather per-league slices first and concatenate once at the end.
    per_league_rows = []
    for league in leagues.keys():
        league_rows = leagues[league]["dfs"]
        if league_rows.empty:
            # An empty table has no columns to filter on.
            continue
        county_rows = league_rows[
            (league_rows["statefp"] == statefp) & (league_rows["countyfp"] == countyfp)
        ]
        if not county_rows.empty:
            # assign() returns a new frame, so the stored table is never modified.
            per_league_rows.append(county_rows.assign(league=league))

    leagues_table = ""
    if per_league_rows:
        all_county_rows = pd.concat(per_league_rows, ignore_index=True)
        all_county_rows = all_county_rows.sort_values(by="fans", ascending=False)
        leagues_table = "".join(
            "<tr>"
            f"<td>{county_row['league']}</td>"
            f"<td>{county_row['team_name']}</td>"
            f"<td>{county_row['share']}</td>"
            f"<td>{county_row['fans']}</td>"
            "</tr>"
            for _, county_row in all_county_rows.iterrows()
        )

    popup = Popup(
        location=(centroid.y, centroid.x),
        child=widgets.HTML(
            "<table border='1' style='border-collapse: collapse'>"
            + f'<caption>{properties["Name"]}</caption>'
            + leagues_table
            + "</table>"
        ),
    )
    map.add(popup)
    

# County polygons as a clickable layer; hovering turns the fill white.
layer = GeoJSON(
    data=counties_geojson,
    hover_style=dict(fillColor="white"),
)

# Clicking a county opens the per-team share popup.
layer.on_click(show_teams)

# Put the county layer on the map, then add the fullscreen control.
map.add(layer)
map.add(FullScreenControl())


Map(center=[38.533899, -90.50261], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title',…

In [87]:
# Switch the map widget to fullscreen.
# NOTE(review): assumes the installed ipyleaflet Map exposes a `fullscreen` attribute - confirm.
map.fullscreen = True