In [1]:
import os
import sys
# Put the parent of the current working directory on the import path (at most once),
# presumably so the local `march_sadness` package imported further down can be found.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
# Load Teams from KenPom and ESPN
from selenium import webdriver
from march_sadness import KPTeam, ESPNTeam


# Set up Selenium webdriver with GeckoDriver
# Firefox is started headless (no visible window). The `with` block scopes the
# browser session to the KenPom load only.
options = webdriver.FirefoxOptions()
options.add_argument('-headless')
with webdriver.Firefox(options=options) as driver:
    # KPTeam.list_all needs a live driver -- presumably it scrapes the KenPom site; confirm in march_sadness.
    kp_teams = KPTeam.list_all(driver)
print(f'Loaded {len(kp_teams)} teams from KenPom')

# ESPNTeam.list_all takes no driver, so the ESPN load runs after the browser session has ended.
espn_teams = ESPNTeam.list_all()

# The two counts are not required to agree; the matching step pads the shorter list.
print(f'Loaded {len(espn_teams)} teams from ESPN')


Loaded 363 teams from KenPom
Loaded 360 teams from ESPN


In [3]:
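# Matching the KenPom (KP) and ESPN teams is an assignment problem: build a weighted bipartite graph
# (ESPN teams on one side, KP teams on the other) and choose the one-to-one matching with the best total weight.
# Picking the best match for each ESPN team independently would probably work fine, but could lead to duplicates. Also, this is more fun.
# https://en.wikipedia.org/wiki/Assignment_problem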


from fuzzywuzzy import fuzz
import numpy as np
from scipy.optimize import linprog


def edge_weight(espn_team: ESPNTeam | None, kp_team: KPTeam | None):
    """Score how well an ESPN team and a KenPom team match.

    Args:
        espn_team: ESPN team, or None for a padding node.
        kp_team: KenPom team, or None for a padding node.

    Returns:
        0 when either side is None. For the hand-pinned pairs below, 1 when
        both sides belong to the same pinned pair and 0 when only one side
        does. Otherwise the better of the two fuzz.ratio scores (KenPom name
        against the ESPN display name and against the ESPN nickname),
        divided by 100.
    """
    if kp_team is None or espn_team is None:
        return 0

    # Problem children that fuzzy matching gets wrong: (ESPN id, KenPom name).
    # Checked in order; the first pair that either side belongs to decides the result.
    pinned_pairs = (
        ('41', 'Connecticut'),
        ('82', 'Illinois Chicago'),
        ('112358', 'LIU'),
    )
    for pinned_id, pinned_name in pinned_pairs:
        id_hit = espn_team.id == pinned_id
        name_hit = kp_team.name == pinned_name
        if id_hit or name_hit:
            # A pinned team may only ever pair with its pinned partner.
            return 1 if id_hit and name_hit else 0

    scores = [
        fuzz.ratio(espn_label, kp_team.name)
        for espn_label in (espn_team.displayName, espn_team.nickname)
    ]
    return max(scores) / 100


def gen_graph(espn_teams: list[ESPNTeam], kp_teams: list[KPTeam]):
    """Build the square weight matrix of the ESPN-vs-KenPom bipartite graph.

    The shorter side is padded with None so both sides have the same number
    of nodes. The input lists themselves are left untouched.

    Args:
        espn_teams: ESPN teams (one matrix row each).
        kp_teams: KenPom teams (one matrix column each).

    Returns:
        (edges, espn_nodes, kp_nodes), where edges[r][c] is
        edge_weight(espn_nodes[r], kp_nodes[c]) and the node lists are the
        padded copies of the inputs.
    """
    size = max(len(espn_teams), len(kp_teams))

    # A negative repeat count yields an empty list, so only the shorter side grows.
    espn_nodes: list[ESPNTeam | None] = [*espn_teams] + [None] * (size - len(espn_teams))
    kp_nodes: list[KPTeam | None] = [*kp_teams] + [None] * (size - len(kp_teams))

    edges = np.empty((size, size))
    for row_idx, espn_team in enumerate(espn_nodes):
        # Fill one full row at a time; every cell is written, so nothing stays uninitialised.
        edges[row_idx, :] = [edge_weight(espn_team, kp_team) for kp_team in kp_nodes]

    return edges, espn_nodes, kp_nodes


def solve_assignment(espn_teams: list[ESPNTeam], kp_teams: list[KPTeam]):
    """Match ESPN teams to KenPom teams by solving the assignment problem as an LP.

    With n_espn rows and n_kp columns in the weight matrix from gen_graph,
    the decision variable x[i * n_kp + j] (bounded to [0, 1]) says how much
    ESPN node i is assigned to KenPom node j. The LP maximizes
    sum(edges.flat[v] * x[v]) subject to every ESPN node and every KenPom
    node being assigned exactly once.

    Args:
        espn_teams: ESPN teams to match.
        kp_teams: KenPom teams to match.

    Returns:
        (result, edges): the scipy.optimize.linprog result, whose `x` is the
        flat, row-major assignment vector, and the weight matrix it was
        solved for.
    """
    # Local import so this block does not depend on the notebook's import cell.
    from scipy import sparse

    edges, espn_nodes, kp_nodes = gen_graph(espn_teams, kp_teams)
    n_espn, n_kp = len(espn_nodes), len(kp_nodes)

    # linprog minimizes, so negate the weights to maximize the total match score.
    c = -1 * edges.flatten()

    # Each row of A_eq, b_eq is of the form: sum_v A_eq[row, v] * x[v] = b_eq[row]
    #   rows 0 .. n_espn-1        : ESPN node i is used exactly once
    #   rows n_espn .. n_espn+n_kp-1 : KenPom node j is used exactly once
    # Every variable appears in exactly two constraints, so the matrix is
    # built sparse: dense it is (2n x n^2) floats, roughly 0.75 GB for the
    # 363 nodes loaded in this notebook.
    var_idx = np.arange(n_espn * n_kp)
    espn_of_var = np.repeat(np.arange(n_espn), n_kp)  # i for variable i * n_kp + j
    kp_of_var = np.tile(np.arange(n_kp), n_espn)      # j for variable i * n_kp + j

    eq_count = n_espn + n_kp
    A_eq = sparse.csr_matrix(
        (
            np.ones(2 * var_idx.size),
            (
                np.concatenate([espn_of_var, n_espn + kp_of_var]),
                np.concatenate([var_idx, var_idx]),
            ),
        ),
        shape=(eq_count, var_idx.size),
    )
    b_eq = np.ones(eq_count)

    # NOTE(review): scipy.optimize.linear_sum_assignment(edges, maximize=True)
    # solves this problem directly; the LP form is kept so callers still
    # receive a linprog result object.
    return linprog(c, A_eq=A_eq, b_eq=b_eq, bounds=(0, 1)), edges


# Solve the matching, then view the flat LP solution as a matrix with the same
# shape as the weight matrix (rows: ESPN nodes, columns: KenPom nodes).
solution, graph_edges = solve_assignment(espn_teams, kp_teams)
if not solution.success:
    # A failed solve may leave `x` unset; building an array from a missing
    # buffer would silently yield uninitialised memory, so fail loudly instead.
    raise RuntimeError(f'Assignment LP failed: {solution.message}')
sol_nd = solution.x.reshape(graph_edges.shape)

In [4]:
# Print one line per row of the solution matrix: ESPN id, ESPN name, matched KenPom name.
for espn_idx, weights in enumerate(sol_nd):
    # Column with the largest assignment weight in this row (the first one wins ties).
    kp_idx = max(range(len(weights)), key=weights.__getitem__)

    # Indices past the end of a team list are padding nodes and print as 'None'.
    if espn_idx < len(espn_teams):
        espn_name = espn_teams[espn_idx].displayName
        espn_id = espn_teams[espn_idx].id
    else:
        espn_name = 'None'
        espn_id = '0'

    if kp_idx < len(kp_teams):
        kp_name = kp_teams[kp_idx].name
    else:
        kp_name = 'None'

    print(f'{espn_id:6} {espn_name:37} - {kp_name}')

44     American University Eagles            - American
9      Arizona State Sun Devils              - Arizona St.
12     Arizona Wildcats                      - Arizona
8      Arkansas Razorbacks                   - Arkansas
2      Auburn Tigers                         - Auburn
91     Bellarmine Knights                    - Bellarmine
68     Boise State Broncos                   - Boise St.
71     Bradley Braves                        - Bradley
13     Cal Poly Mustangs                     - Cal Poly
25     California Golden Bears               - California
38     Colorado Buffaloes                    - Colorado
36     Colorado State Rams                   - Colorado St.
48     Delaware Blue Hens                    - Delaware
50     Florida A&M Rattlers                  - Florida A&M
57     Florida Gators                        - Florida
52     Florida State Seminoles               - Florida St.
45     George Washington Colonials           - George Washington
46     Georgetown Hoyas   