In [None]:
from dataclasses import dataclass
import random
from typing import List

import pandas as pd
import numpy as np

In [None]:
@dataclass(frozen=True)
class Player:
    """Immutable record describing one registered player.

    Attributes:
        id: Numeric identifier of the player.
        first_name: Player's first name.
        last_name: Player's last name.
        grade: School grade of the player.
        skill: Skill rating; summed per team by ``Team.get_skill``.
        unavailable_days: Days the player cannot practice; checked with
            ``in`` against a team's ``practice_day``.
    """
    id: int
    first_name: str
    last_name: str
    grade: int
    skill: int
    unavailable_days: List[str]
    # TODO: should these fields be fixed? or extendable?

    def __hash__(self):
        # The dataclass-generated hash would include ``unavailable_days``; a
        # list there is unhashable, which makes the player impossible to store
        # in ``Team.players`` (a set). Hashing only the scalar fields stays
        # consistent with the generated ``__eq__``: equal players still hash
        # equally.
        return hash((self.id, self.first_name, self.last_name, self.grade, self.skill))

    def __repr__(self):
        return f"<{self.first_name} {self.last_name} s={self.skill} g={self.grade} {self.unavailable_days}>"
        
class Team:
    """A named team with a practice day and a set of players.

    Players are kept in a ``set``, so adding the same player twice is a no-op
    and players must be hashable.
    """

    def __init__(self, name: str, practice_day: str) -> None:
        """Create an empty team.

        Args:
            name: Display name of the team.
            practice_day: Day on which the team practices.
        """
        self.name = name
        self.practice_day = practice_day
        self.players = set()
        # TODO: practice day, time?, location

    def add_player(self, player: "Player") -> None:
        """Add ``player`` to the roster; set semantics make duplicates harmless."""
        self.players.add(player)

    def remove_player(self, player: "Player") -> None:
        """Remove ``player`` from the roster.

        Raises:
            KeyError: If the player is not on this team.
        """
        self.players.remove(player)

    def get_skill(self) -> int:
        """Return the total skill of all players on the team."""
        return sum(player.skill for player in self.players)

    def get_grade(self) -> float:
        """Return the SUM of the players' grades (not the mean).

        ``__repr__`` divides this by the roster size to display the mean.
        """
        return sum(player.grade for player in self.players)

    def __repr__(self):
        size = len(self.players)
        # An empty team has no mean grade; show 0 instead of dividing by zero.
        mean_grade = self.get_grade() / size if size > 0 else 0
        return (
            f"Team {self.name} {self.practice_day} ("
            f"size={size}, "
            f"skill={self.get_skill()}, "
            f"grade={mean_grade:.2f}): "
            f"{self.players}"
        )

class League:
    """A collection of teams plus helpers to apply and revert player moves.

    A "proposed move" is a ``(player, team_from, team_to)`` tuple, where
    ``team_from`` is ``None`` when the player is not on any team yet.
    """

    def __init__(self, teams: List["Team"]):
        self.teams = teams

    def move_player(self, player: "Player", team: "Team", team_from: "Team" = None):
        """Put ``player`` on ``team``, optionally taking them off ``team_from``.

        Args:
            player: The player to move.
            team: Destination team.
            team_from: Team the player currently belongs to, or ``None`` (the
                default) when the player is simply being added.

        Raises:
            KeyError: If ``team_from`` is given but the player is not on it.
        """
        # Without this removal a team-to-team move would leave the player on
        # both rosters.
        if team_from is not None and team_from is not team:
            team_from.remove_player(player)
        team.add_player(player)

    def remove_player(self, player: "Player", team: "Team"):
        """Remove ``player`` from ``team``."""
        team.remove_player(player)

    def run_proposed_moves(self, proposed_moves):
        """Apply each ``(player, team_from, team_to)`` move in order."""
        for player, team_from, team_to in proposed_moves:
            self.move_player(player, team_to, team_from)

    def undo_proposed_moves(self, proposed_moves):
        """Revert moves previously applied by ``run_proposed_moves``.

        Moves are undone last-to-first so that chained moves of the same
        player unwind correctly.
        """
        for player, team_from, team_to in reversed(list(proposed_moves)):
            if team_from is None:
                # The player was newly added, so undoing means dropping them.
                self.remove_player(player, team_to)
            else:
                self.move_player(player, team_from, team_to)

    def __repr__(self):
        header = f"{len(self.teams)} Teams:\n"
        return header + "".join(f"{team}\n" for team in self.teams)

In [None]:
# SET UP LEAGUE WITH TOY DATA
# NOTE: optimize_player_assignment and the score_* helpers used below are
# defined in later cells of this notebook; those cells must be run first.
random.seed(5)

WEEKDAYS = ["M", "T", "W", "R", "F"]

# One toy player per letter. The random draws are made in a fixed order
# (grade, then skill, then unavailable day) so the seeded output is stable.
players = []
for idx, letter in enumerate("ABCDEFGHIJKLMNOPQ"):
    grade = random.randint(4, 5)
    # Player 0 is given a fixed skill of 10; no skill draw is made for them.
    skill = 10 if idx == 0 else random.randint(1, 5)
    # NOTE: a single day string is passed here, not a list of days.
    blocked_day = random.choice(WEEKDAYS)
    players.append(
        Player(
            id=idx,
            first_name=letter,
            last_name=letter,
            grade=grade,
            skill=skill,
            unavailable_days=blocked_day,
        )
    )

# Teams "1" through "5", one per weekday in WEEKDAYS order.
teams = [
    Team(name=str(number), practice_day=day)
    for number, day in enumerate(WEEKDAYS, start=1)
]

league = League(teams=teams)
# Place every player except the last two, one at a time, printing the
# skill / grade / size scores after each placement.
for player in players[:-2]:
    # Optimized assignment
    optimize_player_assignment(player, league)
    print(f"~~~ {score_skill(league):.2f} {score_grade(league):.2f} {score_size(league):.2f} ~~~")

print(league)

In [None]:
# Load the registration CSV and preview its first rows.
# The plotting cell further down reads the Longitude and Latitude columns.
# NOTE(review): a later cell rebinds reg_data from
# "input - exampleRegistrationData.csv" — confirm which file is intended.
reg_data = pd.read_csv("exampleRegistrationData.csv")
reg_data.head()

In [None]:
import matplotlib.pyplot as plt

In [None]:
def mad(x):
    """Return the median absolute deviation of ``x``, ignoring NaNs."""
    center = np.nanmedian(x)
    deviations = np.abs(x - center)
    return np.nanmedian(deviations)

In [None]:
# Practice fields keyed by short name; each value is a (latitude, longitude)
# pair (the plotting cell uses index 1 for x and index 0 for y).
practice_field_locations = dict(
    danehy=(42.38980588369338, -71.13301159861518),
    gsm=(42.37292538868882, -71.08361099742957),
    common=(42.376852476244125, -71.12084010564928),
)

In [None]:
# Scatter every registration by (longitude, latitude).
long = reg_data.Longitude.values
lat = reg_data.Latitude.values
plt.scatter(reg_data.Longitude, reg_data.Latitude, marker=".")

# Clip both axes to median +/- 5 MAD so far-away points do not set the view.
long_mid = np.nanmedian(long)
long_half_width = 5 * mad(long)
lat_mid = np.nanmedian(lat)
lat_half_width = 5 * mad(lat)
plt.xlim(long_mid - long_half_width, long_mid + long_half_width)
plt.ylim(lat_mid - lat_half_width, lat_mid + lat_half_width)

# Overlay each practice field; entries are (lat, long), so x is index 1.
for coords in practice_field_locations.values():
    print(coords)
    plt.scatter(coords[1], coords[0])

In [None]:
def optimize_player_assignment(player: Player, league: League):
    """Greedily place ``player`` on the team that yields the best league score.

    Each team in ``league.teams`` is tried in turn: the player is temporarily
    added, the league is scored and checked against the schedule constraint,
    and the trial move is undone. The best constraint-satisfying placement is
    then applied for real.

    Args:
        player: The player to place; must not be on any team yet.
        league: The league to mutate.

    Raises:
        ValueError: If no placement leaves the league free of schedule
            conflicts.
    """
    best_moves = None
    best_score = float("-inf")
    # Iterate the league's own teams rather than a notebook-level global so
    # the function works for whichever league is passed in.
    for team in league.teams:
        proposed_moves = [(player, None, team)]
        league.run_proposed_moves(proposed_moves)
        try:
            # N.B. this is pretty inefficient. If we need this to be faster,
            # we could change how the score/checkers work.
            score = score_league(league)
            breaks_constraint = breaks_schedule_constraint(league)
        finally:
            # Always roll the trial back, even if a scorer raises.
            league.undo_proposed_moves(proposed_moves)
        if not breaks_constraint and score > best_score:
            best_score = score
            best_moves = proposed_moves
    if best_moves is None:
        raise ValueError(
            f"No team placement for {player} satisfies the schedule constraint"
        )
    league.run_proposed_moves(best_moves)
    
    
def score_league(league: League, skill_weight: float = 1, grade_weight: float = 0, size_weight: float = 0):
    """Return the weighted sum of the skill, grade and size parity scores.

    Args:
        league: The league to score.
        skill_weight: Weight applied to ``score_skill`` (default 1).
        grade_weight: Weight applied to ``score_grade`` (default 0).
        size_weight: Weight applied to ``score_size`` (default 0).

    With the defaults only skill parity contributes, matching the previous
    hard-coded weights.
    """
    return (
        skill_weight * score_skill(league)
        + grade_weight * score_grade(league)
        + size_weight * score_size(league)
    )

def breaks_schedule_constraint(league: League):
    """Return True when any player is on a team that practices on a day
    listed in that player's ``unavailable_days``; False otherwise."""
    return any(
        team.practice_day in player.unavailable_days
        for team in league.teams
        for player in team.players
    )

In [None]:
# CONVENIENCE SCORERS
def score_convenience(league: League):
    """Placeholder convenience score; currently always returns 1.

    The loops visit every player on every team but do nothing yet. The
    inline comment marks where a distance-to-field calculation is planned.
    """
    score = 1 
    for team in league.teams:
        for player in team.players:
            pass
            # calculate distance to field
    return score

In [None]:
# PARITY SCORERS

# TODO: the sigmas shouldn't depend on the current league config? 
# Right now it depends on the full league at the time of calculation

def _parity_score(values, sigma_scale=5):
    """Score how evenly ``values`` are spread across teams.

    Args:
        values: One number per team (size, total grade, total skill, ...).
        sigma_scale: Multiplier applied to the standard deviation to obtain
            the normalizer ``sigma``.

    Returns:
        1 when all values are equal (or there are no teams at all), otherwise
        ``max(0, 1 - mean((v - mean)**2 / sigma))``.
    """
    if len(values) == 0:
        # No teams means nothing is out of balance; this also avoids a
        # division by zero below.
        return 1
    ideal = sum(values) / len(values)
    # N.B sigma normalizes squared errors. We really only care about
    # expressibility between 0 and 1. Since the mean squared error is the
    # population variance, the result reduces algebraically to
    # max(0, 1 - std / sigma_scale).
    sigma = sigma_scale * np.std(values)
    if sigma == 0:
        return 1
    squared_errors = [(value - ideal) ** 2 / sigma for value in values]
    return max(0, 1 - sum(squared_errors) / len(squared_errors))


def score_size(league: "League"):
    """Parity score for team sizes (number of players per team)."""
    return _parity_score([len(team.players) for team in league.teams])


def score_grade(league: "League"):
    """Parity score for ``Team.get_grade()`` across teams.

    NOTE(review): ``get_grade`` returns the sum of grades, so this also
    reflects roster size — confirm that is intended.
    """
    return _parity_score([team.get_grade() for team in league.teams])


def score_skill(league: "League"):
    """Parity score for total team skill.

    Ideal skill distribution is full equality. If we want we could break this
    down into equality for each tier of player.
    """
    return _parity_score([team.get_skill() for team in league.teams])

In [None]:
# Load the registration CSV, print its (rows, columns) shape and preview it.
# NOTE(review): this rebinds reg_data, which an earlier cell loaded from
# "exampleRegistrationData.csv" — confirm which file is intended.
reg_data = pd.read_csv("input - exampleRegistrationData.csv")
print(reg_data.shape)
reg_data.head()

In [None]:
# Load the parent-requests CSV, print its (rows, columns) shape and preview it.
# Its "Player Last Name" column is the join key used by the merge cell below.
parent_requests = pd.read_csv("input - exampleParentRequests.csv")
print(parent_requests.shape)
parent_requests.head()

In [None]:
# Names seem borked. I'll pretend I can join on last name,
# but IRL this doesn't work. Ideally the input is a single
# csv with some constraints?
# Left join: every registration row is kept, with request columns attached
# wherever the last names match.
player_data = pd.merge(
    reg_data,
    parent_requests,
    left_on="Lastname",
    right_on="Player Last Name",
    how="left",
)
player_data.head()

In [None]:
# Frequency tables for the assessment and grade columns, separated by a blank line.
print(player_data["Assessment"].value_counts(), end="\n\n")
print(player_data["Grade"].value_counts())

In [None]:
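# SET UP LEAGUE WITH REAL DATA
# NOTE: disabled draft, kept for reference. It no longer matches the classes
# defined above: Player(...) below omits unavailable_days, Team(name) omits
# practice_day, and assign_player / get_skill_score are not defined in the
# cells shown here.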
# # Set up players
# players = []
# for i, row in player_data.iterrows():
#     skill = row.Assessment
#     if pd.isna(skill) or not skill[0].isdigit():
#         continue
#         # Ignore blank, Premier, Kick Start (ask Jason later)
#     p = Player(
#         id=i,
#         first_name=row.Firstname,
#         last_name=row.Lastname,
#         grade=row.Grade,
#         skill=int(skill[0]),
#     )
#     players.append(p)
# players = sorted(players, key=lambda p: p.skill)

# # Set up teams
# teams = []
# for name in player_data["Assigned Team"].unique():
#     if pd.isnull(name):
#         continue
#     teams.append(Team(name))
# teams = sorted(teams, key=lambda t: t.name)
# print(teams)

# for i, player in enumerate(players):
#     teams = assign_player(player, teams)
#     best_team = None
#     best_score = 0
#     teams[i % len(teams)].add_player(player)
#     print(get_skill_score(teams))