## Initialize notebook

In [1]:
import typing as T

from domain import Team, Player

import pandas as pd
from itertools import combinations
import random
import time
import datetime

# allow up to 500 columns in DataFrame displays so the wide player table is not truncated
pd.set_option("display.max_columns", 500)

## Load data

In [2]:
# read this week's salary sheet and projection sheet
df_salary = pd.read_csv("data/roto_guru_data_week13.csv")
df_projections = pd.read_csv("data/fantasy_pros_data_week13.csv")

# the merge key plus the projection columns carried over to the player table
projection_columns = [
    "name_position_key",
    "start_sit_score",
    "start_sit_grade",
    "projected_points",
    "position_rank",
    "player_owned_pct",
]
df_projections_merge = df_projections[projection_columns]

# left join: every salary row is kept, even when it has no matching projection
df_players = df_salary.merge(
    df_projections_merge, how="left", on="name_position_key"
)

# players without a projection get neutral defaults (rank 99 = unranked)
missing_defaults = {
    "start_sit_score": 0,
    "projected_points": 0,
    "position_rank": 99,
    "player_owned_pct": 0,
}
for column, default in missing_defaults.items():
    df_players[column] = df_players[column].fillna(default)
df_players["position_rank"] = df_players["position_rank"].astype(int)

# projected points per $1000 of salary (NaN when both points and salary are 0)
df_players["projected_points_per_salary"] = (
    df_players["projected_points"].div(df_players["salary"]).mul(1000)
)

df_players.head()

Unnamed: 0,gid,position,name,team,opponent,home/away,salary,salary_change,total_points,games_played,points_per_game,points_per_game_per_salary,points_per_game_alt,bye_week,ytd_salary_high/low,name_position_key,start_sit_score,start_sit_grade,projected_points,position_rank,player_owned_pct,projected_points_per_salary
0,1501,QB,"Prescott, Dak",dal,bal,A,0,0,151.64,5,30.33,0.0,30.33,10,L,dak-prescott-qb,0.0,,0.0,99,0.0,
1,5536,RB,"McCaffrey, Christian",car,,H,0,0,90.3,3,30.1,0.0,30.1,13,L,christian-mccaffrey-rb,0.0,,0.0,99,0.0,
2,1523,QB,"Mahomes II, Patrick",kan,den,H,8200,200,314.78,11,28.62,3.49,28.62,10,H,patrick-mahomes-qb,4.33,A+,24.2,1,100.0,2.95122
3,1537,QB,"Murray, Kyler",ari,lar,H,7600,-600,311.56,11,28.32,3.73,28.32,8,,kyler-murray-qb,3.0,B,19.0,8,99.9,2.5
4,1412,QB,"Wilson, Russell",sea,nyg,H,7700,200,300.54,11,27.32,3.55,27.32,6,,russell-wilson-qb,4.0,A,22.3,2,100.0,2.896104


In [3]:
# load schedule data and parse the kickoff timestamps
# (`infer_datetime_format` is deprecated since pandas 2.0 and has no effect
#  there, so it is no longer passed)
df_schedule = pd.read_csv("data/football_locks_data_week13.csv")
df_schedule["datetime"] = pd.to_datetime(df_schedule["datetime"])
df_schedule["hour"] = df_schedule["datetime"].dt.hour

# find valid games in the schedule (Sunday 1pm and 4pm games only)
#  - the timestamps carry no AM/PM marker, so the 1pm/4pm kickoffs show up as
#    hour 1 and hour 4
#  - games hosted by "pit" are excluded
#    NOTE(review): presumably that game is not part of this slate - confirm
df_schedule_valid = df_schedule[
    (df_schedule["datetime"].dt.date == datetime.date(2020, 12, 6))
    & (df_schedule["hour"].isin([1, 4]))
    & (df_schedule["home"] != "pit")
]

# every team appearing in a valid game, favorites first and then underdogs
valid_teams = (
    df_schedule_valid["favorite"].unique().tolist()
    + df_schedule_valid["underdog"].unique().tolist()
)

print("number of teams playing on sunday at 1pm or 4pm:", len(valid_teams))

# create a mapping of expected points per team based on the over/under and line
# note the line is negative for the favored team, so we subtract it from the
#  favorite and add it to the underdog; the two values sum to the over/under
team_points_mapping = {}
for row in df_schedule_valid.itertuples():
    team_points_mapping[row.favorite] = (row.over_under - row.line) / 2
    team_points_mapping[row.underdog] = (row.over_under + row.line) / 2

number of teams playing on sunday at 1pm or 4pm: 22


## Split up by position and filter

In [4]:
# names to exclude from the pool, e.g. ["Jones, Aaron", "Tonyan, Robert"]
injured_players = []

# baseline eligibility: team plays in a valid game, player has a salary and a
# projection, is not listed as injured, and has a good enough start/sit score
is_eligible = (
    df_players["team"].isin(valid_teams)
    & (df_players["salary"] > 0)
    & (df_players["projected_points"] > 0)
    & ~df_players["name"].isin(injured_players)
    & (df_players["start_sit_score"] >= 2.33)  # start/sit grade >= C+
)
df_filtered = df_players[is_eligible]

# per-position criteria:
#   position -> (minimum salary, projected points per $1000 that must be exceeded)
position_thresholds = {
    "QB": (5000, 2.5),
    "RB": (4000, 2.0),
    "WR": (4000, 2.2),
    "TE": (3000, 2.0),
    "DST": (2000, 1.5),
}
position_frames = {
    position: df_filtered[
        (df_filtered["position"] == position)
        & (df_filtered["salary"] >= min_salary)
        & (df_filtered["projected_points_per_salary"] > min_value)
    ]
    for position, (min_salary, min_value) in position_thresholds.items()
}

df_qb = position_frames["QB"]
df_rb = position_frames["RB"]
df_wr = position_frames["WR"]
df_te = position_frames["TE"]
df_dst = position_frames["DST"]

# stack the per-position frames back into a single table with a fresh index
df_players_filtered = pd.concat([df_qb, df_rb, df_wr, df_te, df_dst]).reset_index(
    drop=True
)

print("number of QB:", len(df_qb))
print("number of RB:", len(df_rb))
print("number of WR:", len(df_wr))
print("number of TE:", len(df_te))
print("number of DST:", len(df_dst))
print("total players:", len(df_players_filtered))

number of QB: 8
number of RB: 20
number of WR: 23
number of TE: 8
number of DST: 9
total players: 68


## Create lists of players and their attributes

In [5]:
def _player_from_row(row):
    """Build a Player (imported from `domain`) from one row of the filtered table.

    Expected team/opponent points are looked up in `team_points_mapping`
    using the row's team and opponent codes.
    """
    return Player(
        name=row.name,
        position=row.position,
        team=row.team,
        opponent=row.opponent,
        salary=row.salary,
        points_per_game=row.points_per_game,
        points_per_game_per_salary=row.points_per_game_per_salary,
        expected_team_points=team_points_mapping[row.team],
        expected_opponent_points=team_points_mapping[row.opponent],
        projected_points=row.projected_points,
        start_sit_score=row.start_sit_score,
        start_sit_grade=row.start_sit_grade,
        position_rank=row.position_rank,
        player_owned_pct=row.player_owned_pct,
    )


def _players_at(position):
    """Return the players in `all_players` whose position equals `position`."""
    return [player for player in all_players if player.position == position]


def _all_different_teams(combo):
    """True when no two players in `combo` share a team."""
    return len({player.team for player in combo}) == len(combo)


# one Player per row of the filtered player table
all_players = [_player_from_row(row) for row in df_players_filtered.itertuples()]

# separate out players by position
qb_players = _players_at("QB")
wr_players = _players_at("WR")
rb_players = _players_at("RB")
te_players = _players_at("TE")
dst_players = _players_at("DST")

# all groups of 4 WR and of 2 RB, keeping only groups whose players are all
# on different teams
# (4 WR = three WR slots plus one extra; change the group sizes to build
#  teams where the extra player is an RB instead)
wr_combinations = [
    combo for combo in combinations(wr_players, r=4) if _all_different_teams(combo)
]
rb_combinations = [
    combo for combo in combinations(rb_players, r=2) if _all_different_teams(combo)
]

# size of the full search space: every QB x TE x DST x WR group x RB group
total_num_combinations = (
    len(qb_players)
    * len(te_players)
    * len(dst_players)
    * len(wr_combinations)
    * len(rb_combinations)
)

print("number of WR combinations:", len(wr_combinations))
print("number of RB combinations:", len(rb_combinations))
print("total number of combinations:", total_num_combinations)

number of WR combinations: 7815
number of RB combinations: 187
total number of combinations: 841769280


## Create list of feasible teams

In [2]:
import random
import numpy as np

In [None]:
# seed the random number generators so the random sampling of WR/RB
# combinations is reproducible across kernel restarts
# (the previous `np.random()` call raised TypeError: np.random is a module)
RANDOM_SEED = 13
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

In [6]:
# caps on how many WR/RB combinations to keep when downsampling; the recorded
# output of the combinations cell shows 7815 WR and 187 RB combinations, so
# with that data these caps keep everything (lower them for a faster run)
max_wr_combs = 7815
max_rb_combs = 187

# randomly downsample the WR/RB combinations; k is clamped to the population
# size because random.sample raises ValueError when k exceeds it
random_wr_combs = random.sample(
    wr_combinations, k=min(max_wr_combs, len(wr_combinations))
)
random_rb_combs = random.sample(
    rb_combinations, k=min(max_rb_combs, len(rb_combinations))
)

start_time = time.time()
total_count = 0
feasible_teams = []

# loop through each position, create a team, and filter
for qb in qb_players:
    for te in te_players:
        for dst in dst_players:
            for wrs in random_wr_combs:
                for rbs in random_rb_combs:
                    team = Team(
                        QB=qb,
                        RB1=rbs[0],
                        RB2=rbs[1],
                        WR1=wrs[0],
                        WR2=wrs[1],
                        WR3=wrs[2],
                        Flex=wrs[3],  # flex slot is filled by the 4th WR of the group
                        TE=te,
                        DST=dst,
                    )
                    # filter teams that
                    #  1) match the salary constraint;
                    #  2) offensive players not playing defense;
                    #  3) have no more than 2 players on the same team;
                    #  4) QB and RB are not on the same team;
                    #  5) QB and WR are on same team; and
                    #  6) WR and TE are not on the same team.
                    if (
                        49500 <= team.total_salary <= 50000
                        and not team.offense_playing_defense
                        and team.max_num_players_same_team <= 2
                        and not team.qb_rb_same_team
                        and team.qb_wr_same_team
                        and not team.te_wr_same_team
                    ):
                        feasible_teams.append(team)

                    # count every candidate team, feasible or not
                    total_count += 1

elapsed_time = time.time() - start_time
# avoid ZeroDivisionError when a position list is empty and no team was built
percent_feasible = len(feasible_teams) / total_count * 100 if total_count else 0.0
print(f"total run time: {round(elapsed_time / 60, 1)} minutes")
print(f"number of feasible teams found: {len(feasible_teams)}")
print(f"percent feasible: {round(percent_feasible, 1)}%")

total run time: 46.7 minutes
number of feasible teams found: 3033752
percent feasible: 0.4%


## Sort and display top teams by aggregate score

In [7]:
def _rank_scores(teams, attribute):
    """Rank `teams` by `attribute` (higher is better) and score each by rank.

    Returns a tuple `(ordered, scores)`: `ordered` is the list of teams sorted
    in descending order of the attribute, and `scores` maps each team to
    `1 - rank / len(teams)`, so the best team scores 1.0 and the score shrinks
    linearly towards 0 for lower-ranked teams.
    """
    ordered = sorted(teams, key=lambda team: getattr(team, attribute), reverse=True)
    count = len(ordered)
    scores = {team: 1 - rank / count for rank, team in enumerate(ordered)}
    return ordered, scores


# total points per game
by_total_ppg, team_score_by_total_ppg = _rank_scores(feasible_teams, "total_ppg")

# total expected team points
(
    by_total_expected_team_points,
    team_score_by_total_expected_team_points,
) = _rank_scores(feasible_teams, "total_expected_team_points")

# total projected points
by_total_projected_points, team_score_by_total_projected_points = _rank_scores(
    feasible_teams, "total_projected_points"
)

# average start/sit score
by_start_sit_score, team_score_by_start_sit_score = _rank_scores(
    feasible_teams, "avg_start_sit_score"
)

# average percent player owned
by_owned_pct, team_score_by_owned_pct = _rank_scores(
    feasible_teams, "avg_player_owned_pct"
)

In [8]:
# create tuples of (team, weighted average of the rank-based scores)
# the ppg score and the owned-pct score currently carry a weight of 0
team_score = []
for team in feasible_teams:
    score1 = team_score_by_total_ppg[team]
    score2 = team_score_by_total_expected_team_points[team]
    score3 = team_score_by_total_projected_points[team]
    score4 = team_score_by_start_sit_score[team]
    score5 = team_score_by_owned_pct[team]
    weighted_avg_score = (
        0 * score1 + 0.3 * score2 + 0.6 * score3 + 0.1 * score4 + 0.0 * score5
    )
    team_score.append((team, weighted_avg_score))

# keep the three teams with the highest weighted score
by_score = sorted(team_score, key=lambda x: x[1], reverse=True)[:3]

# print out the details
# (the loop unpacks into its own names so the `team_score` list above is not
#  overwritten by a single tuple)
for rank, (team, final_score) in enumerate(by_score, start=1):
    print(f"***** team #{rank} *****")
    # iterate the roster slots in the order they are annotated on the team
    for pos in team.__annotations__.keys():
        p = getattr(team, pos)
        print(
            f"{pos}: {p.name} ({p.team.upper()}) | ${p.salary} | "
            f"Proj: {p.projected_points} | Grade: {p.start_sit_grade} | "
            f"Pos Rank: {p.position_rank}"
        )
    print(f"total projected points: {team.total_projected_points}")
    print("")

***** team #1 *****
QB: Tannehill, Ryan (TEN) | $6200 | Proj: 19.5 | Grade: B | Pos Rank: 7
RB1: Hines, Nyheim (IND) | $5300 | Proj: 12.8 | Grade: B | Pos Rank: 19
RB2: Booker, Devontae (LV) | $5500 | Proj: 12.9 | Grade: B+ | Pos Rank: 18
WR1: Jefferson, Justin (MIN) | $6900 | Proj: 18.0 | Grade: A | Pos Rank: 7
WR2: Woods, Robert (LAR) | $5900 | Proj: 17.1 | Grade: A | Pos Rank: 12
WR3: Davis, Corey (TEN) | $5100 | Proj: 13.9 | Grade: B- | Pos Rank: 26
TE: Waller, Darren (LV) | $6100 | Proj: 16.4 | Grade: A | Pos Rank: 2
Flex: Cooks, Brandin (HOU) | $5600 | Proj: 15.9 | Grade: A- | Pos Rank: 19
DST: Seattle (SEA) | $3300 | Proj: 8.8 | Grade: A | Pos Rank: 4
total projected points: 135.3

***** team #2 *****
QB: Tannehill, Ryan (TEN) | $6200 | Proj: 19.5 | Grade: B | Pos Rank: 7
RB1: Ekeler, Austin (LAC) | $7100 | Proj: 19.9 | Grade: A+ | Pos Rank: 2
RB2: Hines, Nyheim (IND) | $5300 | Proj: 12.8 | Grade: B | Pos Rank: 19
WR1: Jefferson, Justin (MIN) | $6900 | Proj: 18.0 | Grade: A | Po

In [9]:
# display the schedule rows that passed the date / hour / home-team filter
df_schedule_valid

Unnamed: 0,datetime,favorite,line,underdog,over_under,home,hour
0,2020-12-06 01:00:00,chi,-3.0,det,44.5,chi,1
1,2020-12-06 01:00:00,mia,-11.5,cin,42.5,mia,1
2,2020-12-06 01:00:00,ind,-3.5,hou,51.0,hou,1
3,2020-12-06 01:00:00,min,-10.0,jac,52.5,min,1
4,2020-12-06 01:00:00,lv,-9.0,nyj,47.0,nyj,1
5,2020-12-06 01:00:00,no,-3.0,atl,44.5,atl,1
6,2020-12-06 01:00:00,ten,-5.5,cle,53.5,ten,1
7,2020-12-06 04:05:00,sea,-10.0,nyg,46.5,sea,4
8,2020-12-06 04:05:00,lar,-3.0,ari,48.0,ari,4
9,2020-12-06 04:25:00,gb,-8.5,phi,46.5,gb,4
