## Initalize notebook

In [1]:
import typing as T

import requests
import pandas as pd
from itertools import combinations
import random
import time
from collections import Counter

## Get data

In [2]:
url = "http://rotoguru1.com/cgi-bin/fstats.cgi"
params = {
    "pos": 0,
    "sort": 5,
    "game": "p",
    "colA": 0,
    "daypt": 0,
    "xavg": 1,
    "inact": 0,
    "maxprc": 99999,
    "outcsv": 1,
}
r = requests.get(url, params=params)
r.status_code

200

## Convert raw data into a dataframe

In [3]:
# split on newlines
split_newline = r.text.split("\n")

# if the first 4 characters in a line are digits, then it's player data (GID)
players = [line for line in split_newline if line[:4].isdigit()]

print("number of players scraped:", len(players))

# combine the headers with each row of data
headers = [
    "gid",
    "position",
    "name",
    "team",
    "opponent",
    "home/away",
    "salary",
    "salary_change",
    "total_points",
    "games_played",
    "points_per_game",
    "points_per_game_per_salary",
    "points_per_game_alt",
    "bye_week",
    "ytd_salary_high/low",
]
players_data = []
for row in players:
    split_row = row.split(";")
    data_dict = {k: v for k, v in zip(headers, split_row)}
    players_data.append(data_dict)

# convert to a dataframe
df_players = pd.DataFrame(players_data)
df_players["salary"] = df_players["salary"].astype(int)
df_players["salary_change"] = df_players["salary_change"].astype(int)
df_players["points_per_game"] = df_players["points_per_game"].astype(float)
df_players["points_per_game_per_salary"] = df_players[
    "points_per_game_per_salary"
].astype(float)

df_players.head()

number of players scraped: 495


Unnamed: 0,gid,position,name,team,opponent,home/away,salary,salary_change,total_points,games_played,points_per_game,points_per_game_per_salary,points_per_game_alt,bye_week,ytd_salary_high/low
0,1412,QB,"Wilson, Russell",sea,,H,0,0,159.38,5,31.88,0.0,31.88,6,L
1,1501,QB,"Prescott, Dak",dal,ari,H,4000,-3400,151.64,5,30.33,7.58,30.33,10,L
2,1529,QB,"Allen, Josh",buf,kan,H,7900,400,150.64,5,30.13,3.81,30.13,11,H
3,5562,RB,"Kamara, Alvin",no,,H,0,0,150.6,5,30.12,0.0,30.12,6,L
4,1523,QB,"Mahomes II, Patrick",kan,buf,A,7800,100,147.86,5,29.57,3.79,29.57,10,H


## Split up by position and filter

In [9]:
# teams that aren't playing at 1pm or 4pm on Sunday (week 6)
# source: https://www.fftoday.com/nfl/schedule.php?o=1&Week=6
invalid_teams = ["lar", "sfo", "kan", "buf", "ari", "dal", "lv", "lac", "no", "sea"]

# players exclude list (an optional list of players to exclude)
players_exclude = ["Gordon, Melvin", "McCaffrey, Christian"]

df_filtered = df_players[
    (~df_players["team"].isin(invalid_teams))
    & (df_players["salary"] > 0)
    & (~df_players["name"].isin(players_exclude))
]

print("number of players remaining after filters:", len(df_players))

# for each position, filter the list using a set of criteria
df_qb = df_filtered[
    (df_filtered["position"] == "QB")
    & (df_filtered["salary"] >= 5000)
    & (df_filtered["points_per_game_per_salary"] > 2.5)
]

df_rb = df_filtered[
    (df_filtered["position"] == "RB")
    & (df_filtered["salary"] >= 4000)
    & (df_filtered["points_per_game_per_salary"] > 2.5)
]

df_wr = df_filtered[
    (df_filtered["position"] == "WR")
    & (df_filtered["salary"] >= 4000)
    & (df_filtered["points_per_game_per_salary"] > 2.5)
]

df_te = df_filtered[
    (df_filtered["position"] == "TE")
    & (df_filtered["salary"] >= 3000)
    & (df_filtered["points_per_game_per_salary"] > 2.5)
]

df_dst = df_filtered[
    (df_filtered["position"] == "D")
    & (df_filtered["salary"] >= 2000)
    & (df_filtered["points_per_game_per_salary"] > 2.0)
]

# combine them all back together
df_players_filtered = pd.concat([df_qb, df_rb, df_wr, df_te, df_dst]).reset_index(
    drop=True
)

print("number of QB:", len(df_qb))
print("number of RB:", len(df_rb))
print("number of WR:", len(df_wr))
print("number of TE:", len(df_te))
print("number of DST:", len(df_dst))
print("total players remaining:", len(df_players_filtered))

number of players remaining after filters: 495
number of QB: 20
number of RB: 15
number of WR: 21
number of TE: 5
number of DST: 9
total players remaining: 70


## Define Player and Team classes

In [10]:
class Player(T.NamedTuple):
    name: str
    position: str
    team: str
    opponent: str
    salary: int
    points_per_game: float
    points_per_game_per_salary: float


class Team(T.NamedTuple):
    QB: Player
    RB1: Player
    RB2: Player
    WR1: Player
    WR2: Player
    WR3: Player
    TE: Player
    Flex: Player
    DST: Player

    @property
    def total_salary(self) -> int:
        keys = self.__annotations__.keys()
        salaries = [self.__getattribute__(k).salary for k in keys]
        return sum(salaries)

    @property
    def offense_playing_defense(self) -> bool:
        offense_teams = [
            self.QB.team,
            self.RB1.team,
            self.RB2.team,
            self.WR1.team,
            self.WR2.team,
            self.WR3.team,
            self.TE.team,
            self.Flex.team,
        ]
        if self.DST.opponent in offense_teams:
            return True
        return False

    @property
    def max_num_players_same_team(self) -> int:
        keys = self.__annotations__.keys()
        teams = [self.__getattribute__(k).team for k in keys]
        return max(Counter(teams).values())

    @property
    def total_ppg_ps(self) -> float:
        keys = self.__annotations__.keys()
        ppg_ps = [self.__getattribute__(k).points_per_game_per_salary for k in keys]
        return round(sum(ppg_ps), 2)

    @property
    def total_ppg(self) -> float:
        keys = self.__annotations__.keys()
        ppg = [self.__getattribute__(k).points_per_game for k in keys]
        return round(sum(ppg), 2)

## Create lists of players and their attributes

In [14]:
# create list of all players using the Player class defined above
all_players = [
    Player(
        name=row.name,
        position=row.position,
        team=row.team,
        opponent=row.opponent,
        salary=row.salary,
        points_per_game=row.points_per_game,
        points_per_game_per_salary=row.points_per_game_per_salary,
    )
    for row in df_players_filtered.itertuples()
]

# separate out players by position
qb_players = [p for p in all_players if p.position == "QB"]
wr_players = [p for p in all_players if p.position == "WR"]
rb_players = [p for p in all_players if p.position == "RB"]
te_players = [p for p in all_players if p.position == "TE"]
dst_players = [p for p in all_players if p.position == "D"]

# get all combinations of 3 WR and 3 RB (assume flex is RB)
# we can change this if we want a team where the flex is also a WR
wr_combinations = list(combinations(wr_players, r=4))
rb_combinations = list(combinations(rb_players, r=2))

# filter combinations to only include one player per team
wr_combinations = [
    c for c in wr_combinations if len(c) == len(set([p.team for p in c]))
]
rb_combinations = [
    c for c in rb_combinations if len(c) == len(set([p.team for p in c]))
]

total_num_combinations = (
    len(wr_combinations)
    * len(rb_combinations)
    * len(qb_players)
    * len(te_players)
    * len(dst_players)
)

print("number of WR combinations:", len(wr_combinations))
print("number of RB combinations:", len(rb_combinations))
print("total number of combinations:", total_num_combinations)

number of WR combinations: 5007
number of RB combinations: 101
total number of combinations: 455136300


## Create list of feasible teams

In [15]:
# randomly downsample the WR/RB combinations
random_wr_combs = random.sample(wr_combinations, k=3000)
random_rb_combs = random.sample(rb_combinations, k=100)

start_time = time.time()
total_count = 0
feasible_teams = []

# loop through each position, create a team, and filter
for qb in qb_players:
    for te in te_players:
        for dst in dst_players:
            for wrs in random_wr_combs:
                for rbs in random_rb_combs:
                    team = Team(
                        QB=qb,
                        RB1=rbs[0],
                        RB2=rbs[1],
                        WR1=wrs[0],
                        WR2=wrs[1],
                        WR3=wrs[2],
                        Flex=wrs[3], # flex can be RB or WR
                        TE=te,
                        DST=dst,
                    )
                    # filter teams that 
                    # 1) match the salary constraint;
                    # 2) offenive players not playing defense;
                    # 3) have no more than 2 players on the same team
                    if (
                        team.total_salary == 50000
                        and not team.offense_playing_defense
                        and team.max_num_players_same_team <= 2
                    ):
                        feasible_teams.append(team)

                    total_count += 1

elapsed_time = time.time() - start_time
print(f"total run time: {round(elapsed_time / 60, 1)} minutes")

print(f"percent feasible: {round(len(feasible_teams) / total_count * 100, 1)}%")

total run time: 11.8 minutes
percent feasible: 0.8%


## Sort and display top teams

In [16]:
# sort by highest total points per game and take the top 3 teams
top_teams = sorted(feasible_teams, key=lambda x: x.total_ppg, reverse=True)[:3]

# print out the details
for i, team in enumerate(top_teams):
    print(f"***** team #{i + 1} *****")
    print(
        f"QB: {team.QB.name} ({team.QB.team.upper()}) | ${team.QB.salary} | PPG: {team.QB.points_per_game}"
    )
    print(
        f"RB1: {team.RB1.name} ({team.RB1.team.upper()}) | ${team.RB1.salary} | PPG: {team.RB1.points_per_game}"
    )
    print(
        f"RB2: {team.RB2.name} ({team.RB2.team.upper()}) | ${team.RB2.salary} | PPG: {team.RB2.points_per_game}"
    )
    print(
        f"WR1: {team.WR1.name} ({team.WR1.team.upper()}) | ${team.WR1.salary} | PPG: {team.WR1.points_per_game}"
    )
    print(
        f"WR2: {team.WR2.name} ({team.WR2.team.upper()}) | ${team.WR2.salary} | PPG: {team.WR2.points_per_game}"
    )
    print(
        f"WR3: {team.WR3.name} ({team.WR3.team.upper()}) | ${team.WR3.salary} | PPG: {team.WR3.points_per_game}"
    )
    print(
        f"TE: {team.TE.name} ({team.TE.team.upper()}) | ${team.TE.salary} | PPG: {team.TE.points_per_game}"
    )
    print(
        f"Flex: {team.Flex.name} ({team.Flex.team.upper()}) | ${team.Flex.salary} | PPG: {team.Flex.points_per_game}"
    )
    print(
        f"DST: {team.DST.name} ({team.DST.team.upper()}) | ${team.DST.salary} | PPG: {team.DST.points_per_game} | Playing: {team.DST.opponent.upper()}"
    )
    print(f"total points per game: {team.total_ppg}")
    print("")

***** team #1 *****
QB: Newton, Cam (NE) | $6500 | PPG: 25.49
RB1: Davis, Mike (CAR) | $7000 | PPG: 22.65
RB2: Sanders, Miles (PHI) | $6600 | PPG: 17.33
WR1: Crowder, Jamison (NYJ) | $6100 | PPG: 25.5
WR2: Fulgham, Travis (PHI) | $4400 | PPG: 23.95
WR3: Claypool, Chase (PIT) | $5200 | PPG: 18.38
TE: Tonyan, Robert (GB) | $5100 | PPG: 20.1
Flex: Lazard, Allen (GB) | $5100 | PPG: 18.37
DST: Indianapolis (IND) | $4000 | PPG: 12.6 | Playing: CIN
total points per game: 184.37

***** team #2 *****
QB: Newton, Cam (NE) | $6500 | PPG: 25.49
RB1: Jones, Aaron (GB) | $7600 | PPG: 26.23
RB2: Davis, Mike (CAR) | $7000 | PPG: 22.65
WR1: Crowder, Jamison (NYJ) | $6100 | PPG: 25.5
WR2: Fulgham, Travis (PHI) | $4400 | PPG: 23.95
WR3: Davis, Corey (TEN) | $4800 | PPG: 14.87
TE: Tonyan, Robert (GB) | $5100 | PPG: 20.1
Flex: Patrick, Tim (DEN) | $4500 | PPG: 12.98
DST: Indianapolis (IND) | $4000 | PPG: 12.6 | Playing: CIN
total points per game: 184.37

***** team #3 *****
QB: Fitzpatrick, Ryan (MIA) | $5