In [1]:
# import modules
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup
import csv
import re

In [2]:
# Create a dictionary of URLs for each position group

projectons_dict = {
    "QB": "https://www.fantasypros.com/nfl/projections/qb.php?week=draft",
    "RB": "https://www.fantasypros.com/nfl/projections/rb.php?week=draft&scoring=PPR&week=draft",
    "WR": "https://www.fantasypros.com/nfl/projections/wr.php?week=draft&scoring=PPR&week=draft",
    "TE": "https://www.fantasypros.com/nfl/projections/te.php?week=draft&scoring=PPR&week=draft",
    "FLEX": "https://www.fantasypros.com/nfl/projections/flex.php?week=draft&scoring=PPR&week=draft",
    "K": "https://www.fantasypros.com/nfl/projections/k.php?week=draft",
    "DEF": "https://www.fantasypros.com/nfl/projections/dst.php?week=draft",
}

In [3]:
# Define a function to scrape data tables from the internet and save them as csv files


def scrape_tables(dictionary):
    # Step 2: Loop through each key-value pair in the dictionary
    for name, url in dictionary.items():
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "html.parser")

        # Example: Locate and extract data from a table (customize this as needed)
        table = soup.find("table")

        if table:
            headers = [header.get_text().strip() for header in table.find_all("th")]
            rows = []
            for row in table.find_all("tr"):
                cells = [cell.get_text().strip() for cell in row.find_all("td")]
                if cells:  # Only append non-empty rows
                    rows.append(cells)

            # Combine headers and rows into a DataFrame
            df = pd.DataFrame(rows, columns=headers)

            # Step 3: Save each DataFrame to a separate CSV file with the variable name
            filename = os.path.join("data/scraped", f"{name}_projections.csv")
            df.to_csv(filename, index=False)

            print(f"Data from {name} ({url}) saved to {filename}")

        else:
            print("URL does not contain a table.")
            break

    print("Scraping and data saving completed.")
    return


scrape_tables(projectons_dict)

Data from QB (https://www.fantasypros.com/nfl/projections/qb.php?week=draft) saved to data/scraped/QB_projections.csv
Data from RB (https://www.fantasypros.com/nfl/projections/rb.php?week=draft&scoring=PPR&week=draft) saved to data/scraped/RB_projections.csv
Data from WR (https://www.fantasypros.com/nfl/projections/wr.php?week=draft&scoring=PPR&week=draft) saved to data/scraped/WR_projections.csv
Data from TE (https://www.fantasypros.com/nfl/projections/te.php?week=draft&scoring=PPR&week=draft) saved to data/scraped/TE_projections.csv
Data from FLEX (https://www.fantasypros.com/nfl/projections/flex.php?week=draft&scoring=PPR&week=draft) saved to data/scraped/FLEX_projections.csv
Data from K (https://www.fantasypros.com/nfl/projections/k.php?week=draft) saved to data/scraped/K_projections.csv
Data from DEF (https://www.fantasypros.com/nfl/projections/dst.php?week=draft) saved to data/scraped/DEF_projections.csv
Scraping and data saving completed.


In [4]:
# Clean the data so it can be manipulated

# Some of the positions have an extra row of column headers that need to be deleted

clean_files = [
    "QB_projections",
    "RB_projections",
    "WR_projections",
    "TE_projections",
    "FLEX_projections",
    "K_projections",
    "DEF_projections",
]

# Position mapping for each file
position_mapping = {
    "QB_projections": "QB",
    "RB_projections": "RB",
    "WR_projections": "WR",
    "TE_projections": "TE",
    "K_projections": "K",
    "DEF_projections": "DEF",
}

# List to store each cleaned dataframe in order to merge them into one final one at the end
dfs = []

for file in clean_files:
    df = pd.read_csv(f"data/scraped/{file}.csv")

    # Make sure all columns after the 'Player' column are numerical
    df.iloc[:, 2:] = (
        df.iloc[:, 2:]
        .replace({",": ""}, regex=True)
        .apply(pd.to_numeric, errors="coerce")
        .astype(float)
    )

    if file == "RB_projections":
        df = df[1:]
        df = df.rename(
            columns={
                "ATT": "RUSH_ATT",
                "YDS": "RUSH_YDS",
                "TDS": "RUSH_TDS",
                "YDS.1": "REC_YDS",
                "TDS.1": "REC_TDS",
            }
        )
    elif file == "WR_projections":
        df = df[1:]
        df = df.rename(
            columns={
                "YDS": "REC_YDS",
                "TDS": "REC_TDS",
                "YDS.1": "RUSH_YDS",
                "TDS.1": "RUSH_TDS",
            }
        )
    elif file == "TE_projections":
        df = df[1:]
        df = df.rename(columns={"YDS": "REC_YDS", "TDS": "REC_TDS"})

    elif file == "QB_projections":
        df = df[1:]
        df = df.rename(
            columns={
                "YDS": "PASS_YDS",
                "TDS": "PASS_TDS",
                "ATT.1": "RUSH_ATT",
                "YDS.1": "RUSH_YDS",
                "TDS.1": "RUSH_TDS",
            }
        )

    elif file == "FLEX_projections":
        df = df[1:]
        df = df.rename(
            columns={
                "YDS": "RUSH_YDS",
                "TDS": "RUSH_TDS",
                "YDS.1": "REC_YDS",
                "TDS.1": "REC_TDS",
            }
        )
        df["POS"] = df["POS"].astype(str).apply(lambda x: re.sub(r"[^a-zA-Z]", "", x))

        # Continue to the next file without appending FLEX to dfs
        continue

    # Assign position based on the filename using position_mapping
    pos = position_mapping[file]
    df["POS"] = pos

    # Append the cleaned DataFrame to the list
    dfs.append(df)

    # Save cleaned files to the clean subdirectory
    filename = os.path.join("data/clean", f"{file}_clean.csv")
    df.to_csv(filename, index=False)

    print(f"Data from data/scraped/{file}.csv cleaned and saved to {filename}")

# Concatenate all DataFrames in the list into one
final_df = pd.concat(dfs, ignore_index=True)

# Move 'POS' column to be the second column
columns = final_df.columns.tolist()
if "POS" in columns:
    columns.insert(1, columns.pop(columns.index("POS")))
    final_df = final_df[columns]


# Optionally, save the final merged DataFrame to a CSV file
final_filename = os.path.join("data/clean", "all_projections_clean.csv")
final_df.to_csv(final_filename, index=False)

print(f"All data merged and saved to {final_filename}")

print("Data cleaning completed.")

Data from data/scraped/QB_projections.csv cleaned and saved to data/clean/QB_projections_clean.csv
Data from data/scraped/RB_projections.csv cleaned and saved to data/clean/RB_projections_clean.csv
Data from data/scraped/WR_projections.csv cleaned and saved to data/clean/WR_projections_clean.csv
Data from data/scraped/TE_projections.csv cleaned and saved to data/clean/TE_projections_clean.csv
Data from data/scraped/K_projections.csv cleaned and saved to data/clean/K_projections_clean.csv
Data from data/scraped/DEF_projections.csv cleaned and saved to data/clean/DEF_projections_clean.csv
All data merged and saved to data/clean/all_projections_clean.csv
Data cleaning completed.


In [27]:
all_projections_df = pd.read_csv("data/clean/all_projections_clean.csv")

all_projections_df["FPTS/Game"] = round(all_projections_df["FPTS"] / 17, 2)

all_projections_df = all_projections_df.replace("JAC", "JAX", regex=True)

all_projections_df[all_projections_df["Player"] == "Evan Engram JAX"]

Unnamed: 0,Player,POS,ATT,CMP,PASS_YDS,PASS_TDS,INTS,RUSH_ATT,RUSH_YDS,RUSH_TDS,...,XPT,SACK,INT,FR,FF,TD,SAFETY,PA,YDS AGN,FPTS/Game
522,Evan Engram JAX,TE,,,,,,,,,...,,,,,,,,,,11.08


In [None]:
mock_draft_url = {"Mock draft": "https://sleeper.com/draft/nfl/1132887124602236928"}

In [None]:
def scrape_sleeper_draft(dictionary):
    # Step 2: Loop through each key-value pair in the dictionary
    for name, url in dictionary.items():
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "html.parser")

        # Example: Locate and extract data from a table (customize this as needed)
        table = soup.find("player-name")

        if table:
            headers = [header.get_text().strip() for header in table.find_all("th")]
            rows = []
            for row in table.find_all("tr"):
                cells = [cell.get_text().strip() for cell in row.find_all("td")]
                if cells:  # Only append non-empty rows
                    rows.append(cells)

            # Combine headers and rows into a DataFrame
            df = pd.DataFrame(rows, columns=headers)

            # Step 3: Save each DataFrame to a separate CSV file with the variable name
            filename = os.path.join("data/scraped", f"{name}_.csv")
            df.to_csv(filename, index=False)

            print(f"Data from {name} ({url}) saved to {filename}")

        else:
            print("Cannot find draft board.")
            break

    print("Done!")
    return

In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import time

# Setup Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode (no UI)

# Path to your ChromeDriver
service = Service("/Users/jakebarnabe/Downloads/chromedriver-mac-arm64/chromedriver")

# Create a WebDriver instance
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open the webpage
url = "https://sleeper.com/draft/nfl/1132887124602236928"
driver.get(url)

# Wait for the content to load (adjust time as needed)
time.sleep(5)

# Find elements containing player names
team_columns = driver.find_elements(By.CLASS_NAME, "team-column")
teams_players = {}

for i, team in enumerate(team_columns):
    players = []
    player_elements = team.find_elements(By.CLASS_NAME, "player")
    for player in player_elements:
        name = player.find_element(By.CLASS_NAME, "player-name").text
        # Assuming 'position' is another class inside 'player' div or similar structure
        position = player.find_element(By.CLASS_NAME, "position").text.split(" ")[0]
        nfl_team = player.find_element(By.CLASS_NAME, "position").text.split(" ")[2]
        players.append(f"{name} {position} {nfl_team}")

    teams_players[f"team_{i+1}"] = players

# Close the browser
driver.quit()

# Print or process the data
for team, players in teams_players.items():
    print(f"{team}: {players}")

team_1: ['J. Jefferson WR MIN', 'D. Adams WR LV', 'J. Allen QB BUF', 'R. White RB TB', 'M. Pittman WR IND', 'D. Kincaid TE BUF', 'D. Montgomery RB DET', 'X. Worthy WR KC', 'C. Ridley WR TEN', 'J. Ferguson TE DAL', 'B. Robinson RB WAS', 'R. Doubs WR GB', 'J. Ford RB CLE', 'C. Dicker K LAC', 'N. Jets DEF NYJ']
team_2: ['J. Chase WR CIN', 'S. LaPorta TE DET', 'D. Henry RB BAL', 'M. Nabers WR NYG', 'D. Moore WR CHI', 'J. Burrow QB CIN', 'C. Kirk WR JAX', 'T. Pollard RB TEN', 'Z. White RB LV', 'B. Thomas WR JAX', 'D. Goedert TE PHI', 'J. Daniels QB WAS', 'H. Butker K KC', 'J. Meyers WR LV', 'B. Ravens DEF BAL']
team_3: ['C. McCaffrey RB SF', 'D. London WR ATL', 'T. Kelce TE KC', 'J. Cook RB BUF', 'K. Walker RB SEA', 'T. Dell WR HOU', 'D. Swift RB CHI', 'J. Love QB GB', 'J. Addison WR MIN', 'T. Spears RB TEN', 'B. Purdy QB SF', 'C. Samuel WR BUF', 'P. Freiermuth TE PIT', 'K. Fairbairn K HOU', 'C. Browns DEF CLE']
team_4: ['T. Hill WR MIA', 'K. Williams RB LAR', 'I. Pacheco RB KC', 'C. Kupp W

In [9]:
# Setup Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode (no UI)

# Path to your ChromeDriver
service = Service("/Users/jakebarnabe/Downloads/chromedriver-mac-arm64/chromedriver")

# Create a WebDriver instance
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open the webpage
url = "https://sleeper.com/draft/nfl/1132887124602236928"
driver.get(url)

# Wait for the content to load (adjust time as needed)
time.sleep(5)

# Find elements containing player names
team_columns = driver.find_elements(By.CLASS_NAME, "team-column")
teams_players = {}

for team in team_columns:
    # Extract the team name using 'header-text' class
    team_name_element = team.find_element(By.CLASS_NAME, "header-text")
    team_name = team_name_element.text

    players = []
    player_elements = team.find_elements(By.CLASS_NAME, "player")
    for player in player_elements:
        name = player.find_element(By.CLASS_NAME, "player-name").text
        # Assuming 'position' is another class inside 'player' div or similar structure
        position = player.find_element(By.CLASS_NAME, "position").text.split(" ")[0]
        nfl_team = player.find_element(By.CLASS_NAME, "position").text.split(" ")[2]
        players.append(f"{name} {position} {nfl_team}")

    teams_players[team_name] = players

# Close the browser
driver.quit()

# Print or process the data
for team, players in teams_players.items():
    print(f"{team}: {players}")

Team 1: ['J. Jefferson WR MIN', 'D. Adams WR LV', 'J. Allen QB BUF', 'R. White RB TB', 'M. Pittman WR IND', 'D. Kincaid TE BUF', 'D. Montgomery RB DET', 'X. Worthy WR KC', 'C. Ridley WR TEN', 'J. Ferguson TE DAL', 'B. Robinson RB WAS', 'R. Doubs WR GB', 'J. Ford RB CLE', 'C. Dicker K LAC', 'N. Jets DEF NYJ']
Team 2: ['J. Chase WR CIN', 'S. LaPorta TE DET', 'D. Henry RB BAL', 'M. Nabers WR NYG', 'D. Moore WR CHI', 'J. Burrow QB CIN', 'C. Kirk WR JAX', 'T. Pollard RB TEN', 'Z. White RB LV', 'B. Thomas WR JAX', 'D. Goedert TE PHI', 'J. Daniels QB WAS', 'H. Butker K KC', 'J. Meyers WR LV', 'B. Ravens DEF BAL']
Team 3: ['C. McCaffrey RB SF', 'D. London WR ATL', 'T. Kelce TE KC', 'J. Cook RB BUF', 'K. Walker RB SEA', 'T. Dell WR HOU', 'D. Swift RB CHI', 'J. Love QB GB', 'J. Addison WR MIN', 'T. Spears RB TEN', 'B. Purdy QB SF', 'C. Samuel WR BUF', 'P. Freiermuth TE PIT', 'K. Fairbairn K HOU', 'C. Browns DEF CLE']
Jbarns14: ['T. Hill WR MIA', 'K. Williams RB LAR', 'I. Pacheco RB KC', 'C. Kupp

In [31]:
mock_draft_df = pd.DataFrame(teams_players)
for column in mock_draft_df.columns:
    mock_draft_df[column] = mock_draft_df[column].str.replace(
        r"(DEF).*", "DEF", regex=True
    )

mock_draft_df

Unnamed: 0,Team 1,Team 2,Team 3,Jbarns14,Team 5,Team 6,Team 7,Team 8,Team 9,Team 10
0,J. Jefferson WR MIN,J. Chase WR CIN,C. McCaffrey RB SF,T. Hill WR MIA,C. Lamb WR DAL,A. Brown WR PHI,B. Robinson RB ATL,B. Hall RB NYJ,A. St. Brown WR DET,G. Wilson WR NYJ
1,D. Adams WR LV,S. LaPorta TE DET,D. London WR ATL,K. Williams RB LAR,T. Etienne RB JAX,S. Barkley RB PHI,P. Nacua WR LAR,J. Taylor RB IND,J. Gibbs RB DET,M. Harrison WR ARI
2,J. Allen QB BUF,D. Henry RB BAL,T. Kelce TE KC,I. Pacheco RB KC,C. Olave WR NO,J. Jacobs RB GB,B. Aiyuk WR SF,P. Mahomes QB KC,J. Hurts QB PHI,L. Jackson QB BAL
3,R. White RB TB,M. Nabers WR NYG,J. Cook RB BUF,C. Kupp WR LAR,J. Waddle WR MIA,M. Evans WR TB,D. Samuel WR SF,N. Collins WR HOU,D. Achane RB MIA,S. Diggs WR HOU
4,M. Pittman WR IND,D. Moore WR CHI,K. Walker RB SEA,D. Metcalf WR SEA,Z. Flowers WR BAL,C. Stroud QB HOU,A. Kamara RB NO,G. Kittle TE SF,M. Andrews TE BAL,T. McBride TE ARI
5,D. Kincaid TE BUF,J. Burrow QB CIN,T. Dell WR HOU,G. Pickens WR PIT,K. Pitts TE ATL,D. Smith WR PHI,A. Richardson QB IND,A. Cooper WR CLE,A. Jones RB MIN,J. Mixon RB HOU
6,D. Montgomery RB DET,C. Kirk WR JAX,D. Swift RB CHI,J. Conner RB ARI,R. Stevenson RB NE,T. Higgins WR CIN,K. Allen WR CHI,T. McLaurin WR WAS,N. Harris RB PIT,R. Rice WR KC
7,X. Worthy WR KC,T. Pollard RB TEN,J. Love QB GB,K. Murray QB ARI,D. Prescott QB DAL,D. Njoku TE CLE,C. Godwin WR TB,R. Odunze WR CHI,J. Reed WR GB,R. Mostert RB MIA
8,C. Ridley WR TEN,Z. White RB LV,J. Addison WR MIN,E. Engram TE JAX,L. McConkey WR LAC,J. Williams RB DEN,B. Bowers TE LV,N. Chubb RB CLE,H. Brown WR KC,K. Coleman WR BUF
9,J. Ferguson TE DAL,B. Thomas WR JAX,T. Spears RB TEN,D. Hopkins WR TEN,J. Brooks RB CAR,D. Johnson WR CAR,J. Warren RB PIT,A. Ekeler RB WAS,D. Singletary RB NYG,Z. Moss RB CIN


In [11]:
for player in mock_draft_df["Jbarns14"]:
    player_id = player.split(" ")[1] + " " + player.split(" ")[3]
    player_FPTS_per_game = all_projections_df[
        all_projections_df["Player"].str.contains(player_id)
        & (all_projections_df["POS"] == player.split(" ")[2])
    ][["Player", "FPTS/Game"]]

    # Team_FPTS_per_game = sum(player_FPTS_per_game)

In [33]:
# Assuming mock_draft_df and all_projections_df are already defined
# Initialize an empty DataFrame to store results
players_fpts_df = pd.DataFrame(columns=["Player", "FPTS/Game"])

# Loop through each player in the 'Jbarns14' column of mock_draft_df
for player in mock_draft_df["Jbarns14"]:
    if "DEF" in player:
        player_id = player.split(" ")[1]
        position = "DEF"  # If 'DEF' is present, set position to 'DEF'
    else:
        player_id = player.split(" ")[1] + " " + player.split(" ")[-1]
        position = player.split(" ")[2]

    # Filter all_projections_df for matching player_id and position
    player_FPTS_per_game = all_projections_df[
        all_projections_df["Player"].str.contains(player_id)
        & (all_projections_df["POS"] == position)
    ][["Player", "FPTS/Game"]]

    # Append the result to the players_fpts_df DataFrame
    players_fpts_df = pd.concat(
        [players_fpts_df, player_FPTS_per_game], ignore_index=True
    )

# Display the resulting DataFrame
print(players_fpts_df)

                 Player  FPTS/Game
0       Tyreek Hill MIA      19.14
1    Kyren Williams LAR      14.47
2      Isiah Pacheco KC      13.84
3       Cooper Kupp LAR      13.91
4        DK Metcalf SEA      13.42
5    George Pickens PIT      12.71
6      James Conner ARI      11.16
7      Kyler Murray ARI      17.78
8       Evan Engram JAX      11.08
9   DeAndre Hopkins TEN      10.89
10  Christian Watson GB      10.33
11      Gus Edwards LAC       8.81
12   Brandon Aubrey DAL       8.15
13       Dallas Cowboys       6.99
14    Chuba Hubbard CAR       7.21


  players_fpts_df = pd.concat([players_fpts_df, player_FPTS_per_game], ignore_index=True)


In [41]:
import pandas as pd

# Assuming mock_draft_df and all_projections_df are already defined
# Initialize an empty DataFrame to store results
players_fpts_df = pd.DataFrame(columns=["Player", "FPTS/Game", "Team"])

# Loop through each team column in mock_draft_df
for team_name in mock_draft_df.columns:
    for player in mock_draft_df[team_name]:
        if "DEF" in player:
            player_id = player.split(" ")[1]
            position = "DEF"  # If 'DEF' is present, set position to 'DEF'
        else:
            player_id = player.split(" ")[1] + " " + player.split(" ")[-1]
            position = player.split(" ")[2]

        # Filter all_projections_df for matching player_id and position
        player_FPTS_per_game = all_projections_df[
            all_projections_df["Player"].str.contains(player_id)
            & (all_projections_df["POS"] == position)
        ][["Player", "FPTS/Game"]]

        # Add team information to the result
        player_FPTS_per_game["Team"] = team_name

        # Append the result to the players_fpts_df DataFrame
        players_fpts_df = pd.concat(
            [players_fpts_df, player_FPTS_per_game], ignore_index=True
        )

# Calculate total FPTS/Game for each team
team_fpts_total = (
    players_fpts_df.groupby("Team")["FPTS/Game"]
    .sum()
    .reset_index()
    .sort_values("FPTS/Game", ascending=False)
)

# Rename columns for clarity
team_fpts_total.columns = ["Team", "Total FPTS/Game"]

# Display the resulting DataFrame with total FPTS/Game per team
team_fpts_total

  players_fpts_df = pd.concat([players_fpts_df, player_FPTS_per_game], ignore_index=True)


Unnamed: 0,Team,Total FPTS/Game
6,Team 6,185.1
0,Jbarns14,179.89
3,Team 2,173.63
4,Team 3,172.81
7,Team 7,172.51
5,Team 5,167.91
9,Team 9,165.55
2,Team 10,164.88
8,Team 8,159.28
1,Team 1,154.55


In [None]:
# def Calculate_team_points_per_game(df):
#     # Calculate the sum of each team's projected fantasy points per game
#     for team in df.columns:

#         for player in df[team]:
#             player_id = player.split(' ')[1] + ' ' + player.split(' ')[2]
#             player_FPTS_per_game = all_projections_df[all_projections_df['Player'].str.contains(player_id)]['FPTS/Game']

#         Team_FPTS_per_game = sum(player_FPTS_per_game)

#     return(Team_FPTS_per_game)

# Calculate_team_points_per_game(mock_draft_df)

0