In [1]:
# import modules
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup
import csv
import re

In [2]:
# Create a dictionary of URLs for each position group

# Every projections page sits under the same root and differs only in its page
# slug and in whether the query string also asks for PPR scoring.
_PROJECTIONS_ROOT = "https://www.fantasypros.com/nfl/projections"
_DRAFT_QUERY = "week=draft"
_PPR_QUERY = "week=draft&scoring=PPR&week=draft"

# (position label, page slug, query string), listed in scraping order.
_POSITION_PAGES = (
    ("QB", "qb", _DRAFT_QUERY),
    ("RB", "rb", _PPR_QUERY),
    ("WR", "wr", _PPR_QUERY),
    ("TE", "te", _PPR_QUERY),
    ("FLEX", "flex", _PPR_QUERY),
    ("K", "k", _DRAFT_QUERY),
    ("DEF", "dst", _DRAFT_QUERY),
)

# Maps each position group to the URL of its projections page.
projectons_dict = {
    label: f"{_PROJECTIONS_ROOT}/{slug}.php?{query}"
    for label, slug, query in _POSITION_PAGES
}

In [19]:
# Define a function to scrape data tables from the internet and save them as csv files


def _table_to_frame(table):
    """Convert a parsed HTML ``<table>`` element into a DataFrame.

    Column names are the stripped text of every ``<th>`` inside the table.
    Each ``<tr>`` contributes one row made of the stripped text of its
    ``<td>`` cells; rows that contain no ``<td>`` are dropped.
    """
    headers = [header.get_text().strip() for header in table.find_all("th")]
    rows = []
    for row in table.find_all("tr"):
        cells = [cell.get_text().strip() for cell in row.find_all("td")]
        if cells:  # header-only rows have no <td> and are skipped
            rows.append(cells)
    return pd.DataFrame(rows, columns=headers)


def scrape_tables(dictionary, output_dir="data/scraped", timeout=30):
    """Download the first HTML table of each URL and save it as a CSV file.

    Parameters
    ----------
    dictionary : dict
        Maps a short name to the URL to scrape. The name becomes the file
        name prefix: ``<output_dir>/<name>_projections.csv``.
    output_dir : str, optional
        Directory the CSV files are written to; created if it is missing.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    None
        Results are written to disk and progress is printed.

    Notes
    -----
    A URL that cannot be downloaded, or whose page holds no ``<table>``, is
    reported and skipped so that the remaining URLs are still processed.
    """
    for name, url in dictionary.items():
        try:
            # Without a timeout a stalled connection would hang the cell forever.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as error:
            print(f"Could not download {name} ({url}): {error}")
            continue

        soup = BeautifulSoup(response.text, "html.parser")

        # Only the first table on the page is used.
        table = soup.find("table")
        if table is None:
            # Keep going: one bad page should not drop every later position.
            print(f"URL does not contain a table: {name} ({url})")
            continue

        df = _table_to_frame(table)

        # Create the target directory lazily so nothing is created when
        # there is nothing to save.
        os.makedirs(output_dir, exist_ok=True)
        filename = os.path.join(output_dir, f"{name}_projections.csv")
        df.to_csv(filename, index=False)

        print(f"Data from {name} ({url}) saved to {filename}")

    print("Scraping and data saving completed.")
    return


# Scrape every URL in projectons_dict; each one is saved as <name>_projections.csv.
scrape_tables(projectons_dict)

Data from QB (https://www.fantasypros.com/nfl/projections/qb.php?week=draft) saved to data/scraped/QB_projections.csv
Data from RB (https://www.fantasypros.com/nfl/projections/rb.php?week=draft&scoring=PPR&week=draft) saved to data/scraped/RB_projections.csv
Data from WR (https://www.fantasypros.com/nfl/projections/wr.php?week=draft&scoring=PPR&week=draft) saved to data/scraped/WR_projections.csv
Data from TE (https://www.fantasypros.com/nfl/projections/te.php?week=draft&scoring=PPR&week=draft) saved to data/scraped/TE_projections.csv
Data from FLEX (https://www.fantasypros.com/nfl/projections/flex.php?week=draft&scoring=PPR&week=draft) saved to data/scraped/FLEX_projections.csv
Data from K (https://www.fantasypros.com/nfl/projections/k.php?week=draft) saved to data/scraped/K_projections.csv
Data from DEF (https://www.fantasypros.com/nfl/projections/dst.php?week=draft) saved to data/scraped/DEF_projections.csv
Scraping and data saving completed.


In [4]:
# Clean the data so it can be manipulated

# Some positions have an extra row of column headers that needs to be deleted

clean_files = [
    "QB_projections",
    "RB_projections",
    "WR_projections",
    "TE_projections",
    "FLEX_projections",
    "K_projections",
    "DEF_projections",
]

# Position label written into the POS column of each single-position file.
# FLEX is deliberately absent: that file already carries its own POS column.
position_mapping = {
    "QB_projections": "QB",
    "RB_projections": "RB",
    "WR_projections": "WR",
    "TE_projections": "TE",
    "K_projections": "K",
    "DEF_projections": "DEF",
}

# Files whose first row is a leftover header row rather than a player.
files_with_extra_header_row = {
    "QB_projections",
    "RB_projections",
    "WR_projections",
    "TE_projections",
    "FLEX_projections",
}

# Per-file renames that give repeated stat names (YDS, TDS, ...) a prefix so
# they stay distinguishable once all positions are merged. Names ending in
# ".1" are the second occurrence of a duplicated header as loaded by read_csv.
column_renames = {
    "QB_projections": {
        "YDS": "PASS_YDS",
        "TDS": "PASS_TDS",
        "ATT.1": "RUSH_ATT",
        "YDS.1": "RUSH_YDS",
        "TDS.1": "RUSH_TDS",
    },
    "RB_projections": {
        "ATT": "RUSH_ATT",
        "YDS": "RUSH_YDS",
        "TDS": "RUSH_TDS",
        "YDS.1": "REC_YDS",
        "TDS.1": "REC_TDS",
    },
    "WR_projections": {
        "YDS": "REC_YDS",
        "TDS": "REC_TDS",
        "YDS.1": "RUSH_YDS",
        "TDS.1": "RUSH_TDS",
    },
    "TE_projections": {"YDS": "REC_YDS", "TDS": "REC_TDS"},
    "FLEX_projections": {
        "YDS": "RUSH_YDS",
        "TDS": "RUSH_TDS",
        "YDS.1": "REC_YDS",
        "TDS.1": "REC_TDS",
    },
}

# List to store each cleaned dataframe in order to merge them into one final one at the end
dfs = []

# Make sure the output directory exists before the first file is written.
os.makedirs("data/clean", exist_ok=True)

for file in clean_files:
    df = pd.read_csv(f"data/scraped/{file}.csv")

    if file in files_with_extra_header_row:
        # Copy so the column assignments below act on an independent frame
        # instead of a slice of the frame that was just read.
        df = df.iloc[1:].copy()

    # Every column except the player name (first column) and POS is numeric.
    # Columns are converted one at a time and assigned by label so each one
    # really ends up as float; this also covers the column right after the
    # player name, which a positional `iloc[:, 2:]` slice would skip.
    text_columns = {df.columns[0], "POS"}
    numeric_columns = [column for column in df.columns if column not in text_columns]
    for column in numeric_columns:
        df[column] = pd.to_numeric(
            df[column].replace({",": ""}, regex=True),  # drop thousands separators
            errors="coerce",  # anything non-numeric becomes NaN
        ).astype(float)

    df = df.rename(columns=column_renames.get(file, {}))

    if file == "FLEX_projections":
        # Keep only the letters of the POS value.
        # NOTE(review): presumably this turns ranked labels such as "RB1"
        # into "RB" - confirm against the scraped FLEX file.
        df["POS"] = df["POS"].astype(str).str.replace(r"[^a-zA-Z]", "", regex=True)
        # FLEX is left out of `dfs`; it is still saved below so the cleaning
        # work above is not thrown away.
    else:
        # Assign position based on the filename using position_mapping
        df["POS"] = position_mapping[file]
        dfs.append(df)

    # Save cleaned files to the clean subdirectory
    filename = os.path.join("data/clean", f"{file}_clean.csv")
    df.to_csv(filename, index=False)

    print(f"Data from data/scraped/{file}.csv cleaned and saved to {filename}")

# Concatenate all DataFrames in the list into one
final_df = pd.concat(dfs, ignore_index=True)

# Move 'POS' column to be the second column
columns = final_df.columns.tolist()
if "POS" in columns:
    columns.insert(1, columns.pop(columns.index("POS")))
    final_df = final_df[columns]


# Save the final merged DataFrame to a CSV file
final_filename = os.path.join("data/clean", "all_projections_clean.csv")
final_df.to_csv(final_filename, index=False)

print(f"All data merged and saved to {final_filename}")

print("Data cleaning completed.")

Data from data/scraped/QB_projections.csv cleaned and saved to data/clean/QB_projections_clean.csv
Data from data/scraped/RB_projections.csv cleaned and saved to data/clean/RB_projections_clean.csv
Data from data/scraped/WR_projections.csv cleaned and saved to data/clean/WR_projections_clean.csv
Data from data/scraped/TE_projections.csv cleaned and saved to data/clean/TE_projections_clean.csv
Data from data/scraped/K_projections.csv cleaned and saved to data/clean/K_projections_clean.csv
Data from data/scraped/DEF_projections.csv cleaned and saved to data/clean/DEF_projections_clean.csv
All data merged and saved to data/clean/all_projections_clean.csv
Data cleaning completed.


In [16]:
# Load the merged projections CSV, print each column's dtype and display the frame.
df = pd.read_csv(os.path.join("data/clean", "all_projections_clean.csv"))
print(df.dtypes)
df

Player       object
POS          object
ATT         float64
CMP         float64
PASS_YDS    float64
PASS_TDS    float64
INTS        float64
RUSH_ATT    float64
RUSH_YDS    float64
RUSH_TDS    float64
FL          float64
FPTS        float64
REC         float64
REC_YDS     float64
REC_TDS     float64
FG          float64
FGA         float64
XPT         float64
SACK        float64
INT         float64
FR          float64
FF          float64
TD          float64
SAFETY      float64
PA          float64
YDS AGN     float64
dtype: object


Unnamed: 0,Player,POS,ATT,CMP,PASS_YDS,PASS_TDS,INTS,RUSH_ATT,RUSH_YDS,RUSH_TDS,...,FGA,XPT,SACK,INT,FR,FF,TD,SAFETY,PA,YDS AGN
0,Jalen Hurts PHI,QB,520.2,339.7,3810.4,24.4,12.1,145.3,624.2,11.3,...,,,,,,,,,,
1,Josh Allen BUF,QB,564.7,368.0,4060.0,27.8,14.0,108.2,556.7,8.9,...,,,,,,,,,,
2,Lamar Jackson BAL,QB,463.6,303.7,3567.1,25.0,10.6,146.1,836.1,5.4,...,,,,,,,,,,
3,Patrick Mahomes II KC,QB,584.8,392.2,4371.2,32.7,11.2,72.0,376.4,1.8,...,,,,,,,,,,
4,Anthony Richardson IND,QB,511.7,325.6,3503.9,20.0,11.4,127.6,646.0,7.9,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
709,Los Angeles Rams,DEF,,,,,,,,,...,,,41.1,13.0,8.2,12.0,2.1,0.5,406.5,5851.2
710,Tennessee Titans,DEF,,,,,,,,,...,,,42.5,11.1,9.2,12.8,2.1,0.0,393.4,5709.8
711,New England Patriots,DEF,,,,,,,,,...,,,36.0,12.4,9.5,14.0,2.4,0.0,405.7,5667.3
712,Arizona Cardinals,DEF,,,,,,,,,...,,,39.0,11.9,8.8,13.2,2.3,0.0,427.0,6029.7


In [6]:
# Keep only RB/WR/TE rows and four columns, then sort by FPTS, highest first.
skill_positions = ["RB", "WR", "TE"]
shown_columns = ["Player", "POS", "REC_YDS", "FPTS"]
df.loc[df["POS"].isin(skill_positions), shown_columns].sort_values(
    by="FPTS", ascending=False
)

Unnamed: 0,Player,POS,REC_YDS,FPTS
88,Christian McCaffrey SF,RB,567.4,351.0
255,CeeDee Lamb DAL,WR,1488.0,332.5
256,Tyreek Hill MIA,WR,1542.2,325.4
89,Breece Hall NYJ,RB,529.2,301.4
258,Amon-Ra St. Brown DET,WR,1354.2,297.4
...,...,...,...,...
514,Kawaan Baker NE,WR,19.4,3.8
253,Mike Boone CAR,RB,14.0,3.4
254,Tyrion Davis-Price PHI,RB,3.0,3.0
515,Quintez Cephus HOU,WR,1.5,1.1


In [21]:
# Label -> URL of the Sleeper draft board to scrape.
mock_draft_url = {
    "Mock draft": "https://sleeper.com/draft/nfl/1132887124602236928",
}

In [25]:
def scrape_sleeper_draft(dictionary, output_dir="data/scraped", timeout=30):
    """Download each draft URL and save the tabular data found in it as CSV.

    Parameters
    ----------
    dictionary : dict
        Maps a label to the URL to scrape. The label becomes the file name
        prefix: ``<output_dir>/<label>_.csv``.
    output_dir : str, optional
        Directory the CSV files are written to; created if it is missing.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    None
        Results are written to disk and progress is printed.

    Notes
    -----
    A URL that cannot be downloaded, or in which the draft board element is
    not found, is reported and skipped so that later URLs are still tried.

    NOTE(review): ``soup.find("player-name")`` looks for an element whose
    *tag name* is ``player-name``, and the code then reads ``<th>``/``<tr>``/
    ``<td>`` elements inside it. The correct selector for the draft board is
    not established in this file, so it is left as written - confirm against
    the page markup.
    """
    for name, url in dictionary.items():
        try:
            # Without a timeout a stalled connection would hang the cell forever.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as error:
            print(f"Could not download {name} ({url}): {error}")
            continue

        soup = BeautifulSoup(response.text, "html.parser")

        table = soup.find("player-name")
        if table is None:
            print(f"Cannot find draft board: {name} ({url})")
            continue

        headers = [header.get_text().strip() for header in table.find_all("th")]
        rows = []
        for row in table.find_all("tr"):
            cells = [cell.get_text().strip() for cell in row.find_all("td")]
            if cells:  # Only append non-empty rows
                rows.append(cells)

        # Combine headers and rows into a DataFrame
        df = pd.DataFrame(rows, columns=headers)

        # Create the target directory lazily so nothing is created when
        # there is nothing to save.
        os.makedirs(output_dir, exist_ok=True)
        filename = os.path.join(output_dir, f"{name}_.csv")
        df.to_csv(filename, index=False)

        print(f"Data from {name} ({url}) saved to {filename}")

    print("Done!")
    return

In [26]:
# Call the scraper by the name it is defined under in this notebook; the
# previous name `scrape_draft` is not defined anywhere in the notebook and
# fails with NameError on a fresh kernel.
scrape_sleeper_draft(mock_draft_url)

Cannot find draft board.
Done!


In [30]:
# Try to read the draft board from the static HTML of the page.
# NOTE(review): the saved notebook shows no output for this cell; presumably
# the board is rendered client-side, so the static HTML may contain no team
# columns at all - confirm.
url = "https://sleeper.com/draft/nfl/1132887124602236928"
response = requests.get(url, timeout=30)  # never wait forever on a stalled connection
response.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
soup = BeautifulSoup(response.content, "html.parser")

# Find all the team columns (the tag name is "div", without the angle bracket)
team_columns = soup.find_all("div", class_="team-column")

# Dictionary to store player names for each team
teams_players = {}

# Iterate through each team column
for index, team in enumerate(team_columns, start=1):
    # Find all player names within this team column
    player_elements = team.find_all("div", class_="player-name")
    player_names = [player.text for player in player_elements]

    # Use the element's id when it has one; otherwise fall back to a
    # sequential key so teams without an id do not overwrite each other.
    team_id = team.get("id") or f"team_{index}"
    teams_players[team_id] = player_names

# Print out the players for each team
for team_id, players in teams_players.items():
    print(f"{team_id}: {players}")

In [54]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import time

# Setup Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run in headless mode (no UI)

# Path to ChromeDriver: the CHROMEDRIVER_PATH environment variable wins, so the
# notebook can run on another machine; the original local path is the fallback.
chromedriver_path = os.environ.get(
    "CHROMEDRIVER_PATH",
    "/Users/jakebarnabe/Downloads/chromedriver-mac-arm64/chromedriver",
)
service = Service(chromedriver_path)

# Create a WebDriver instance
driver = webdriver.Chrome(service=service, options=chrome_options)

url = "https://sleeper.com/draft/nfl/1132887124602236928"
teams_players = {}

try:
    # Open the webpage
    driver.get(url)

    # Wait for the content to load (adjust time as needed)
    time.sleep(5)

    # Find elements containing player names
    team_columns = driver.find_elements(By.CLASS_NAME, "team-column")

    for i, team in enumerate(team_columns):
        players = []
        player_elements = team.find_elements(By.CLASS_NAME, "player")
        for player in player_elements:
            name = player.find_element(By.CLASS_NAME, "player-name").text

            # Read the position label once and split it on single spaces.
            # Item 0 is used as the position and item 2 as the NFL team.
            # NOTE(review): presumably the label looks like "WR - MIN" - confirm.
            position_parts = player.find_element(By.CLASS_NAME, "position").text.split(" ")
            position = position_parts[0]
            # A label with fewer than three parts has no team entry; use an
            # empty team instead of failing with IndexError.
            nfl_team = position_parts[2] if len(position_parts) > 2 else ""

            players.append(" ".join(part for part in (name, position, nfl_team) if part))

        teams_players[f"team_{i+1}"] = players
finally:
    # Close the browser even when a lookup above raises, so no headless
    # Chrome process is left running.
    driver.quit()

# Print or process the data
for team, players in teams_players.items():
    print(f"{team}: {players}")

team_1: ['J. Jefferson WR MIN', 'D. Adams WR LV', 'J. Allen QB BUF', 'R. White RB TB', 'M. Pittman WR IND', 'D. Kincaid TE BUF', 'D. Montgomery RB DET', 'X. Worthy WR KC', 'C. Ridley WR TEN', 'J. Ferguson TE DAL', 'B. Robinson RB WAS', 'R. Doubs WR GB', 'J. Ford RB CLE', 'C. Dicker K LAC', 'N. Jets DEF NYJ']
team_2: ['J. Chase WR CIN', 'S. LaPorta TE DET', 'D. Henry RB BAL', 'M. Nabers WR NYG', 'D. Moore WR CHI', 'J. Burrow QB CIN', 'C. Kirk WR JAX', 'T. Pollard RB TEN', 'Z. White RB LV', 'B. Thomas WR JAX', 'D. Goedert TE PHI', 'J. Daniels QB WAS', 'H. Butker K KC', 'J. Meyers WR LV', 'B. Ravens DEF BAL']
team_3: ['C. McCaffrey RB SF', 'D. London WR ATL', 'T. Kelce TE KC', 'J. Cook RB BUF', 'K. Walker RB SEA', 'T. Dell WR HOU', 'D. Swift RB CHI', 'J. Love QB GB', 'J. Addison WR MIN', 'T. Spears RB TEN', 'B. Purdy QB SF', 'C. Samuel WR BUF', 'P. Freiermuth TE PIT', 'K. Fairbairn K HOU', 'C. Browns DEF CLE']
team_4: ['T. Hill WR MIA', 'K. Williams RB LAR', 'I. Pacheco RB KC', 'C. Kupp W

In [55]:
# Display the list of players stored under the "team_1" key.
teams_players["team_1"]

['J. Jefferson WR MIN',
 'D. Adams WR LV',
 'J. Allen QB BUF',
 'R. White RB TB',
 'M. Pittman WR IND',
 'D. Kincaid TE BUF',
 'D. Montgomery RB DET',
 'X. Worthy WR KC',
 'C. Ridley WR TEN',
 'J. Ferguson TE DAL',
 'B. Robinson RB WAS',
 'R. Doubs WR GB',
 'J. Ford RB CLE',
 'C. Dicker K LAC',
 'N. Jets DEF NYJ']

In [56]:
# One column per team; row i holds the i-th player scraped for that team.
# Each list is wrapped in a Series so teams with fewer players are padded with
# NaN; passing lists of unequal length straight to DataFrame raises ValueError.
mock_draft_df = pd.DataFrame(
    {
        team: pd.Series(players, dtype="object")
        for team, players in teams_players.items()
    }
)

mock_draft_df

Unnamed: 0,team_1,team_2,team_3,team_4,team_5,team_6,team_7,team_8,team_9,team_10
0,J. Jefferson WR MIN,J. Chase WR CIN,C. McCaffrey RB SF,T. Hill WR MIA,C. Lamb WR DAL,A. Brown WR PHI,B. Robinson RB ATL,B. Hall RB NYJ,A. St. Brown WR DET,G. Wilson WR NYJ
1,D. Adams WR LV,S. LaPorta TE DET,D. London WR ATL,K. Williams RB LAR,T. Etienne RB JAX,S. Barkley RB PHI,P. Nacua WR LAR,J. Taylor RB IND,J. Gibbs RB DET,M. Harrison WR ARI
2,J. Allen QB BUF,D. Henry RB BAL,T. Kelce TE KC,I. Pacheco RB KC,C. Olave WR NO,J. Jacobs RB GB,B. Aiyuk WR SF,P. Mahomes QB KC,J. Hurts QB PHI,L. Jackson QB BAL
3,R. White RB TB,M. Nabers WR NYG,J. Cook RB BUF,C. Kupp WR LAR,J. Waddle WR MIA,M. Evans WR TB,D. Samuel WR SF,N. Collins WR HOU,D. Achane RB MIA,S. Diggs WR HOU
4,M. Pittman WR IND,D. Moore WR CHI,K. Walker RB SEA,D. Metcalf WR SEA,Z. Flowers WR BAL,C. Stroud QB HOU,A. Kamara RB NO,G. Kittle TE SF,M. Andrews TE BAL,T. McBride TE ARI
5,D. Kincaid TE BUF,J. Burrow QB CIN,T. Dell WR HOU,G. Pickens WR PIT,K. Pitts TE ATL,D. Smith WR PHI,A. Richardson QB IND,A. Cooper WR CLE,A. Jones RB MIN,J. Mixon RB HOU
6,D. Montgomery RB DET,C. Kirk WR JAX,D. Swift RB CHI,J. Conner RB ARI,R. Stevenson RB NE,T. Higgins WR CIN,K. Allen WR CHI,T. McLaurin WR WAS,N. Harris RB PIT,R. Rice WR KC
7,X. Worthy WR KC,T. Pollard RB TEN,J. Love QB GB,K. Murray QB ARI,D. Prescott QB DAL,D. Njoku TE CLE,C. Godwin WR TB,R. Odunze WR CHI,J. Reed WR GB,R. Mostert RB MIA
8,C. Ridley WR TEN,Z. White RB LV,J. Addison WR MIN,E. Engram TE JAX,L. McConkey WR LAC,J. Williams RB DEN,B. Bowers TE LV,N. Chubb RB CLE,H. Brown WR KC,K. Coleman WR BUF
9,J. Ferguson TE DAL,B. Thomas WR JAX,T. Spears RB TEN,D. Hopkins WR TEN,J. Brooks RB CAR,D. Johnson WR CAR,J. Warren RB PIT,A. Ekeler RB WAS,D. Singletary RB NYG,Z. Moss RB CIN
