In [17]:
import requests, time, re, json
from datetime import date
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 500)
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Credentials live in a JSON-formatted text file outside of this project tree.
config_path = '../../../Notes-General/config.txt'
with open(config_path, 'r') as f:
    creds = json.load(f)

# run date; stamped on every scraped record and on every exported file name
today = date.today()

# relative location of the geckodriver executable handed to selenium
browser_path = r"..\browsers\geckodriver.exe"

In [2]:
# Shared output schemas. Every outlet's scrape is shaped into one of these three layouts.

# stat projections: identity columns first, then one group of stat columns per category
_identity_columns = ["outlet", "date", "playerId", "name", "shortName", "pos", "team", "GamesPlayed"]
_passing_columns = [
    "PassAttempts", "PassCompletions", "PassingYards", "PassingYardsPerGame", "TouchdownsPasses",
    "InterceptionsThrown", "PasserRating",
]
_rushing_columns = ["RushingAttempts", "RushingYards", "AverageYardsPerRush", "RushingTouchdowns"]
_receiving_columns = [
    "Targets", "Receptions", "ReceivingYards", "YardsPerGame", "AverageYardsPerReception",
    "ReceivingTouchdowns",
]
_kicking_columns = [
    "FieldGoalsMade", "FieldGoalAttempts", "LongestFieldGoal",
    "FieldGoals119Yards", "FieldGoals119YardAttempts",
    "FieldGoals2029Yards", "FieldGoals2029YardAttempts",
    "FieldGoals3039Yards", "FieldGoals3039YardAttempts",
    "FieldGoals4049Yards", "FieldGoals4049YardAttempts",
    "FieldGoals50Yards", "FieldGoals50YardsAttempts",
    "ExtraPointsMade", "ExtraPointsAttempted",
]
_defense_columns = [
    "Interceptions", "Safeties", "Sacks", "Tackles", "DefensiveFumblesRecovered", "ForcedFumbles",
    "DefensiveTouchdowns", "ReturnTouchdowns", "PointsAllowed", "PointsAllowedPerGame",
    "NetPassingYardsAllowed", "RushingYardsAllowed", "TotalYardsAllowed", "YardsAgainstPerGame",
]
_fantasy_columns = ["FantasyPoints", "FantasyPointsPerGame"]

projection_columns = (
    _identity_columns
    + _passing_columns
    + _rushing_columns
    + _receiving_columns
    + ["FumblesLost"]
    + _kicking_columns
    + _defense_columns
    + _fantasy_columns
)

# expert rankings: one record per (ranking group, expert, player)
ranking_columns = ["outlet", "date", "group", "expert", "rank", "name", "playerId", "team", "pos"]

# average draft position: one record per (outlet, player)
adp_columns = ["outlet", "date", "playerId", "name", "shortName", "pos", "team", "adp", "high", "low"]

# ----------- CBS - PPR -----------

### cbs projections

In [3]:
# CBS rest-of-season PPR projections: one page per position, each page holding a single stats table.
url_projections = "https://www.cbssports.com/fantasy/football/stats/{pos}/{year}/restofseason/projections/ppr/"
year = 2024
positions = ["QB", "RB", "WR", "TE", "K", "DST"]
# class names targeted in the page HTML
tableClass = "TableBase-table"  
tableHeader = "TableBase-headTr"
headerClass = "Tablebase-tooltipInner"
tableRow = "TableBase-bodyTr"
tableD = "TableBase-bodyTd"

# One DataFrame per position. They are combined once after the loop instead of
# re-concatenating the growing master frame on every iteration.
position_frames = []

# loop through each position to retrieve HTML and convert to df
for pos_code in positions:

    # pause between requests
    time.sleep(3)
    # updating URL for each position
    url_formatted = url_projections.format(pos=pos_code, year=year)

    # retrieving HTML and converting it to soup
    r = requests.get(url_formatted)
    # surface HTTP failures here, with the status code, instead of as a missing-table error below
    r.raise_for_status()
    soup = bs(r.text)

    # accessing table with the data
    table = soup.find("table", class_=tableClass)
    if table is None:
        raise RuntimeError("No projections table found at {}".format(url_formatted))

    is_dst = pos_code == "DST"

    # accounting for the difference in DEF headers: the identity columns are filled from the
    # first cell of each row, the remaining column names are read from the table header
    if is_dst:
        cols = ["pos", "team","name"]
    else:
        cols = ["playerId", "name", "shortName", "pos", "team"]

    # header labels are reduced to their alphanumeric characters to form the stat column names
    cols.extend(''.join(filter(str.isalnum, header.text))
                for header in table.find_all("div", class_=headerClass))

    # accessing the data in the body
    body = table.find("tbody")
    data = []
    for tr in body.find_all("tr", class_=tableRow):
        # pos is not provided on the DST page, so it is populated by hand
        player_data = ["DST"] if is_dst else []

        for td in tr.find_all("td", class_=tableD):

            if is_dst:
                span = td.find_all("span", class_="CellLogoNameLockup")

                if span:
                    for s in span:
                        # team identifier taken from the link url, then the displayed name
                        player_data.append(s.find("a")["href"].split("/")[3])
                        player_data.append(str.strip(td.text))
                # non-span <td>: a plain stat value
                else:
                    player_data.append(str.strip(td.text))

            # processing table body for all pos except DST
            else:
                # the player name, id, pos, and team are all in spans; the stat <td>'s have no spans
                span_short = td.find_all("span", class_="CellPlayerName--short")
                span_long = td.find_all("span", class_="CellPlayerName--long")

                # if the <td> has a span, the player info will be extracted
                if span_long:

                    for s in span_long:
                        # player id from the href url
                        player_data.append(s.find("a")["href"].split("/")[3])
                        # player full name
                        player_data.append(str.strip(s.find("a").text).replace(".", ""))

                    for s in span_short:
                        # player short name
                        player_data.append(s.find("a").text.replace(".", ""))
                        # player position
                        player_data.append(str.strip(s.find("span", class_="CellPlayerName-position").text))
                        # player nfl team
                        player_data.append(str.strip(s.find("span", class_="CellPlayerName-team").text))

                # non-span <td>: a plain stat value
                else:
                    player_data.append(str.strip(td.text))

        # each player is a list of identity values followed by stats
        data.append(player_data)

    # converts list of lists to a data frame with the column names pulled from the header
    position_frames.append(pd.DataFrame(data, columns=cols))

# The empty frame goes first so the shared projection column order leads the result.
df_cbs_proj = pd.concat([pd.DataFrame(columns=projection_columns)] + position_frames,
                        axis=0, ignore_index=True)

df_cbs_proj.loc[:,'outlet'] = "cbs"
df_cbs_proj.loc[:,'date'] = today

In [4]:
# export the CBS projections; the file name carries the run date
cbs_proj_path = "../data/projection/offseason/cbs_proj_{}.xlsx".format(str(today))
df_cbs_proj.to_excel(cbs_proj_path, index=False)

### CBS draft rankings (NOT WORKING SINCE 2024)

In [63]:
url_rankings = "https://www.cbssports.com/fantasy/football/rankings/ppr/{pos}/"
positions = ["top200","QB", "RB", "WR", "TE", "K", "DST"]

# ranking groups that are not scraped; remove an entry to scrape that page again
skipped_groups = {"top200"}

# key class names that will be targeted
parentDivClass = "rankings-table multi-authors  "  # contains all expert rankings (3 tables)
# NOTE(review): BeautifulSoup splits class attributes on whitespace, so this whitespace-padded
# value may never match; `soup.select("div.experts-column.triple")` may be needed instead -- confirm
# against the live page.
individualRankingDivClass = """ 					experts-column  						triple	"""  # 3 of these for their 3 experts  
authorNameAClass = "author-name"
playersDivClass = "player-wrapper"  # the divs of interest are in here but it also includes data that is not needed 

df_cbs_ranking = pd.DataFrame(columns=ranking_columns)

for pos in positions:
    # skip before sleeping/fetching so no request is spent on a page that is thrown away
    if pos in skipped_groups:
        continue

    time.sleep(3)    
    # retrieving HTML and converting it to soup
    url_formatted = url_rankings.format(pos=pos)
    r = requests.get(url_formatted)
    soup = bs(r.text)

    # finding the tables with rankings
    rankingTables = soup.find_all("div", class_=individualRankingDivClass)

    # looping through the expert ranks that are in their own tables
    player_ranking_data = []
    for rt in rankingTables:
        # extracting expert name
        expert = rt.find("a", class_=authorNameAClass).span.text

        # the div whose children hold the player level ranking data
        ranks = rt.find("div", class_=playersDivClass)
        if ranks is None:
            # nothing to read for this expert
            continue

        for p in ranks:
            # Not every child of the wrapper is a player entry; a child missing any of the
            # expected pieces raises while being read and is skipped.
            try:
                rank = str.strip(p.find("div", class_="rank").text)        # expert rank
                player_name = str.strip(p.find("span", class_="player-name").text)
                playerId = str.strip(p.find("a")["href"].split("/")[4])    # cbs playerId is in the url

                if pos == "DST":
                    # the DST "player" is the team itself: the name doubles as the team
                    name = player_name
                    team = player_name
                    player_pos = pos
                else:
                    name = player_name.replace(".", "")                     # cbs shortName
                    team_and_pos = p.find("span", class_="team position").text.split()
                    team = str.strip(team_and_pos[0])
                    # the overall list mixes positions, so the position is read from the page;
                    # position pages use the page's own position
                    player_pos = str.strip(team_and_pos[1]) if pos == "top200" else pos
            except Exception:
                continue

            player_ranking_data.append(["cbs", today, pos, expert, rank, name, playerId, team, player_pos])

    # creating temp dataframe that includes all expert rankings for a grouping to add to the master df 
    temp_df = pd.DataFrame(player_ranking_data, columns=ranking_columns)        
    df_cbs_ranking = pd.concat([df_cbs_ranking, temp_df], axis = 0, ignore_index=True)

In [None]:
# Export the CBS rankings. The target directory now matches the other export cells in this
# notebook ("../data/projection/offseason/"); this cell previously pointed at "Data/...".
df_cbs_ranking.to_excel("../data/projection/offseason/cbs_rank_{}.xlsx".format(str(today)), index=False)

### CBS ADP

In [5]:
# CBS average draft position: a single page with one table, one row per player.
cbs_adp_url = "https://www.cbssports.com/fantasy/football/draft/averages/"

r = requests.get(cbs_adp_url)
soup = bs(r.text)

table = soup.find("table", class_="TableBase-table")
body = table.find("tbody")

adps = []
for tr in body.find_all("tr"):
    data = tr.find_all("td")

    # the second cell carries all of the player identity information
    player_cell = data[1]
    playerId = player_cell.find("a")["href"].split("/")[3]
    shortName = player_cell.find("span", class_="CellPlayerName--short").text.split("\n")[0].replace(".", "")
    fullName = player_cell.find("span", class_="CellPlayerName--long").text.split("\n")[0].replace(".", "")
    pos = player_cell.find("span", class_="CellPlayerName-position").text.strip()
    team = player_cell.find("span", class_="CellPlayerName-team").text.strip()

    adp = data[3].text.strip()

    # the fifth cell holds "high/low" in one string; both halves are trimmed the same way
    highLow = data[4].text.split("/")
    high = highLow[0].strip()
    low = highLow[1].strip()

    adps.append(["cbs", today, playerId, fullName, shortName, pos, team, adp, high, low])

df_cbs_adp = pd.DataFrame(adps, columns = adp_columns)

In [7]:
# export the CBS ADP table; the file name carries the run date
cbs_adp_path = "../data/projection/offseason/cbs_adp_{}.xlsx".format(str(today))
df_cbs_adp.to_excel(cbs_adp_path, index=False)

# ----------- Fantasy Pros - HPPR -----------

Fantasy Pros uses CBS and ESPN for season stats.

### Fantasy Pros Rankings

In [8]:
# FantasyPros expert rankings: one page per ranking group, one table column per expert.
url_fp_rankings = r"https://www.fantasypros.com/nfl/fantasy-football-rankings/{pos}.php"
# ranking group label -> page slug used in the url
fp_url_positions = {"top500":"half-point-ppr-overall", "QB":"qb", "RB":"half-point-ppr-rb", 
                    "WR":"half-point-ppr-wr", "TE":"half-point-ppr-te", "K":"k", "DST":"dst"}

all_ranks = []    
for k, v in fp_url_positions.items():
    time.sleep(3)
    url_fp_formatted = url_fp_rankings.format(pos=v)
    r = requests.get(url_fp_formatted)
    soup = bs(r.text)

    # expert name and ranking publish date, in the order the expert columns appear in the header
    experts = []
    for a in soup.find_all("th", class_="expert__th"):
        expert_name = a['data-sort-label']
        publish_date = str.strip(a.find("div", class_="expert__publish-date").text)
        experts.append([expert_name, publish_date])

    # player info and ranks; DST rows differ only in how the short name is derived
    for p in soup.find_all("tr", class_="player-row mpb-player__tr"):
        playerId = p["data-pid"]  # fp playerid
        mobile_name = p.find("span", class_="mobile-only").text
        if k == "DST":
            shortName = mobile_name.split()[0]       # first word only
        else:
            shortName = mobile_name.replace(".", "") # fp short name without periods
        TEAM = p.find("span", class_="player__team").text  # player nfl team
        POS = p.find("span", class_="player__position").text  # player position

        # cells without a class attribute are the per-expert ranks; they are matched to the
        # header experts by position
        html_ranks = p.find_all("td", attrs={'class': None})
        for rank_idx, rank_cell in enumerate(html_ranks):
            expert_name, publish_date = experts[rank_idx]
            # the record's date is the expert's publish date, not the scrape date
            all_ranks.append(["fantasyPros", publish_date, k, expert_name, rank_cell.text,
                              shortName, playerId, TEAM, POS])

df_fp_ranking = pd.DataFrame(all_ranks, columns=ranking_columns)

In [9]:
# export the FantasyPros rankings; the file name carries the run date
fp_rank_path = "../data/projection/offseason/fp_rank_{}.xlsx".format(str(today))
df_fp_ranking.to_excel(fp_rank_path, index=False)

### Fantasy Pros ADP

In [11]:
# FantasyPros ADP page: one row per player with a separate ADP column per draft site.
fp_adp_url = "https://www.fantasypros.com/nfl/adp/half-point-ppr-overall.php"

r = requests.get(fp_adp_url)
soup = bs(r.text)

table = soup.find_all("table")[0].find("tbody")

# (outlet label, index of the table cell holding that site's adp); each becomes its own record
# NOTE(review): the cell indexes are positional -- confirm they still match the page's column order.
adp_sources = [("yahoo", 3), ("fantrax", 4), ("ffc", 5), ("sleeper", 6)]

adps = []
for tr in table.find_all("tr"):

    data = tr.find_all("td")

    fullName = data[1].find("a", class_="player-name").text.replace(".", "")

    # The player id is embedded in a class name of the form "...id-<id>" on an element inside
    # the name cell. It stays missing (NaN) when no such class is present, so a parsed HTML
    # element can never end up in the id column.
    playerId = np.nan
    for c in data[1].find_all(class_=True):
        classes = c['class']
        if len(classes) > 1:
            for i in classes:
                # require the full "id-" marker so classes that merely contain "id" are ignored
                if "id-" in i:
                    playerId = i.split("id-")[1]

    # the position cell text starts with the position letters; the leading non-digits are kept
    pos = re.search(pattern = r"\D*", string=data[2].text)[0]

    if pos == 'DST':
        team = fullName
    else:
        # the team is in a <small> tag; players without one are treated as free agents
        team_tag = data[1].find("small")
        team = team_tag.text if team_tag is not None else "FA"

    # adding an entry for each site's adp; they are their own records
    for outlet, cell_idx in adp_sources:
        adps.append([outlet, today, playerId, fullName, np.nan, pos, team,
                     data[cell_idx].text, np.nan, np.nan])

df_fp_adp = pd.DataFrame(adps, columns=adp_columns)


In [13]:
# export the FantasyPros ADP records; the file name carries the run date
fp_adp_path = "../data/projection/offseason/fp_adp_{}.xlsx".format(str(today))
df_fp_adp.to_excel(fp_adp_path, index=False)

# ----------- ESPN - HPPR -----------

### ESPN Projections

In [None]:
# ESPN projections are rendered by scripts, so the pages are driven through a real browser.
# NOTE(review): `Service` is imported from selenium.webdriver.chrome.service at the top of the
# notebook but is used here with geckodriver/Firefox -- confirm whether
# selenium.webdriver.firefox.service.Service was intended.
service = Service(browser_path)
driver = webdriver.Firefox(service=service)

url_espn_proj = "https://fantasy.espn.com/football/players/projections"

# One list per player. Identity values are appended from the first table, then the same list
# is extended from the second (stats) and third (fantasy points) tables. The two counters
# track which player each of those later tables has reached across all pages.
espn_player_proj_player = []
page_count1 = 0
page_count2 = 0

# Any failure while scraping propagates, so a half-filled table is never built or exported,
# and the `finally` guarantees the browser is shut down either way.
try:
    # open the initial projection page
    driver.get(url_espn_proj) 
    # sleep to let the html load
    time.sleep(10)

    # changing to the desired projection view
    button = driver.find_element(By.XPATH, "//button[@class='Button Button--filter player--filters__projections-button']")
    button.click()
    time.sleep(5)

    # grabs the entire page's html
    html = driver.execute_script("return document.body.innerHTML")
    soup = bs(html)

    # grabbing the number of pages there are in the projections
    pagenation_list = soup.find("div", class_="Pagination__wrap overflow-x-auto")
    pages = pagenation_list.find_all("li")
    last_page = pages[-1].text

    for page in range(1, int(last_page)+1):
        html = driver.execute_script("return document.body.innerHTML")
        soup = bs(html)

        # grabbing the projection tables
        tables = soup.find_all("table")

        # the player info table
        for tr in tables[0].find_all("tr"):
            for td in tr:
                if td.find("a", class_="AnchorLink link clr-link pointer"):
                    # grabs the ESPN player id from the image url
                    playerId = td.find("img")['src'].split("/")[-1].split(".")[0]
                    name = td.find("a", class_="AnchorLink link clr-link pointer").text.replace(".", "")
                    position = td.find("span", class_="playerinfo__playerpos ttu").text
                    team = td.find("span", class_="playerinfo__playerteam").text

                    espn_player_proj_player.append(["espn", today, playerId, name, np.nan, position, team, np.nan])

        # the stat projection table
        for tr in tables[1].find_all("tr",class_="Table__TR Table__TR--lg Table__odd"):
            comp_att = tr.find("div", {"title":"Each Pass Completed & Each Pass Attempted"}).text.split("/")
            pass_comps = comp_att[0]
            pass_atts = comp_att[1]
            pass_yds = tr.find("div", {"title":"Passing Yards"}).text
            pass_tds = tr.find("div", {"title":"TD Pass"}).text
            ints = tr.find("div", {"title":"Interceptions Thrown"}).text
            rush_atts = tr.find("div", {"title":"Rushing Attempts"}).text
            rush_yds = tr.find("div", {"title":"Rushing Yards"}).text
            rush_tds = tr.find("div", {"title":"TD Rush"}).text
            rec = tr.find("div", {"title":"Each reception"}).text
            rec_yds = tr.find("div", {"title":"Receiving Yards"}).text
            rec_tds = tr.find("div", {"title":"TD Reception"}).text
            rec_trgts = tr.find("div", {"title":"Receiving Target"}).text

            player_row = espn_player_proj_player[page_count1]
            # passing, rushing and receiving values in projection_columns order; the literal
            # zeros fill the per-game/average/rating columns that are not read from this table
            player_row.extend([pass_atts, pass_comps, pass_yds, 0, pass_tds, ints, 0,
                               rush_atts, rush_yds, 0, rush_tds,
                               rec_trgts, rec, rec_yds, 0, 0, rec_tds])
            # zero-fill every remaining stat column, leaving the last two slots for the
            # fantasy point values added below
            player_row.extend([0] * (len(projection_columns) - len(player_row) - 2))
            page_count1 += 1

        # the fantasy points table
        for tr in tables[2].find_all("tr",class_="Table__TR Table__TR--lg Table__odd"):
            # start every row from zero so values can never carry over from the previous player
            total_ff_pts = 0
            avg_ff_pts = 0
            for div in tr.find_all("div"):
                # some of the free agents/retired players don't have div["title"]; they get zeros
                try:
                    if 'point' in div['title']:
                        total_ff_pts = div.find("span").text
                    else:
                        avg_ff_pts = div.find("span").text
                except Exception:
                    total_ff_pts = 0
                    avg_ff_pts = 0

            espn_player_proj_player[page_count2].extend([total_ff_pts, avg_ff_pts])
            page_count2 += 1

        # checks for last page
        if page < int(last_page):
            # jumping to the next page
            nextButton = driver.find_element(By.XPATH, "//button[@class='Button Button--default Button--icon-noLabel Pagination__Button Pagination__Button--next']")
            nextButton.click()
            time.sleep(10)
finally:
    # quit() ends the whole browser session, not just the current window
    driver.quit()

# creating df from gathered data to merge into final df that matches the cbs structure
temp_proj = pd.DataFrame(espn_player_proj_player, columns = projection_columns)
                        

df_espn_proj = pd.DataFrame(columns = projection_columns)

# final espn projections data; "--" placeholders from the page become 0
df_espn_proj = pd.concat([df_espn_proj, temp_proj]).replace("--", 0)

In [16]:
# export the ESPN projections; the file name carries the run date
espn_proj_path = "../data/projection/offseason/espn_proj_{}.xlsx".format(str(today))
df_espn_proj.to_excel(espn_proj_path, index=False)

### ESPN Rankings

In [18]:
# ranking group -> ESPN article holding that group's rankings
espn_ranking_urls = {
"QB":"https://www.espn.com/fantasy/football/story/_/id/36312955/nfl-fantasy-football-rankings-2024-qb-quarterback",
"RB":"https://www.espn.com/fantasy/football/story/_/id/36313077/nfl-fantasy-football-rankings-2024-rb-running-back-ppr",
"WR":"https://www.espn.com/fantasy/football/story/_/id/36313408/nfl-fantasy-football-rankings-2024-wr-wide-receiver-ppr",
"TE":"https://www.espn.com/fantasy/football/story/_/id/36313475/nfl-fantasy-football-rankings-2024-te-tight-end-ppr",
"K":"https://www.espn.com/fantasy/football/story/_/id/36313520/nfl-fantasy-football-rankings-2024-kicker-k",
"DST":"https://www.espn.com/fantasy/football/story/_/id/36313516/nfl-fantasy-football-rankings-2024-dst-defense",
"IDP":"https://www.espn.com/fantasy/football/story/_/id/36334764/2023-fantasy-football-rankings-idp-defense-defensive-line-linebacker-defensive-back",
"top200":"https://www.espn.com/fantasy/football/story/_/id/36135778/fantasy-football-ppr-rankings-2024-quarterback-running-back-wide-receiver-tight-end-top-200"
}

# class string shared by every ranking table on the position and IDP pages
espn_rank_table_class = "inline-table rankings-table fullWidth sortable"
# the IDP page holds one table per position, in this order
idp_positions = ["DL", "LB", "DB"]


def _espn_expert_names(ranking_table):
    """Return the header text of every column after the first two, in page order."""
    return [th.text for th in ranking_table.find("thead").find_all("th")[2:]]


def _espn_team(player_cell):
    """Return the upper-cased team from a player cell, without a trailing status tag.

    The team is the text after the first comma of the cell's "rank" div. When that div also
    contains a <span> (injury/suspension designation), its text trails the team in the same
    string and is cut off: four characters for a multi-character tag (e.g. "SSPD"),
    one character otherwise.
    NOTE(review): a multi-character tag that is not four characters long would be cut
    incorrectly -- confirm which designations the site uses.
    """
    rank_div = player_cell.find("div", class_="rank")
    team = rank_div.text.split(",")[1].strip().upper()
    status = rank_div.find("span")
    if status is None:
        return team
    return team[:-4] if len(status.text) > 1 else team[:-1]


# final dataframe structure to house all the rankings
df_espn_ranking = pd.DataFrame(columns=ranking_columns)

service = Service(browser_path)
driver = webdriver.Firefox(service=service)

# the `finally` shuts the browser down even when a page fails to parse
try:
    # looping through the urls to aggregate the rankings
    for group, url in espn_ranking_urls.items():
        # opening the webpage and allowing the scripts to load for the HTML to be accessed
        url_espn_formatted = url
        driver.get(url_espn_formatted)
        time.sleep(10)

        # grabs the entire page's html
        html = driver.execute_script("return document.body.innerHTML")
        soup = bs(html)

        # list of lists; each inner list is one player's rank from a single expert
        player_ranks = []

        # the top 100 and 200 have different html structure: ranks are links inside paragraphs
        if group == "top100" or group == "top200":

            # grabbing the desired html chunk
            ranking_table = soup.find("div", class_="article-body").find_all("p")

            expert = "Berry" if group == "top100" else "Cockcroft"

            # Paragraphs with at least 10 links are treated as ranking paragraphs; the rank is
            # the running position of the link across those paragraphs.
            rank = 1
            for p in ranking_table[2:]:
                test = p.find_all("a")
                if len(test) >= 10:
                    for a in test:
                        fullName = a.text.replace(".", "")
                        playerId = a['href'].split("/")[7]

                        player_ranks.append(["espn", today, group, expert, rank, fullName, playerId, np.nan,  np.nan])
                        rank += 1

        # IDP page has 3 separate tables for positions instead of a single table
        elif group == "IDP":
            ranking_tables = soup.find_all("table", class_=espn_rank_table_class)

            # positions are assigned by table order; a table beyond the known positions is
            # ignored rather than labelled with the previous table's position
            for ranking_table, POS in zip(ranking_tables, idp_positions):

                # expert names in the order they are listed
                expert_names = _espn_expert_names(ranking_table)

                for tr in ranking_table.find("tbody").find_all("tr", class_=""):
                    tds = tr.find_all("td")

                    playerId = tds[0].find("a")["data-player-id"]
                    name = tds[0].find("a").text.replace(".", "")
                    team = _espn_team(tds[0])

                    for i, expert in enumerate(expert_names):
                        # expert columns start at the third cell of each body row
                        exRank = pd.to_numeric(tds[i + 2].text, errors='coerce')

                        player_ranks.append(["espn", today, group, expert, exRank, name, playerId, team,  POS])

        # for position specific rankings
        else:
            ranking_table = soup.find("table", class_=espn_rank_table_class)

            # expert names in the order they are listed
            expert_names = _espn_expert_names(ranking_table)

            for tr in ranking_table.find("tbody").find_all("tr", class_=""):
                tds = tr.find_all("td")

                playerId = tds[0].find("a")["data-player-id"]
                if group == "DST":
                    name = tds[0].find("a").text.split()[0]
                else:
                    name = tds[0].find("a").text.replace(".", "")

                POS = group
                team = _espn_team(tds[0])

                # NOTE(review): the last header column is left out here but not on the IDP
                # page -- presumably it is a non-expert column (e.g. an average); confirm.
                for i, expert in enumerate(expert_names[:-1]):
                    # expert columns start at the third cell of each body row
                    exRank = pd.to_numeric(tds[i + 2].text, errors='coerce')

                    player_ranks.append(["espn", today, group, expert, exRank, name, playerId, team,  POS])

        # appended per group so completed groups are kept if a later page fails
        temp_df = pd.DataFrame(player_ranks, columns=ranking_columns)
        df_espn_ranking = pd.concat([df_espn_ranking, temp_df], axis = 0, ignore_index=True)
finally:
    # quit() ends the whole browser session, not just the current window
    driver.quit()

In [19]:
# export the ESPN rankings; the file name carries the run date
espn_rank_path = "../data/projection/offseason/espn_rank_{}.xlsx".format(str(today))
df_espn_ranking.to_excel(espn_rank_path, index=False)

### ESPN ADP

In [20]:
service = Service(browser_path)
driver = webdriver.Firefox(service=service)

espn_adp_url = "https://fantasy.espn.com/football/livedraftresults"
# number of result pages that are scraped
espn_adp_pages = 10
espn_next_button_xpath = "//button[@class='Button Button--default Button--icon-noLabel Pagination__Button Pagination__Button--next']"

adps = []

# the `finally` shuts the browser down even when a page fails to parse
try:
    # open the initial adp page
    driver.get(espn_adp_url) 
    # sleep to let the html load
    time.sleep(10)

    for n in range(espn_adp_pages):
        # grabs the html of the page currently shown
        html = driver.execute_script("return document.body.innerHTML")
        soup = bs(html)

        table = soup.find("tbody", class_="Table__TBODY")

        for tr in table.find_all("tr"):
            data = tr.find_all("td")

            fullName = data[1].find("a", class_="AnchorLink link clr-link pointer").text.replace(".", "")
            pos = data[1].find("span", class_="playerinfo__playerpos").text.replace("/","")

            # players without a team element are recorded as free agents
            team_tag = data[1].find("span", class_="playerinfo__playerteam")
            team = team_tag.text if team_tag is not None else "FA"

            adp = data[2].text

            if pos == "DST":
                playerId = ""
            else:
                # the player id is the file name of the headshot image
                playerId  = data[1].find('img', src=True)['src'].split("/")[10].split(".")[0]

            adps.append(["espn", today, playerId, fullName, np.nan, pos, team, adp, np.nan, np.nan])

        # move to the next page, except after the last one that is scraped
        if n < espn_adp_pages - 1:
            button = driver.find_element(By.XPATH, espn_next_button_xpath)
            button.click()
            time.sleep(10)
finally:
    # quit() ends the whole browser session, not just the current window
    driver.quit()

df_espn_adp = pd.DataFrame(adps, columns=adp_columns)

In [21]:
# export the ESPN ADP table; the file name carries the run date
espn_adp_path = "../data/projection/offseason/espn_adp_{}.xlsx".format(str(today))
df_espn_adp.to_excel(espn_adp_path, index=False)

# ------------ NFL --------------

### NFL projections

In [22]:
# Season-long projections from fantasy.nfl.com.
# The site shows QB/RB/WR/TE in one combined list and puts kickers and defenses
# on their own lists, selected by the `position` query parameter:
#   position=0 -> QB,RB,WR,TE    position=7 -> Kicker    position=8 -> Defense
# `offset` is the 1-based index of the first player shown on the requested page.
nfl_proj_url = [
"https://fantasy.nfl.com/research/projections?offset={}&position=0&statCategory=projectedStats&statSeason=2024&statType=seasonProjectedStats&statWeek=1",
"https://fantasy.nfl.com/research/projections?offset={}&position=7&statCategory=projectedStats&statSeason=2024&statType=seasonProjectedStats&statWeek=1",
"https://fantasy.nfl.com/research/projections?offset={}&position=8&statCategory=projectedStats&statSeason=2024&statType=seasonProjectedStats&statWeek=1"
]

NFL_PAGE_SIZE = 25          # the projection tables are paginated at 25 players per page
NFL_K_DEF_PAGES = 2         # the kicker and defense lists are read as two pages each
NFL_REQUEST_TIMEOUT = 30    # seconds; keeps a stalled request from hanging the cell


def _nfl_get_soup(url):
    """Pause one second (to go easy on the site), download `url`, and parse the HTML."""
    time.sleep(1)
    r = requests.get(url, timeout=NFL_REQUEST_TIMEOUT)
    return bs(r.text)


def _nfl_table_rows(soup):
    """Return the <tr> elements in the body of the first projection table on the page."""
    table = soup.find_all("table", class_="tableType-player hasGroups")
    return table[0].find("tbody").find_all("tr")


def _nfl_identity(cell):
    """Read (playerId, fullName, pos, team) from the first <td> of a table row.

    The <em> tag holds "POS - TEAM"; when there is no team part the player is
    recorded as a free agent ("FA").
    """
    link = cell.find('a')
    playerId = link['href'].split("=")[2]
    fullName = link.text.strip().replace(".", "")

    posAndTeam = cell.find('em').text.split("-")
    pos = posAndTeam[0].strip()
    team = posAndTeam[1].strip() if len(posAndTeam) > 1 else "FA"
    return playerId, fullName, pos, team


def _nfl_stat_number(text):
    """Convert a stat cell to a float; "-", blanks, or other non-numeric text count as 0."""
    try:
        return float(text.strip().replace(",", ""))
    except ValueError:
        return 0.0


def _nfl_row(playerId, name, pos, team, stats):
    """Build one projection record keyed by column name.

    Every stat column defaults to 0; `stats` overrides the columns this page
    actually provides. Unknown column names raise KeyError so a typo cannot
    silently drop a value.
    """
    unknown = set(stats) - set(projection_columns)
    if unknown:
        raise KeyError("not projection columns: {}".format(sorted(unknown)))

    row = dict.fromkeys(projection_columns, 0)
    row.update({"outlet": "nfl", "date": today, "playerId": playerId, "name": name,
                "shortName": np.nan, "pos": pos, "team": team})
    row.update(stats)
    return row


def _nfl_offense_row(tr):
    """Parse one QB/RB/WR/TE table row into a projection record."""
    data = tr.find_all("td")
    playerId, fullName, pos, team = _nfl_identity(data[0])
    # table columns 11-13 (return TD, fumble TD, 2pt) have no matching projection column
    return _nfl_row(playerId, fullName, pos, team, {
        "GamesPlayed": data[2].text,
        "PassingYards": data[3].text,
        "TouchdownsPasses": data[4].text,
        "InterceptionsThrown": data[5].text,
        "RushingYards": data[6].text,
        "RushingTouchdowns": data[7].text,
        "Receptions": data[8].text,
        "ReceivingYards": data[9].text,
        "ReceivingTouchdowns": data[10].text,
        "FumblesLost": data[14].text,
        "FantasyPoints": data[15].text,
    })


def _nfl_kicker_row(tr):
    """Parse one kicker table row into a projection record."""
    data = tr.find_all("td")
    playerId, fullName, pos, team = _nfl_identity(data[0])

    made_by_distance = [data[idx].text for idx in range(4, 9)]  # 0-19, 20-29, 30-39, 40-49, 50+
    made0_19, made20_29, made30_39, made40_49, made50 = made_by_distance
    # The cells are text, so they must be converted before adding; "+" on the raw
    # strings would concatenate them. Rounded to hide float addition noise.
    fgMade = round(sum(_nfl_stat_number(made) for made in made_by_distance), 2)

    return _nfl_row(playerId, fullName, pos, team, {
        "GamesPlayed": data[2].text,
        "ExtraPointsMade": data[3].text,
        "FieldGoals119Yards": made0_19,
        "FieldGoals2029Yards": made20_29,
        "FieldGoals3039Yards": made30_39,
        "FieldGoals4049Yards": made40_49,
        "FieldGoals50Yards": made50,
        "FieldGoalsMade": fgMade,
        "FantasyPoints": data[9].text,
    })


def _nfl_defense_row(tr):
    """Parse one team-defense table row into a projection record.

    The link text is stored in the `team` column and `name` is left as NaN.
    FumblesLost stays at the default 0: the defense table has no such column.
    """
    data = tr.find_all("td")
    playerId, fullName, pos, _ = _nfl_identity(data[0])
    # table column 8 (2pt) has no matching projection column
    return _nfl_row(playerId, np.nan, pos, fullName, {
        "GamesPlayed": data[2].text,
        "Sacks": data[3].text,
        "Interceptions": data[4].text,
        "DefensiveFumblesRecovered": data[5].text,
        "Safeties": data[6].text,
        "DefensiveTouchdowns": data[7].text,
        "ReturnTouchdowns": data[9].text,
        "PointsAllowed": data[10].text,
        "FantasyPoints": data[11].text,
    })


player_data = []

# --- offensive players ---
# The first page reports how many players have projections ("... of N"), which
# tells us how many further pages to request.
first_page = _nfl_get_soup(nfl_proj_url[0].format(1))
player_count = int(first_page.find("span", class_="paginationTitle").text.split("of")[-1].strip())
player_data.extend(_nfl_offense_row(tr) for tr in _nfl_table_rows(first_page))

# player_count + 1 so that a final page holding a single player (offset == player_count)
# is still requested; every page is requested exactly once.
for offset in range(NFL_PAGE_SIZE + 1, player_count + 1, NFL_PAGE_SIZE):
    page = _nfl_get_soup(nfl_proj_url[0].format(offset))
    player_data.extend(_nfl_offense_row(tr) for tr in _nfl_table_rows(page))

# --- kickers, then defenses ---
for url, parse_row in ((nfl_proj_url[1], _nfl_kicker_row), (nfl_proj_url[2], _nfl_defense_row)):
    for page_number in range(NFL_K_DEF_PAGES):
        # page_number * 25 + 1 is the offset of the first player on that page
        page = _nfl_get_soup(url.format(page_number * NFL_PAGE_SIZE + 1))
        player_data.extend(parse_row(tr) for tr in _nfl_table_rows(page))

# the site shows "-" for stats it does not project; store those as 0
df_nfl_proj = pd.DataFrame(player_data, columns=projection_columns).replace("-",0)

  df_nfl_proj = pd.DataFrame(player_data, columns=projection_columns).replace("-",0)


In [23]:
# Write the NFL.com projections to the offseason folder; the file name is stamped with the run date.
nfl_proj_path = "../data/projection/offseason/nfl_proj_{}.xlsx".format(str(today))
df_nfl_proj.to_excel(nfl_proj_path, index=False)

### NFL rankings

In [24]:
# NFL.com season rankings: one page per position group, the group being the
# `position` query parameter of the URL.
nfl_rank_url_template = "https://fantasy.nfl.com/research/rankings?leagueId=0&position={}&sort=1&statSeason=2024&statType=seasonStats"
nfl_rank_url = {
    group: nfl_rank_url_template.format(group)
    for group in ["QB", "RB", "WR", "TE", "K", "DEF", "DL", "LB", "DB"]
}

# Rows for every group are collected in one list and turned into a DataFrame once,
# instead of concatenating onto an empty frame inside the loop.
player_ranks = []

for k, v in nfl_rank_url.items():

    time.sleep(1)  # pause between requests to go easy on the site
    r = requests.get(v, timeout=30)
    soup = bs(r.text)

    # body of the ranking table for this position group
    rank_table = soup.find("table", class_="tableType-player noGroups").find("tbody")

    for tr in rank_table.find_all("tr"):
        td = tr.find_all("td")

        pos_rank = int(td[0].text)
        player_link = td[1].find("a")
        playerId = int(player_link['href'].split("=")[-1])
        full_name = player_link.text.replace(".", "")

        # the <em> tag holds "POS - TEAM"
        pos_and_team = td[1].find("em").text.split("-")
        pos = pos_and_team[0].strip()

        if k == "DEF":
            team = ""
        elif len(pos_and_team) > 1:
            team = pos_and_team[1].strip()
        else:
            # no team name for FAs
            team = "FA"

        # columns: outlet, date, group, expert, rank, name, playerId, team, pos
        player_ranks.append(["nfl", today, k, "nfl", pos_rank, full_name, playerId, team, pos])

df_nfl_ranking = pd.DataFrame(player_ranks, columns=ranking_columns)

In [25]:
# Write the NFL.com rankings to the offseason folder; the file name is stamped with the run date.
nfl_rank_path = "../data/projection/offseason/nfl_rank_{}.xlsx".format(str(today))
df_nfl_ranking.to_excel(nfl_rank_path, index=False)

# bettingpros PLAYER TOTAL PROPS

In [None]:
#######################################
# NOTE: THIS DIDN'T WORK AFTER AUGUST 2022
#######################################

# Scrape bettingpros season prop bets (rushing/receiving yards and touchdowns).
# Uses the geckodriver location configured in the first cell rather than a
# machine-specific absolute path.
service = Service(browser_path)
driver = webdriver.Firefox(service=service)

overUnder_urls = {
    "RByds":"https://www.bettingpros.com/nfl/picks/prop-bets/bet/rushing-yards/",
    "RBtds":"https://www.bettingpros.com/nfl/picks/prop-bets/bet/rushing-touchdowns/",
    "WRyds":"https://www.bettingpros.com/nfl/picks/prop-bets/bet/receiving-yards/",
    "WRtds":"https://www.bettingpros.com/nfl/picks/prop-bets/bet/receiving-touchdowns/"
}

overUnders = []
try:
    for k, v in overUnder_urls.items():
        # each category key starts with the position it applies to ("RByds" -> "RB");
        # rows for any other position are skipped
        wanted_pos = k[:2]

        # open the prop-bet page for this category
        driver.get(v)
        # sleep to let the html load
        time.sleep(10)

        html = driver.execute_script("return document.body.innerHTML")
        soup = bs(html)

        table = soup.find("table").find("tbody")

        for tr in table.find_all("tr"):
            data = tr.find_all("td")

            # the description text is "TEAM - POS"
            description = data[1].find("div", class_="yearbook-block__description").text.strip().split(" - ")
            pos = description[1].strip()
            if pos != wanted_pos:
                continue
            team = description[0].strip()

            full = data[1].find("span", class_="yearbook-block__title--block player-name player-name--desktop").text.strip().replace(".", "")
            short = data[1].find("span", class_="yearbook-block__title--block player-name player-name--mobile").text.strip().replace(".", "")
            last = data[1].find("span", class_="yearbook-block__title--block player-name").text.strip().replace(".", "")

            # The player id is embedded in a CSS class containing "id-". Reset it for
            # every row so a row without such a class cannot inherit the previous
            # player's id; when several classes match, the last one wins.
            playerId = np.nan
            for c in data[1].find_all(class_=True):
                classes = c['class']
                if len(classes) > 1:
                    for class_name in classes:
                        if "id-" in class_name:
                            playerId = class_name.split("id-")[1]

            fullName = full + " " + last
            shortName = short + " " + last

            overUnder = data[4].text.strip().split()[0]
            projectedTotal = data[5].text

            overUnders.append(["bettingPros", today, k, playerId, fullName, shortName, pos, team, overUnder, projectedTotal])
finally:
    # always shut the browser down, even when a page fails to parse
    driver.quit()

df_overUnders = pd.DataFrame(overUnders, columns=["outlet", "date", "category", "playerId", "name", "shortName", "pos", "team", "ou", "projected"])

In [None]:
# Write the over/under table to the Data folder; the file name is stamped with the run date.
over_under_path = "Data/overUnder_{}.xlsx".format(str(today))
df_overUnders.to_excel(over_under_path, index=False)

# <<< SCRATCH >>>

In [None]:
# Scratch: pull the raw odds-offer blocks from the bettingpros odds pages for one week.
season = 2022
week = 1
# geckodriver location comes from the first cell instead of a machine-specific path
service = Service(browser_path)
driver = webdriver.Firefox(service=service)

# NOTE(review): the "overunder" key points at the spread page - confirm the intended label.
urls = {"overunder": r'https://www.bettingpros.com/nfl/odds/spread/?season={season}&week={week}',
"moneyline":r"https://www.bettingpros.com/nfl/odds/moneyline/?season={season}&week={week}",
"totals":r"https://www.bettingpros.com/nfl/odds/total/?season={season}&week={week}"}

lines = []
try:
    # iterate the odds pages defined above and fill in the season/week placeholders
    for k, v in urls.items():
        driver.get(v.format(season=season, week=week))
        # sleep to let the html load
        time.sleep(10)

        html = driver.execute_script("return document.body.innerHTML")
        soup = bs(html)

        data = soup.find_all("div", class_="flex odds-offer")
        # keep each page's offers (still unparsed) so later pages do not overwrite earlier ones
        lines.append((k, data))
finally:
    # always shut the browser down, even when a page fails to load
    driver.quit()

In [None]:
# Load the ESPN IDP rankings article in Firefox and keep its rendered HTML in `soup`.
# geckodriver location comes from the first cell instead of a machine-specific path.
service = Service(browser_path)
driver = webdriver.Firefox(service=service)

try:
    driver.get("https://www.espn.com/fantasy/football/story/_/id/33898295/fantasy-football-idp-rankings-2022-top-50-defensive-linemen-linebackers-defensive-backs")
    # sleep to let the html load
    time.sleep(10)

    # grabs the entire pages html
    html = driver.execute_script("return document.body.innerHTML")
finally:
    # always shut the browser down, even when the page fails to load
    driver.quit()

soup = bs(html)

In [None]:
# Parse the ESPN IDP ranking tables in `soup` into one row per (player, expert).
# NOTE(review): each row has 8 fields and no leading "outlet" value, while
# ranking_columns has 9 - confirm how these rows are meant to be framed.
player_ranks = []
ranking_tables = soup.find_all("table", class_="inline-table rankings-table fullWidth sortable")

# The tables are labelled by their order on the page: first DL, then LB, then DB.
# zip() stops after the third table, so an unexpected extra table is not given a stale label.
idp_positions = ["DL", "LB", "DB"]

for POS, ranking_table in zip(idp_positions, ranking_tables):

    # Defined here so the cell does not depend on a `group` value left behind by
    # another cell; the ranking group of an IDP table is its position.
    group = POS

    # retrieves the expert names and the order they are listed
    # (the first two header cells are not expert columns)
    expert_names_html = ranking_table.find("thead").find_all("th")
    expert_names = [th.text for th in expert_names_html[2:]]

    player_ranks_html = ranking_table.find("tbody").find_all("tr", class_="")
    for tr in player_ranks_html:

        tds = tr.find_all("td")

        player_link = tds[0].find("a")
        playerId = player_link["data-player-id"]
        name = player_link.text

        # the "rank" div text is "<name>, <team>", optionally followed by an injury
        # designation that the site puts in a <span> inside the same div
        rank_div = tds[0].find("div", class_="rank")
        team = rank_div.text.split(",")[1].strip().upper()
        injury_tag = rank_div.find("span")
        if injury_tag is not None:
            # remove the injury designation from the team name
            team = team.replace(injury_tag.text, "")

        # expert columns start at index 2 in tbody, in the same order as in thead
        for idx, expert in enumerate(expert_names, start=2):

            # retrieves the expert rank from tbody rows; non-numeric text becomes NaN
            exRank = pd.to_numeric(tds[idx].text, errors='coerce')

            player_ranks.append([today, group, expert, exRank, name, playerId, team,  POS])
    