In [4]:
from tkinter import S
import yahoo_fantasy_api as yfa
from yahoo_oauth import OAuth2
import pandas as pd
import json
from get_chrome_driver import GetChromeDriver
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time

#connect to the yahoo API
#this oauth file must have been generated by me, not sure the instructions to do so.  probably easy.
oauth = OAuth2(None, None, from_file='oauth2.json')

#API docs: https://yahoo-fantasy-api.readthedocs.io/en/latest/yahoo_fantasy_api.html
football = yfa.game.Game(oauth, 'nfl')

leagues = football.league_ids()
l = football.to_league(leagues[-3]) #sample league for testing

[2023-04-26 15:52:10,784 DEBUG] [yahoo_oauth.oauth.__init__] Checking 
[2023-04-26 15:52:10,800 DEBUG] [yahoo_oauth.oauth.token_is_valid] ELAPSED TIME : 3944821.396823406
[2023-04-26 15:52:10,801 DEBUG] [yahoo_oauth.oauth.token_is_valid] TOKEN HAS EXPIRED
[2023-04-26 15:52:10,805 DEBUG] [yahoo_oauth.oauth.refresh_access_token] REFRESHING TOKEN


In [None]:
#gather leagues associated with my account
leagues = football.league_ids()

for league in leagues:
    try:
        #load the latest data that we've processed
        league_master_df = pd.read_csv('league_df_master.csv')
        loaded_leagues = list(league_master_df.league_key)
    except:
        #we need to start new
        loaded_leagues = []
    if league not in loaded_leagues:
        print(f'Appending league {league} to league_df')
        league_settings=football.to_league(league).settings()
        league_df = pd.DataFrame(league_settings)
        try:
            league_df_new = pd.concat([league_master_df,league_df])
        except:
            league_df_new = league_df.copy()
        league_df_new.reset_index(drop=True,inplace=True)
        league_df_new.to_csv('league_df_master.csv',index=False)#now export the latest file in case we are interrupted

leagues_df = pd.read_csv('league_df_master.csv') #load the full dataset
leagues_df.reset_index(inplace=True,drop=True)
print('Leagues df created')

#assemble managers for each league
#this cell takes a few minutes to run
    
managers = []
for league in leagues:
    try:
        #we'll cache the latest data here to enable picking up from where we left off
        managers_master_df = pd.read_csv('managers_df_master.csv')
        managers_master_df.drop_duplicates(inplace=True)
        loaded_manager_leagues = list(managers_master_df.league_key)
    except:
        #we need to start new
        loaded_manager_leagues = []
    if league not in loaded_manager_leagues:
        print(f'Appending managers for league {league}')
        lg = football.to_league(league)
        for k in lg.teams().keys():
            team = lg.teams()[k]
            manager = lg.teams()[k]['managers'][0]['manager']
            try:
                faab = team['faab_balance']
            except:
                faab = ''
            try:
                felo_score = manager['felo_score']
            except:
                felo_score = ''
            try:
                draft_grade = team['draft_grade']
            except:
                draft_grade = ''
            managers.append((k,league, lg.settings()['season'],lg.settings()['name'],team['team_id'],team['name'],manager['nickname'],felo_score,team['url'],draft_grade,team['number_of_moves'],faab,team['number_of_trades'],team['roster_adds']['coverage_value']))
        manager_df = pd.DataFrame(managers,columns=['l_manager_key','league_key','season','league_name','team_id','team','manager','felo_score','url','draft_grade','number_of_moves','number_of_trades','faab_balance','roster_adds'])
        try:
            new_manager_df = pd.concat([managers_master_df,manager_df])
        except:
            new_manager_df = manager_df.copy()
        new_manager_df.to_csv('managers_df_master.csv',index=False)

manager_df = pd.read_csv('managers_df_master.csv') #load the full dataset
manager_df.drop_duplicates(inplace=True)
manager_df.reset_index(inplace=True,drop=True)
print('Managers lists assembled')

#group leagues together if manager membership meets threshold
match_threshold = .75

league_keys = []
for key in manager_df.l_manager_key:
    league_keys.append('.'.join(key.split('.')[:-2]))
manager_df['league_key'] = league_keys

manager_df.sort_values(by=['season','league_name'],ascending=False,inplace=True)
seasons_df = manager_df[['league_key','season','league_name']].copy()
seasons_df.drop_duplicates(subset=['season','league_name'],inplace=True) #could use season_id for this?
seasons = []
for index, row in seasons_df.iterrows():
    seasons.append((row['season'],row['league_name'],row['league_key']))
league_profiles = {}
league_mappings = []
i=0
print('Grouping seasons...')
for s in seasons:
    print(s)
    matched = False
    s_league_df = manager_df[(manager_df.season == s[0]) & (manager_df.league_name == s[1])]
    if len(league_profiles)==0:
        historical_managers = []
        historical_teams = []
        for index, row in s_league_df.iterrows():
            if row['manager'] != '-- hidden --':
                historical_managers.append(row['manager'])
            if row['team'] != '-- hidden --':
                historical_teams.append(row['team'])
        league_profiles[i] = (s_league_df,historical_managers,historical_teams) #creating the first profile which is the most recent season 
        league_mappings.append((s[2],i))
        i=i+1
    else:
        for l in league_profiles.keys(): #compare current league to set of already identified league classes
            if matched == False:
                profile_managers = league_profiles[l][1].copy()
                profile_teams = league_profiles[l][2].copy()
                match_score = 0
                for index, row in s_league_df.iterrows():
                    if row['manager'] in profile_managers:
                        #print(row['manager'])
                        match_score = match_score + 1
                    elif row['team'] in profile_teams:
                        #print(row['team'])
                        match_score = match_score + 1
                    #updating profile lists in case we need to update the dictionary
                    if (row['manager'] != '-- hidden --') & (row['manager'] not in profile_managers):
                        profile_managers.append(row['manager'])
                    if (row['team'] != '-- hidden --') & (row['team'] not in profile_teams):
                        profile_teams.append(row['team'])
                if match_score / s_league_df.shape[0] > match_threshold:
                    #print(match_score / s_league_df.shape[0])
                    league_mappings.append((s[2], l))
                    league_profiles[l] = (league_profiles[l][0],profile_managers,profile_teams) #update dictionary with latest names
                    matched = True
        #we assume there were no matches, so create a new profile
        if matched == False:
            league_profiles[i] = (s_league_df, list(s_league_df['manager']), list(s_league_df['team']))
            #print(f"new profile: {i}")
            #print(league_profiles[i])
            league_mappings.append((s[2],i))
            i=i+1

season_dict = {}
for m in league_mappings:
    season_dict[m[0]] = m[1]

league_ids = []
for league_key in leagues_df.league_key:
    league_ids.append(season_dict[league_key])
leagues_df['group_id'] = league_ids

leagues_df.sort_values(by=['group_id','season'],ascending=[True,False],inplace=True)
leagues_df.drop_duplicates(subset=['season','group_id'])[['name','season','group_id']]

#assemble all standings data for all leagues
standings = []
for league in leagues:
    l = football.to_league(league)
    standings.append((league,l.standings()))

standings_dfs = []
for s in standings:

    #method to compile standings data (minus faab)
    standings_df = pd.DataFrame(s[1])

    dfs=[]
    for a in standings_df.outcome_totals.values:
        a_df = pd.DataFrame([a])
        dfs.append(a_df)
    array_df = pd.concat(dfs)
    array_df.reset_index(drop=True,inplace=True)

    full_standings = pd.merge(standings_df,array_df,left_index=True,right_index=True)

    #get moves and faab from manager_df
    moves = []
    faab_budget = []
    for manager in full_standings.team_key:
        moves.append(manager_df[manager_df.l_manager_key==manager].number_of_moves.values[0])
        faab_budget.append(manager_df[manager_df.l_manager_key==manager].faab_balance.values[0])
    full_standings['moves'] = moves
    full_standings['faab_balance'] = faab_budget
    full_standings['league_key'] = s[0]
    standings_dfs.append(full_standings)
standings_master = pd.concat(standings_dfs)
#standings_master.to_csv('standings_master.csv', index=False)

#playoff wins
playoff_dfs = []
for season in standings_master.league_key:
    season_df = standings_master[standings_master.league_key==season].copy()
    num_managers = season_df.shape[0]
    playoff_wins = []
    for index, row in season_df.iterrows():
        try:
            rank = int(row['rank'])
            seed = int(row['playoff_seed'])
            if num_managers == 10:
                if rank == 1:
                    if seed <= 2:
                        playoff_wins.append(2)
                    else:
                        playoff_wins.append(3)
                elif rank == 2:
                    if seed <= 2:
                        playoff_wins.append(1)
                    else:
                        playoff_wins.append(2)
                elif rank == 3:
                    if seed <= 2:
                        playoff_wins.append(1)
                    else:
                        playoff_wins.append(2)
                elif rank == 4:
                    if seed <= 2:
                        playoff_wins.append(0)
                    else:
                        playoff_wins.append(1)
                elif rank == 5:
                    playoff_wins.append(1)
                elif rank == 6:
                    playoff_wins.append(0)
                elif rank == 7:
                    playoff_wins.append(2)
                elif rank == 8:
                    playoff_wins.append(1)
                elif rank == 9:
                    playoff_wins.append(1)
                elif rank == 10:
                    playoff_wins.append(0)
            elif num_managers == 8:
                if rank == 1:
                    if seed <= 2:
                        playoff_wins.append(2)
                    else:
                        playoff_wins.append(3)
                elif rank == 2:
                    if seed <= 2:
                        playoff_wins.append(1)
                    else:
                        playoff_wins.append(2)
                elif rank == 3:
                    if seed <= 2:
                        playoff_wins.append(1)
                    else:
                        playoff_wins.append(2)
                elif rank == 4:
                    if seed <= 2:
                        playoff_wins.append(0)
                    else:
                        playoff_wins.append(1)
                elif rank == 5:
                    playoff_wins.append(1)
                elif rank == 6:
                    playoff_wins.append(0)
                elif rank == 7:
                    playoff_wins.append(1)
                elif rank == 8:
                    playoff_wins.append(0)
            elif num_managers == 6:
                if rank == 1:
                    playoff_wins.append(2)
                elif rank == 2:
                    playoff_wins.append(1)
                elif rank == 3:
                    playoff_wins.append(1)
                elif rank == 4:
                    playoff_wins.append(0)
                elif rank == 5:
                    playoff_wins.append(1)
                elif rank == 6:
                    playoff_wins.append(0)
        except:
            #this is a band-aid, it should really be based on the number of managers in league.  
            #I'm assuming that this group is all cases where there are two spots outside the playoffs
            try:
                if int(row['rank']) == 7:
                    playoff_wins.append(1)
                elif int(row['rank']) == 8:
                    playoff_wins.append(0)
                elif int(row['rank']) == 9:
                    playoff_wins.append(1)
                elif int(row['rank']) == 10:
                    playoff_wins.append(0)
                else:
                    playoff_wins.append('')
            except:
                playoff_wins.append('')
    season_df['playoff_wins'] = playoff_wins
    playoff_dfs.append(season_df)
playoff_df = pd.concat(playoff_dfs)
playoff_df.drop_duplicates(subset=['team_key'],inplace=True) #I do not know why this is required but aparently dups are created above

playoff_df[['rank','playoff_seed','playoff_wins']]  
#playoff_df.to_csv('playoff_wins_testing.csv',index=False)     


In [None]:
try:
    driver.quit()
except:
    pass
from get_chrome_driver import GetChromeDriver
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument("--log-level=3")
#chrome_options.add_argument('--headless')
#chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])
import time

# Install the driver:
# Downloads ChromeDriver for the installed Chrome version on the machine
# Adds the downloaded ChromeDriver to path
try:
    driver = webdriver.Chrome(options=chrome_options)
except:
    get_driver = GetChromeDriver()
    get_driver.install()
    driver = webdriver.Chrome(options=chrome_options)

# Use the installed ChromeDriver with Selenium
yahoo_profile_url = 'https://profiles.sports.yahoo.com/user/5ES5NWOA3D4IPAP4H73VCTBDDM/?sport=football'
username = 'iflyhighsky'
driver.get(yahoo_profile_url)
time.sleep(2)

#find the history button
buttons=driver.find_elements(By.TAG_NAME,"button")
for button in buttons:
    try:
        if button.text == 'History':
            history_button = button
    except:
        pass 
history_button.click()
time.sleep(1)

#collect league urls
football_urls = []
soup = BeautifulSoup(driver.page_source)
for a in soup.find_all('a'):
    url = a.get('href')
    if ('football.fantasysports.yahoo.com/' in url) and ('/f1/') in url:
        football_urls.append(url)

football_urls

In [None]:
football_urls

In [None]:
#gather content for a season 
league_url = 'https://football.fantasysports.yahoo.com/2007/f1/580505' #2007
league_url = 'https://football.fantasysports.yahoo.com/2009/f1/790872' #2009
league_url = 'https://football.fantasysports.yahoo.com/2010/f1/438659' #2010
league_url = 'https://football.fantasysports.yahoo.com/2011/f1/211725' #2011
league_url = 'https://football.fantasysports.yahoo.com/2012/f1/324073' #2012 
league_url = 'https://football.fantasysports.yahoo.com/2013/f1/378899' #2013
league_url = 'https://football.fantasysports.yahoo.com/2014/f1/329823' #2014
league_url = 'https://football.fantasysports.yahoo.com/2015/f1/216667' #2015
league_url = 'https://football.fantasysports.yahoo.com/2016/f1/370526' #2016
league_url = 'https://football.fantasysports.yahoo.com/2017/f1/347949' #2017
league_url = 'https://football.fantasysports.yahoo.com/2018/f1/153194' #2018
league_url = 'https://football.fantasysports.yahoo.com/2019/f1/102231' #2019
league_url = 'https://football.fantasysports.yahoo.com/2020/f1/523631' #2020
league_url = 'https://football.fantasysports.yahoo.com/2021/f1/696126' #2021
league_url = 'https://football.fantasysports.yahoo.com/2022/f1/709240' #2022
#url= 'https://football.fantasysports.yahoo.com/f1/1399238'

driver.get(league_url)
time.sleep(2)
try:
    driver.find_element(By.XPATH,"//label[@for='login-username']") #if this succeeds then we assume login is being requested
    print('Login required...')
    username_input = driver.find_element(By.XPATH,"//input[@id='login-username']")
    username = 'iflyhighsky'
    username_input.send_keys(username)
    next_button = driver.find_element(By.XPATH,"//input[@value='Next']")
    next_button.click()
    time.sleep(2)
    send_button = driver.find_element(By.XPATH,"//button[@name='send']")
    send_button.click()
    print('NOW - CHECK YOUR PHONE AND TAKE US THE REST OF THE WAY')
except:
    print('Login was not required')
    pass

# WE NEED TO COLLECT:
# FAAB SPEND
# WEEKLY PLAYER SCORES SO I CAN PARSE POINTS FROM DRAFTED VS PI?  CKUPS
# OVERALL PLAYER SCORES SO I CAN RANK THEM....ACTUALY DO I NEED THIS

In [None]:
#collect schedule urls
schedule_url = driver.find_element(By.XPATH,"//a[@data-target='#lhstschedtab']").get_attribute('href')
driver.get(schedule_url)

schedule_nav_html = driver.find_element(By.XPATH,"//ul[@id='schedsubnav']").get_attribute('innerHTML')
schedule_nav_soup = BeautifulSoup(schedule_nav_html)
nav_elems = schedule_nav_soup.find_all('li')

manager_schedule_urls = []
for n in nav_elems:
    elem_url = 'https://football.fantasysports.yahoo.com/' + n.find('a',href=True).get('href')
    manager = n.text.strip()
    manager_schedule_urls.append((manager, elem_url))

#collect weekly matchup urls
manager_matchup_urls = []
for m in manager_schedule_urls:
    print(f'Collecting matchup urls for {m[0]}')
    driver.get(m[1])

    #find the urls for weekly
    table_html = driver.find_element(By.XPATH,"//table[@class='Table Table-interactive']").get_attribute('innerHTML')
    table_soup = BeautifulSoup(table_html)
    table_elems = table_soup.find_all('tr')

    matchup_urls = []
    for t in table_elems:
        for a in t.find_all('a'):
            try:
                url = a.get('href')
                if 'matchup' in url:
                    matchup_urls.append('https://football.fantasysports.yahoo.com/' + url)
                    break
            except:
                pass
    manager_matchup_urls.append((m[0],matchup_urls))

manager_matchup_urls

In [None]:
#now getting weekly matchup data
def get_matchup_data(matchup_url):
    left_matchup_id = matchup_url.split('mid1=')[1].split('&')[0]
    right_matchup_id = matchup_url.split('mid2=')[1]
    driver.get(matchup_url)
    time.sleep(2)

    matchup_link_elems = driver.find_elements(By.XPATH,'//a[@class="F-link"]')
    left_manager = matchup_link_elems[0].text
    left_manager_url = matchup_link_elems[0].get_attribute('href')
    left_manager_url_id = left_manager_url.split('/')[-1]
    right_manager = matchup_link_elems[1].text
    right_manager_url = matchup_link_elems[1].get_attribute('href')
    right_manager_url_id = right_manager_url.split('/')[-1]

    if left_matchup_id == left_manager_url_id:
        manager_match = 'left'
        opponent_id = right_manager_url_id
    elif left_matchup_id == right_manager_url_id:
        manager_match = 'right'
        opponent_id = left_manager_url_id
    else:
        print('Error when matching manager ID')

    week_results_html = driver.find_element(By.XPATH,"//table[@id='statTable1']").get_attribute('innerHTML')
    matchup_soup = BeautifulSoup(week_results_html)

    starter_positions = []
    for p in matchup_soup.find_all("td",{"class":"Va-top Bg-shade F-shade Ta-c"}):
        if 'TOTAL' not in p.text:
            starter_positions.append(p.text)

    matchup_players = matchup_soup.find_all("div", {"class": "ysf-player-name Nowrap Grid-u Relative Lh-xs Ta-start"})
    left_players = []
    right_players = []
    n=0
    for m in matchup_players:
        try:
            player_name = m.text
            player_url = m.find('a').get('href')
        except:
            player_name = ''
            player_url = ''
        if n == 0:
            left_players.append((player_name,player_url))
            n = 1
        else:
            right_players.append((player_name,player_url))
            n = 0

    left_week_scores = matchup_soup.find_all("td",{"class":"Pend-lg Ta-end Fw-b Nowrap Va-top"})
    right_week_scores = matchup_soup.find_all("td",{"class":"Ta-end Fw-b Nowrap Va-top"})

    week_df = pd.DataFrame(starter_positions,columns=['position'])
    week_df['manager_id'] = left_matchup_id
    week_df['opponent_id'] = opponent_id
    week_df['matchup_url'] = matchup_url
    if manager_match == 'left':
        scores = []
        for s in left_week_scores:
            scores.append(s.text)
        scores = scores[:-1]
        week_df[['player','player_url']] = left_players
        week_df['score'] = scores
    elif manager_match == 'right':
        scores = []
        for s in right_week_scores:
            scores.append(s.text)
        scores = scores[:-1]
        week_df[['player','player_url']] = right_players
        week_df['score'] = scores   

    time.sleep(1) 

    return week_df

manager_week_dfs = []
for m in manager_matchup_urls:
    week_dfs = []
    manager = m[0]
    for matchup_url in m[1]:
        week_dfs.append(get_matchup_data(matchup_url))
    full_manager_week_df = pd.concat(week_dfs)
    full_manager_week_df['manager'] = manager
    manager_week_dfs.append(full_manager_week_df)
full_weekly_data_df = pd.concat(manager_week_dfs)
full_weekly_data_df['league_url'] = league_url

In [None]:
full_weekly_data_df

In [None]:
full_weekly_data_df.to_csv('2022_pre_matchups.csv')

In [None]:
pre_league_urls = ['https://football.fantasysports.yahoo.com/2007/f1/580505',
'https://football.fantasysports.yahoo.com/2009/f1/790872',
'https://football.fantasysports.yahoo.com/2010/f1/438659',
'https://football.fantasysports.yahoo.com/2011/f1/211725',
'https://football.fantasysports.yahoo.com/2012/f1/324073',
'https://football.fantasysports.yahoo.com/2013/f1/378899',
'https://football.fantasysports.yahoo.com/2014/f1/329823',
'https://football.fantasysports.yahoo.com/2015/f1/216667',
'https://football.fantasysports.yahoo.com/2016/f1/370526',
'https://football.fantasysports.yahoo.com/2017/f1/347949',
'https://football.fantasysports.yahoo.com/2018/f1/153194',
'https://football.fantasysports.yahoo.com/2019/f1/102231',
'https://football.fantasysports.yahoo.com/2020/f1/523631',
'https://football.fantasysports.yahoo.com/2021/f1/696126',
'https://football.fantasysports.yahoo.com/2022/f1/709240']
pre_league_urls

In [None]:
league_url = 'https://football.fantasysports.yahoo.com/2022/f1/709240'
driver.get(league_url)
#get playoff matchup urls
driver.execute_script("window.scrollTo(0, 700)") 
grid_panes = driver.find_elements(By.XPATH,"//div[@class='Grid-u-1-3 Ta-c']")
if len(grid_panes) == 0:
    #handling 6p leagues
    grid_panes = driver.find_elements(By.XPATH,"//div[@class='Grid-u-1-2 Ta-c']")

quarterfinals = grid_panes[0].find_elements(By.XPATH,"div[@class='Linkable Bdr Bdr-radius Bg-shade Ta-start yfa-matchup bracket quarterfinal']")
if len(quarterfinals) > 0:
    grid_index_modifier = 0
else:
    grid_index_modifier = 1
semifinals = grid_panes[1 - grid_index_modifier].find_elements(By.XPATH,"div[@class='Linkable Bdr Bdr-radius Bg-shade Ta-start yfa-matchup bracket semifinal']")
fifth_place = grid_panes[1 - grid_index_modifier].find_elements(By.XPATH,"div[@class='Linkable Bdr Bdr-radius Bg-shade Ta-start yfa-matchup bracket place_5']")
finals = grid_panes[2 - grid_index_modifier].find_elements(By.XPATH,"div[@class='Linkable Bdr Bdr-radius Bg-shade Ta-start yfa-matchup bracket final']")
third_place = grid_panes[2 - grid_index_modifier].find_elements(By.XPATH,"div[@class='Linkable Bdr Bdr-radius Bg-shade Ta-start yfa-matchup bracket place_3']")

playoff_matchup_urls = []
if grid_index_modifier == 0:
    sections = [quarterfinals, semifinals, fifth_place, third_place, finals]
else:
    sections = [semifinals, fifth_place, third_place, finals]
for section in sections:
    for e in section:
        matchup_path = e.get_attribute('data-target')
        matchup_url = 'https://football.fantasysports.yahoo.com' + matchup_path
        playoff_matchup_urls.append(matchup_url)

#now switch to consolation
consolation_matchup_urls = []
consolidation_elem = driver.find_element(By.CSS_SELECTOR,"span[id='selectlist_nav']")
consolidation_elem.click()
action = webdriver.ActionChains(driver)
action.move_to_element(consolidation_elem)
action.move_by_offset(0, 50)    # 0px to the right, 50px to bottom
action.click()
action.perform()
time.sleep(1)

grid_panes = driver.find_elements(By.XPATH,"//div[@class='Grid-u-1-3 Ta-c']")
if len(grid_panes) == 0:
    #handling 6p leagues
    grid_panes = driver.find_elements(By.XPATH,"//div[@class='Grid-u-1-2 Ta-c']")
    grid_index_modifier = 1
consolation_semis = grid_panes[1 -grid_index_modifier].find_elements(By.XPATH,"div[@class='Linkable Bdr Bdr-radius Bg-shade Ta-start yfa-matchup bracket semifinal']")
for consolation_semi in consolation_semis:
    matchup_path = consolation_semi.get_attribute('data-target')
    matchup_url = 'https://football.fantasysports.yahoo.com' + matchup_path
    consolation_matchup_urls.append(matchup_url)
grid_panes = driver.find_elements(By.XPATH,"//div[@class='Grid-u-1-3 Ta-c']") #do it again to avoid stale element exception
if len(grid_panes) == 0:
    #handling 6p leagues
    grid_panes = driver.find_elements(By.XPATH,"//div[@class='Grid-u-1-2 Ta-c']") 
    grid_index_modifier = 1
seventh_place = grid_panes[2 - grid_index_modifier].find_elements(By.XPATH,"div[@class='Linkable Bdr Bdr-radius Bg-shade Ta-start yfa-matchup bracket place_7']")
ninth_place = grid_panes[2 - grid_index_modifier].find_elements(By.XPATH,"div[@class='Linkable Bdr Bdr-radius Bg-shade Ta-start yfa-matchup bracket place_9']")

for section in [seventh_place,ninth_place]:
    for e in section:
        matchup_path = e.get_attribute('data-target')
        matchup_url = 'https://football.fantasysports.yahoo.com' + matchup_path
        consolation_matchup_urls.append(matchup_url)

full_playoff_urls = playoff_matchup_urls + consolation_matchup_urls

#now getting weekly matchup data
def get_playoff_matchup_data(matchup_url):
    left_matchup_id = matchup_url.split('mid1=')[1].split('&')[0]
    right_matchup_id = matchup_url.split('mid2=')[1]
    driver.get(matchup_url)
    time.sleep(2)

    matchup_link_elems = driver.find_elements(By.XPATH,'//a[@class="F-link"]')
    left_manager = matchup_link_elems[0].text
    left_manager_url = matchup_link_elems[0].get_attribute('href')
    left_manager_url_id = left_manager_url.split('/')[-1]
    right_manager = matchup_link_elems[1].text
    right_manager_url = matchup_link_elems[1].get_attribute('href')
    right_manager_url_id = right_manager_url.split('/')[-1]

    week_results_html = driver.find_element(By.XPATH,"//table[@id='statTable1']").get_attribute('innerHTML')
    matchup_soup = BeautifulSoup(week_results_html)

    starter_positions = []
    for p in matchup_soup.find_all("td",{"class":"Va-top Bg-shade F-shade Ta-c"}):
        if 'TOTAL' not in p.text:
            starter_positions.append(p.text)

    matchup_players = matchup_soup.find_all("div", {"class": "ysf-player-name Nowrap Grid-u Relative Lh-xs Ta-start"})
    left_players = []
    right_players = []
    n=0
    for m in matchup_players:
        try:
            player_name = m.text
            player_url = m.find('a').get('href')
        except:
            player_name = ''
            player_url = ''
        if n == 0:
            left_players.append((player_name,player_url))
            n = 1
        else:
            right_players.append((player_name,player_url))
            n = 0

    left_week_scores = matchup_soup.find_all("td",{"class":"Pend-lg Ta-end Fw-b Nowrap Va-top"})
    right_week_scores = matchup_soup.find_all("td",{"class":"Ta-end Fw-b Nowrap Va-top"})

    left_df = pd.DataFrame(starter_positions,columns=['position'])
    left_df['manager_id'] = left_manager_url_id
    left_df['opponent_id'] = right_manager_url_id
    left_df['matchup_url'] = matchup_url
    scores = []
    for s in left_week_scores:
        scores.append(s.text)
    scores = scores[:-1]
    left_df[['player','player_url']] = left_players
    left_df['score'] = scores

    right_df = pd.DataFrame(starter_positions,columns=['position'])
    right_df['manager_id'] = right_manager_url_id
    right_df['opponent_id'] = left_manager_url_id
    right_df['matchup_url'] = matchup_url
    scores = []
    for s in right_week_scores:
        scores.append(s.text)
    scores = scores[:-1]
    right_df[['player','player_url']] = right_players
    right_df['score'] = scores   

    time.sleep(1) 

    return [left_df,right_df]

playoff_match_dfs = []
for url in full_playoff_urls:
    playoff_dfs = []
    matchup_data = get_playoff_matchup_data(url)
    left_right_matchups = pd.concat(matchup_data)
    playoff_match_dfs.append(left_right_matchups)
full_playoff_data_df = pd.concat(playoff_match_dfs)
full_playoff_data_df['league_url'] = league_url

full_playoff_data_df




In [None]:
full_playoff_data_df

In [None]:
full_playoff_data_df.to_csv('2022_pre_playoffs.csv',index=False)

In [None]:
#now getting weekly matchup data
def get_playoff_matchup_data(matchup_url):
    left_matchup_id = matchup_url.split('mid1=')[1].split('&')[0]
    right_matchup_id = matchup_url.split('mid2=')[1]
    driver.get(matchup_url)
    time.sleep(2)

    matchup_link_elems = driver.find_elements(By.XPATH,'//a[@class="F-link"]')
    left_manager = matchup_link_elems[0].text
    left_manager_url = matchup_link_elems[0].get_attribute('href')
    left_manager_url_id = left_manager_url.split('/')[-1]
    right_manager = matchup_link_elems[1].text
    right_manager_url = matchup_link_elems[1].get_attribute('href')
    right_manager_url_id = right_manager_url.split('/')[-1]

    week_results_html = driver.find_element(By.XPATH,"//table[@id='statTable1']").get_attribute('innerHTML')
    matchup_soup = BeautifulSoup(week_results_html)

    starter_positions = []
    for p in matchup_soup.find_all("td",{"class":"Va-top Bg-shade F-shade Ta-c"}):
        if 'TOTAL' not in p.text:
            starter_positions.append(p.text)

    matchup_players = matchup_soup.find_all("div", {"class": "ysf-player-name Nowrap Grid-u Relative Lh-xs Ta-start"})
    left_players = []
    right_players = []
    n=0
    for m in matchup_players:
        try:
            player_name = m.text
            player_url = m.find('a').get('href')
        except:
            player_name = ''
            player_url = ''
        if n == 0:
            left_players.append((player_name,player_url))
            n = 1
        else:
            right_players.append((player_name,player_url))
            n = 0

    left_week_scores = matchup_soup.find_all("td",{"class":"Pend-lg Ta-end Fw-b Nowrap Va-top"})
    right_week_scores = matchup_soup.find_all("td",{"class":"Ta-end Fw-b Nowrap Va-top"})

    left_df = pd.DataFrame(starter_positions,columns=['position'])
    left_df['manager_id'] = left_manager_url_id
    left_df['opponent_id'] = right_manager_url_id
    left_df['matchup_url'] = matchup_url
    scores = []
    for s in left_week_scores:
        scores.append(s.text)
    scores = scores[:-1]
    left_df[['player','player_url']] = left_players
    left_df['score'] = scores

    right_df = pd.DataFrame(starter_positions,columns=['position'])
    right_df['manager_id'] = right_manager_url_id
    right_df['opponent_id'] = left_manager_url_id
    right_df['matchup_url'] = matchup_url
    scores = []
    for s in right_week_scores:
        scores.append(s.text)
    scores = scores[:-1]
    right_df[['player','player_url']] = right_players
    right_df['score'] = scores   

    time.sleep(1) 

    return [left_df,right_df]

playoff_match_dfs = []
for url in full_playoff_urls:
    playoff_dfs = []
    matchup_data = get_playoff_matchup_data(url)
    left_right_matchups = pd.concat(matchup_data)
    playoff_match_dfs.append(left_right_matchups)
full_playoff_data_df = pd.concat(playoff_match_dfs)
full_playoff_data_df['league_url'] = league_url



In [7]:
rs_matchups = []
playoff_matchups = []
for year in [2007,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022]:
    rs = pd.read_csv(f'{year}_pre_matchups.csv')
    playoffs = pd.read_csv(f'{year}_pre_playoffs.csv')
    rs_matchups.append(rs)
    playoff_matchups.append(playoffs)

all_regular_season = pd.concat(rs_matchups)
all_playoffs = pd.concat(playoff_matchups)

#add some features for regular season
scores=[]
league_ids = []
manager_keys = []
for index, row in all_regular_season.iterrows():
    league_id = row['league_url'].split('/')[-1]
    league_ids.append(league_id)
    manager_key = league_id + '.' + str(row['manager_id'])
    manager_keys.append(manager_key)
    try:
        scores.append(float(row['score']))
    except:
        scores.append(0)
    #should add player id too
all_regular_season['league_id'] = league_ids
all_regular_season['manager_key'] = manager_keys
all_regular_season['score'] = scores

#add some features for playoffs
league_ids = []
scores=[]
manager_keys = []
for index, row in all_playoffs.iterrows():
    league_id = row['league_url'].split('/')[-1]
    league_ids.append(league_id)
    manager_key = league_id + '.' + str(row['manager_id'])
    manager_keys.append(manager_key)
    #should add player id too
    try:
        scores.append(float(row['score']))
    except:
        scores.append(0)
all_playoffs['league_id'] = league_ids
all_playoffs['manager_key'] = manager_keys
all_playoffs['score']= scores

#all_regular_season.to_csv('all_regular_season_thru_2022.csv',index=False)
all_playoffs.to_csv('all_playoffs_thru_2022_revised.csv',index=False)

In [None]:
all_regular_season = pd.read_csv('all_regular_season_thru_2022.csv')
try:
    all_regular_season.drop('Unnamed: 0',axis=1,inplace=True)
except:
    pass

#clean manager_key that I messed up with .1 and .10
clean_manager_keys = []
for index, row in all_regular_season.iterrows():
    manager_key = str(row['league_id']) + '.' + str(row['manager_id'])
    clean_manager_keys.append(manager_key)
all_regular_season['manager_key'] = clean_manager_keys

rs_groupby = all_regular_season[['manager_key','score']].groupby(['manager_key']).aggregate('sum')
rs_groupby.rename(columns={'score':'rs_score'},inplace=True)
rs_groupby.reset_index(inplace=True)
rs_groupby['manager_key'] = rs_groupby['manager_key'].astype('str')

standings_master_w_s=standings_master.merge(rs_groupby,left_on='manager_key',right_on='manager_key')
standings_master_w_s['points_for'] = standings_master_w_s['points_for'].astype(float)
standings_master_w_s['diff'] = standings_master_w_s['points_for'] - standings_master_w_s['rs_score']

all_playoffs = pd.read_csv('all_playoffs_thru_2022.csv')

#clean manager_key that I messed up with .1 and .10
clean_manager_keys = []
for index, row in all_playoffs.iterrows():
    manager_key = str(row['league_id']) + '.' + str(row['manager_id'])
    clean_manager_keys.append(manager_key)
all_playoffs['manager_key'] = clean_manager_keys
p_groupby = all_playoffs[['manager_key','score']].groupby(['manager_key']).aggregate('sum')
p_groupby.rename(columns={'score':'p_score'},inplace=True)
p_groupby.reset_index(inplace=True)

consolidated_master = standings_master_w_s.merge(p_groupby,how='left', left_on='manager_key',right_on='manager_key')
consolidated_master


In [None]:
#append playoff wins that I neglected to include at earlier step...
playoff_wins_dict = {}
for index, row in playoff_df.iterrows():
    playoff_wins_dict[row['team_key']] = row['playoff_wins']

consolidated_master['playoff_wins'] = consolidated_master.team_key.map(playoff_wins_dict)
consolidated_master.playoff_wins.value_counts()

In [None]:
consolidated_master.to_csv('consolidated_master.csv')

In [None]:
#merge aggregations with standings master
#add some keys we need to standings df
team_ids = []
league_ids = []
manager_keys = []
for index, row in standings_master.iterrows():
    team_id = row['team_key'].split('.')[-1]
    league_id = row['league_key'].split('.')[-1]
    manager_key = league_id + '.' + team_id
    team_ids.append(team_id)
    league_ids.append(league_id)
    manager_keys.append(manager_key)
standings_master['team_id'] = team_ids
standings_master['league_id'] = league_ids
standings_master['manager_key'] = manager_keys

standings_master

In [17]:
consolidated_master = pd.read_csv('consolidated_master.csv')

all_playoffs = pd.read_csv('all_playoffs_thru_2022_revised.csv')

#clean manager_key that I messed up with .1 and .10
clean_manager_keys = []
for index, row in all_playoffs.iterrows():
    manager_key = str(row['league_id']) + '.' + str(row['manager_id'])
    clean_manager_keys.append(manager_key)
all_playoffs['manager_key'] = clean_manager_keys
p_groupby = all_playoffs[['manager_key','score']].groupby(['manager_key']).aggregate('sum')
p_groupby.rename(columns={'score':'p_score'},inplace=True)
p_groupby.reset_index(inplace=True)

p_score_dict = {}
for index, row in p_groupby.iterrows():
    p_score_dict[row['manager_key']] = row['p_score']
consolidated_master['manager_key'] = consolidated_master['manager_key'].astype(str)
consolidated_master['p_score_revised'] = consolidated_master.manager_key.map(p_score_dict)
consolidated_master.to_csv('consolidated_master_revised.csv')

In [15]:
consolidated_master.manager_key.dtype

dtype('float64')

In [None]:
#assemble all standings data for all leagues
standings = []
for league in leagues:
    l = football.to_league(league)
    standings.append((league,l.standings()))

standings_dfs = []
for s in standings:

    #method to compile standings data (minus faab)
    standings_df = pd.DataFrame(s[1])

    dfs=[]
    for a in standings_df.outcome_totals.values:
        a_df = pd.DataFrame([a])
        dfs.append(a_df)
    array_df = pd.concat(dfs)
    array_df.reset_index(drop=True,inplace=True)

    full_standings = pd.merge(standings_df,array_df,left_index=True,right_index=True)

    #get moves and faab from manager_df
    moves = []
    faab_budget = []
    for manager in full_standings.team_key:
        moves.append(manager_df[manager_df.l_manager_key==manager].number_of_moves.values[0])
        faab_budget.append(manager_df[manager_df.l_manager_key==manager].faab_balance.values[0])
    full_standings['moves'] = moves
    full_standings['faab_balance'] = faab_budget
    full_standings['league_key'] = s[0]
    standings_dfs.append(full_standings)
standings_master = pd.concat(standings_dfs)
standings_master

In [None]:
historical_drafts = pd.read_excel('Historical draft.xlsx')
lkup_concs = []
for index, row in historical_drafts.iterrows():
    lkup_concs.append(row['Standard name'] + row['Position'])
historical_drafts['lkup_conc'] = lkup_concs

names_dict_df = pd.read_csv('names_dict.csv')
for index, row in names_dict_df.iterrows():
    names_dict[row['player_conc']] = row['player_id']

player_ids = []
for s in historical_drafts['lkup_conc']:
    try:
        player_ids.append(names_dict[s])
    except:
        player_ids.append('')

historical_drafts['player_id']= player_ids
historical_drafts[historical_drafts.player_id == '']

In [None]:
#now a process to ingest a NEW season's draft and calculate the relevant statistics that we will use
draft = pd.read_excel('Draft Results/2021.xlsx')
draft