# <font color='teal'>Yahoo Fantasy Sports Roster Data Extraction</font>

In [1]:
# Import packages
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
import pandas as pd
from datetime import datetime
import time

## <font color='teal'>Must authenticate manually on phone before proceeding with scrape</font>

In [2]:
# League base url and team count
league_url = 'https://football.fantasysports.yahoo.com/f1/659111/'
teams = 12
login_email = 'trojanh8er143@yahoo.com'
latest_week = 16
week_tails_range = (1, latest_week)

In [53]:
# Dict containing team names to associate to trailing url int
team_ints_to_names = {
    1:'Child Butt Doctor',
    2:'CreamTeam Curdlers',
    3:'First Round Exit',
    4:'CeeDeeZ Rugged Nuts', # OOO Till Playoffs
    5:'JohnDavidBootyShorts',
    6:'Bidet to You Sir',
    7:'Doctor Steal Yo Girl',
    8:'Betty Boop Would',
    9:'OnlyFans Cam Girl',
    10:'My Dick Sucks',
    11:'Florida Man',
    12:'North Source Boys', # 'axel\'s Notable Team',
}

In [4]:
position_players_count_dict = {
    'QB':1,
    'RB':2,
    'WR':2,
    'TE':1,
    'W/R/T':2,
    'DEF':1
}

In [5]:
position_abbrevs_list = ['QB', 'RB', 'WR', 'TE', 'DEF', 'K',
                         'WR,RB', 'RB,WR', 'QB,WR', 'QB,RB',
                         'QB,TE', 'WR,TE', 'TE,WR', 'TE,RB']

In [6]:
# Align driver
DRIVER_PATH = '\\Users\\David\\Desktop\\Python\\chromedriver\\chromedriver.exe'
driver = webdriver.Chrome(executable_path=DRIVER_PATH)
driver.get(league_url)

In [7]:
# Login steps xpaths
username_xpath = '//*[@id="login-username"]'
next_button_xpath = '//*[@id="login-signin"]'

# Send email login
driver.find_element_by_xpath(username_xpath).send_keys(login_email)

# Click next, then authenticate on phone
driver.find_element_by_xpath(next_button_xpath).click()

## <font color='teal'>Must authenticate manually on phone before proceeding with scrape</font>

In [8]:
# Build urls to visit
urls_list = []
for team in range(1, teams+1):
    for week in range (1, latest_week+1):
        page_nav = league_url + str(team) + '/team?&week=' + str(week)
        urls_list.append(page_nav)

In [9]:
# Function to retrieve roster data
def rosterExtract(url, team_ints_to_names):
    
    try:

        checkpoint = 0

        team_name = team_ints_to_names[int(
            url.split('659111/', 1)[1].split('/', 1)[0])] # retrieve team
        week_num = int(url.split('team?&week=', 1)[1]) # retrieve week
        print('Beginning', team_name, 'week', week_num)
        driver.get(url) # navigate to each team's roster per week
        time.sleep(1) # force wait for loading
        
        checkpoint = 1

        # Loop to retrieve players by href
        players_list = []
        elements = driver.find_elements_by_xpath('//a[contains(@href, "https://sports.yahoo.com/nfl/")]')
        for element in elements:
            if element.text != '' and\
            '@' not in element.text and\
            'StatTracker' not in element.text and\
            'vs' not in element.text:
                players_list.append(element.text)

        checkpoint = 2
        
        # Loop to retrieve game statuses by class
        game_status_list = []
        win_loss_list = []
        elements = driver.find_elements_by_class_name('ysf-game-status')
        for element in elements:
            if 'Bye' in element.text:
                game_status_list.append('Bye')
            elif 'vs' in element.text:
                game_status_list.append('Home')
            elif '@' in element.text:
                game_status_list.append('Away')
            else:
                game_status_list.append('N/A')
        for element in elements:
            if 'Bye' in element.text:
                win_loss_list.append('Bye')
            elif 'Final W' in element.text:
                win_loss_list.append('W')
            elif 'Final L' in element.text:
                win_loss_list.append('L')
            elif 'Final T' in element.text:
                win_loss_list.append('T')
            else:
                win_loss_list.append('N/A')
        
        checkpoint = 3

        # Loop to retrieve positions by class
        positions_list = []
        elements = driver.find_elements_by_class_name('pos-label')
        for element in elements:
            positions_list.append(element.text)

        checkpoint = 4

        # Loop to retrieve player teams and positions by class
        player_team_list = []
        player_pos_list = []
        elements = driver.find_elements_by_class_name('Fz-xxs')
        for element in elements:
            if ' - ' in element.text:
                element_tuple = element.text.split(' - ', 1)
                team = element_tuple[0]
                pos = element_tuple[1]
                if pos in position_abbrevs_list:
                    player_team_list.append(team)
                    player_pos_list.append(pos)

        checkpoint = 5

        # Loop to retrieve points by href
        points_as_strings = []
        counter = 0
        elements = driver.find_elements_by_xpath('//a[contains(@href, "#pps-")]')
        for element in elements:
            if element.text != '':
                points_as_strings.append(element.text)
        points_as_floats = [float(pt) for pt in points_as_strings] # Convert strings to floats
        
        checkpoint = 6
        
        # Insert zero floats where player on bye
        game_status_indexes = []
        counter = 0
        for status in game_status_list:
            counter += 1
            if status == 'Bye':
                game_status_indexes.append(counter-1)
        for i in game_status_indexes:
            points_as_floats.insert(i, 0.00)

        checkpoint = 7
                
        # Insert "Empty" or zero float to lists where Bye week is Null
        empty_spot_indexes = []
        counter = 0
        for status in game_status_list:
            counter += 1
            if status == 'N/A':
                empty_spot_indexes.append(counter-1)
        for i in empty_spot_indexes:
            players_list.insert(i, 'Empty')
            player_team_list.insert(i, 'Empty')
            player_pos_list.insert(i, 'Empty')
            points_as_floats.insert(i, 0.0)
        
        checkpoint = 8

        # Build table from lists, write to DataFrame
        roster_df = pd.DataFrame(
            columns=['Fantasy Team', 'Week', 'Player', 'Roster Status', 'Game Status',
                     'Win-Loss Status', 'Player Team', 'Position', 'Points'])
        roster_df['Player'] = players_list
        roster_df['Roster Status'] = positions_list
        roster_df['Game Status'] = game_status_list
        roster_df['Win-Loss Status'] = win_loss_list
        roster_df['Player Team'] = player_team_list
        roster_df['Position'] = player_pos_list
        roster_df['Points'] = points_as_floats
        roster_df['Fantasy Team'] = team_name
        roster_df['Week'] = week_num

        print('Completed', team_name, 'week', week_num)
        print('')
        print('---')
        print('')

        return roster_df
    
    except Exception as e:
        print('Failed at checkpoint', str(checkpoint))
        print(e)
        
        return (players_list, positions_list, player_team_list,
                player_pos_list, points_as_floats);

In [14]:
%%time

# Run function, append to master DataFrame
master_rosters_df = pd.DataFrame(
    columns=['Fantasy Team', 'Week', 'Player', 'Roster Status', 'Game Status',
             'Win-Loss Status', 'Player Team', 'Position', 'Points'])

for url in urls_list:
    temp_df = rosterExtract(url, team_ints_to_names)
    master_rosters_df = pd.concat([master_rosters_df, temp_df], axis=0)

Beginning Child Butt Doctor week 1
Completed Child Butt Doctor week 1

---

Beginning Child Butt Doctor week 2
Completed Child Butt Doctor week 2

---

Beginning Child Butt Doctor week 3
Completed Child Butt Doctor week 3

---

Beginning Child Butt Doctor week 4
Completed Child Butt Doctor week 4

---

Beginning Child Butt Doctor week 5
Completed Child Butt Doctor week 5

---

Beginning Child Butt Doctor week 6
Completed Child Butt Doctor week 6

---

Beginning Child Butt Doctor week 7
Completed Child Butt Doctor week 7

---

Beginning Child Butt Doctor week 8
Completed Child Butt Doctor week 8

---

Beginning Child Butt Doctor week 9
Completed Child Butt Doctor week 9

---

Beginning Child Butt Doctor week 10
Completed Child Butt Doctor week 10

---

Beginning Child Butt Doctor week 11
Completed Child Butt Doctor week 11

---

Beginning Child Butt Doctor week 12
Completed Child Butt Doctor week 12

---

Beginning Child Butt Doctor week 13
Completed Child Butt Doctor week 13

---

Begi

Completed Doctor Steal Yo Girl week 10

---

Beginning Doctor Steal Yo Girl week 11
Completed Doctor Steal Yo Girl week 11

---

Beginning Doctor Steal Yo Girl week 12
Completed Doctor Steal Yo Girl week 12

---

Beginning Doctor Steal Yo Girl week 13
Completed Doctor Steal Yo Girl week 13

---

Beginning Doctor Steal Yo Girl week 14
Completed Doctor Steal Yo Girl week 14

---

Beginning Doctor Steal Yo Girl week 15
Completed Doctor Steal Yo Girl week 15

---

Beginning Doctor Steal Yo Girl week 16
Completed Doctor Steal Yo Girl week 16

---

Beginning Betty Boop Would week 1
Completed Betty Boop Would week 1

---

Beginning Betty Boop Would week 2
Completed Betty Boop Would week 2

---

Beginning Betty Boop Would week 3
Completed Betty Boop Would week 3

---

Beginning Betty Boop Would week 4
Completed Betty Boop Would week 4

---

Beginning Betty Boop Would week 5
Completed Betty Boop Would week 5

---

Beginning Betty Boop Would week 6
Completed Betty Boop Would week 6

---

Beginni

In [15]:
# master_rosters_df.head()

In [16]:
# Write DataFrame to csv in project directory
dt_string = datetime.now().strftime('%Y%m%d_%H%M%S')
master_rosters_df.to_csv(
    'C:\\Users\\David\\Desktop\\Fantasy Football\\Blastoff Boys Rosters\\Actual Rosters\\blastoff_boys_roster_scrape_{}.csv'.format(dt_string))

## <font color='orange'>Calculate Optimal Lineup</font>

In [17]:
# Loop through teams and weeks, constructing optimal lineups
team_holding_df = pd.DataFrame(columns=master_rosters_df.columns)
for team in list(team_ints_to_names.values()):
    weekly_holding_df = pd.DataFrame(columns=master_rosters_df.columns)
    for week in range (1, latest_week+1):

        # Create that week's df
        condensed_df = master_rosters_df[(
            master_rosters_df['Fantasy Team']==team) & (master_rosters_df['Week']==week)]

        # Retrieve optimal scorers for singular positions (excluding flex)
        holding_df = pd.DataFrame(columns=master_rosters_df.columns)
        for pos in ['QB', 'RB', 'WR', 'TE', 'DEF']: # 'W/R/T'
            position_df = condensed_df[(condensed_df['Position'] == pos)]
            pos_count = position_players_count_dict[pos]
            opt_pos_df = position_df.sort_values('Points', ascending=False).iloc[0:pos_count]
            holding_df = pd.concat([holding_df, opt_pos_df], axis=0)

        # Remove chosen players from orig df by index, filter to W/R/T, calc optimal flexes
        flex_only_df = condensed_df.copy()
        flex_only_df.drop(holding_df.index, inplace=True)
        flex_only_df = flex_only_df[(
            flex_only_df['Position'].isin(['RB', 'WR', 'TE', 'WR,RB', 'RB,WR', 'QB,WR',
                                           'QB,RB', 'QB,TE', 'WR,TE', 'TE,WR', 'TE,RB']))]
        flex_count = position_players_count_dict['W/R/T']
        opt_flex_df = flex_only_df.sort_values('Points', ascending=False).iloc[0:flex_count]
        holding_df = pd.concat([holding_df, opt_flex_df], axis=0)
        holding_df.sort_index(ascending=True, inplace=True)

        # Concat week's optimal roster to holding
        weekly_holding_df = pd.concat([weekly_holding_df, holding_df], axis=0)
    
    # Concat team's seasonal optimals to holding
    team_holding_df = pd.concat([team_holding_df, weekly_holding_df], axis=0)

In [18]:
# team_holding_df.head()

In [19]:
# Write DataFrame to csv in project directory
dt_string = datetime.now().strftime('%Y%m%d_%H%M%S')
team_holding_df.to_csv(
    'C:\\Users\\David\\Desktop\\Fantasy Football\\Blastoff Boys Rosters\\Optimal Rosters\\blastoff_boys_optimal_rosters_{}.csv'.format(dt_string))

## <font color='teal'>Transactions Data</font>

In [80]:
# Navigate to transactions page
transactions_url = league_url + 'transactions'
driver.get(transactions_url) # navigate to each team's roster per week

# Navigate to faab bids history
driver.find_element_by_partial_link_text('FAB Offers').click()
time.sleep(1) # force wait for loading

In [81]:
# In order to parse other bids section, need to remove apostraphes, then write back for lookup
non_apostraphe_team_names = []
for team in list(team_ints_to_names.values()):
    non_apostraphe_team_names.append(team.replace("'",''))
apostraphe_lookup_dict = dict(zip(non_apostraphe_team_names, list(team_ints_to_names.values())))

In [82]:
# Establish df to append to
master_faab_df = pd.DataFrame(
    columns=['Player Name', 'Player Team', 'Player Position', 'Bid Date', 'Winning Team', 'Winning Bid',
             'Second Bid', 'Second Bid Team', 'Last Bid', 'Last Bid Team', 'Other Bids'])

while True:
    
    # Loop to retrieve transactions
    bid_player_names_list = []
    bid_player_teams_list = []
    bid_player_pos_list = []
    winning_offers_list = []
    second_offers_list = []
    second_team_list = []
    last_offers_list = []
    last_team_list = []
    other_bids_list = []
    elements = driver.find_elements_by_class_name('No-pstart')
    for element in elements:
        player_name_team = element.text.split('\n')[0].split(' - ')[0]
        last_space_in_player_name = max(
            list([pos for pos, char in enumerate(player_name_team) if char == ' ']))
        player_name = player_name_team[0:last_space_in_player_name]
        player_team = player_name_team[last_space_in_player_name + 1:]
        player_pos = element.text.split('\n')[0].split(' - ')[1].split(' ')[0]
        bid_player_pos_list.append(player_pos)
        # On 12.29.2020 found the below loop double appends flex players (e.g.,
        # having WR,RB appends both WR and RB rather than one element: WR,RB
#         for pos in position_abbrevs_list:
#             if pos in player_pos:
#                 bid_player_pos_list.append(pos)
        winning_offer = int(element.text.split('\n')[1].replace(' Winning Offer', '').replace('$',''))
        
        # Retrieve info about other bids for this player
        other_bids = str(element.text.split('\n')[2:]).replace(' (Lower Offer)', '').replace(
            ' (Lower waiver priority)', '').replace(' (Invalid claim due to previous transaction)', '').replace(
            '[', '').replace(']', '').replace("'", "").replace('"', '')
        bids_split = other_bids.split(',')
        bids_split_cleaned = []
        teams_making_bids = []
        for bid in bids_split:
            bid = bid.replace('$', '')
            for team in non_apostraphe_team_names:
                if team in bid:
                    teams_making_bids.append(apostraphe_lookup_dict[team])
                    bid = bid.replace(team, '')
            bid = bid.replace(' ', '')
            bids_split_cleaned.append(int(bid))
        max_bid = max(bids_split_cleaned) # Find max bid and associated team
        index_max_bid = bids_split_cleaned.index(max_bid)
        team_max_bid = teams_making_bids[index_max_bid]
        second_offers_list.append(max_bid)
        second_team_list.append(team_max_bid)
        min_bid = min(bids_split_cleaned) # Find min bid and associated team
        index_min_bid = bids_split_cleaned.index(min_bid)
        team_min_bid = teams_making_bids[index_min_bid]
        last_offers_list.append(min_bid)
        last_team_list.append(team_min_bid)
        
        # Append other elements to list
        bid_player_names_list.append(player_name)
        bid_player_teams_list.append(player_team)
        winning_offers_list.append(winning_offer)
        other_bids_list.append(other_bids)
    
    # Loop to retrieve team and date of winning bids
    winning_teams = []
    winning_dates = []
    elements = driver.find_elements_by_class_name('Grid-h-top')
    for element in elements:
        team = element.text.split('\n')[0]
        trans_date = element.text.split('\n')[1].split(',')[0] + ' 2020'
        winning_teams.append(team)
        winning_dates.append(trans_date)
    
#     # Loop to retrieve team and date of winning bids
#     winning_teams = []
#     winning_dates = []
#     elements = driver.find_elements_by_class_name('Grid-h-top')
#     for element in elements:
#         team = element.text.split('\n')[0]
#         trans_date = element.text.split('\n')[1].split(',')[0] + ' 2020'
#         winning_teams.append(team)
#         winning_dates.append(trans_date)
    
    faab_df = pd.DataFrame(
        columns=['Player Name', 'Player Team', 'Player Position', 'Bid Date', 'Winning Team', 'Winning Bid',
                 'Second Bid', 'Second Bid Team', 'Last Bid', 'Last Bid Team', 'Other Bids'])
    faab_df['Player Name'] = bid_player_names_list
    faab_df['Player Team'] = bid_player_teams_list
    faab_df['Player Position'] = bid_player_pos_list
    faab_df['Bid Date'] = winning_dates
    faab_df['Winning Team'] = winning_teams
    faab_df['Winning Bid'] = winning_offers_list
    faab_df['Second Bid'] = second_offers_list
    faab_df['Second Bid Team'] = second_team_list
    faab_df['Last Bid'] = last_offers_list
    faab_df['Last Bid Team'] = last_team_list
    faab_df['Other Bids'] = other_bids_list
    
    master_faab_df = pd.concat([master_faab_df, faab_df], axis=0)
    
    try:
        driver.find_element_by_partial_link_text('Next 25').click()
    
    except:
        break

In [87]:
# print(len(bid_player_names_list))
# print(len(bid_player_teams_list))
# print(len(bid_player_pos_list))
# print(len(winning_dates))
# print(len(winning_teams))
# print(len(winning_offers_list))
# print(len(second_offers_list))
# print(len(second_team_list))
# print(len(last_offers_list))
# print(len(last_team_list))
# print(len(other_bids_list))

In [86]:
# Write DataFrame to csv in project directory
dt_string = datetime.now().strftime('%Y%m%d_%H%M%S')
master_faab_df.to_csv(
    'C:\\Users\\David\\Desktop\\Fantasy Football\\Blastoff Boys Rosters\\FAAB\\blastoff_boys_faab_{}.csv'.format(dt_string))

In [85]:
master_faab_df

Unnamed: 0,Player Name,Player Team,Player Position,Bid Date,Winning Team,Winning Bid,Second Bid,Second Bid Team,Last Bid,Last Bid Team,Other Bids
0,John Brown,Buf,WR,Dec 23 2020,Doctor Steal Yo Girl,1,0,First Round Exit,0,First Round Exit,First Round Exit $0
1,Lynn Bowden Jr.,Mia,"WR,RB",Dec 16 2020,CeeDeeZ Rugged Nuts,12,6,Doctor Steal Yo Girl,6,Doctor Steal Yo Girl,Doctor Steal Yo Girl $6
2,Ty Johnson,NYJ,RB,Dec 10 2020,CreamTeam Curdlers,15,5,Bidet to You Sir,0,Doctor Steal Yo Girl,"Bidet to You Sir $5, CeeDeeZ Rugged Nuts $3, D..."
3,Jakobi Meyers,NE,WR,Dec 6 2020,Child Butt Doctor,0,0,First Round Exit,0,First Round Exit,First Round Exit $0
4,Los Angeles,LAR,DEF,Dec 6 2020,First Round Exit,0,0,First Round Exit,0,First Round Exit,First Round Exit $0
5,Zach Ertz,Phi,TE,Dec 4 2020,Florida Man,12,0,CeeDeeZ Rugged Nuts,0,CeeDeeZ Rugged Nuts,CeeDeeZ Rugged Nuts $0
6,Denzel Mims,NYJ,WR,Dec 4 2020,JohnDavidBootyShorts,0,0,First Round Exit,0,First Round Exit,First Round Exit $0
7,Keke Coutee,Hou,WR,Dec 4 2020,Doctor Steal Yo Girl,15,8,Bidet to You Sir,0,CeeDeeZ Rugged Nuts,"Bidet to You Sir $8, CeeDeeZ Rugged Nuts $0, F..."
8,Kansas City,KC,DEF,Dec 4 2020,JohnDavidBootyShorts,58,10,Florida Man,0,First Round Exit,"Florida Man $10, First Round Exit $0"
9,Tony Pollard,Dal,RB,Nov 25 2020,Bidet to You Sir,3,0,First Round Exit,0,First Round Exit,First Round Exit $0


In [79]:
# ### TEST DELETE

# # Loop to retrieve transactions
# test_bid_player_names_list = []
# test_bid_player_teams_list = []
# test_bid_player_pos_list = []
# test_winning_offers_list = []
# test_second_offers_list = []
# test_second_team_list = []
# test_last_offers_list = []
# test_last_team_list = []
# test_other_bids_list = []
# elements = driver.find_elements_by_class_name('No-pstart')
# for element in elements:
#     player_name_team = element.text.split('\n')[0].split(' - ')[0]
#     last_space_in_player_name = max(
#         list([pos for pos, char in enumerate(player_name_team) if char == ' ']))
#     player_name = player_name_team[0:last_space_in_player_name]
#     player_team = player_name_team[last_space_in_player_name + 1:]
#     player_pos = element.text.split('\n')[0].split(' - ')[1].split(' ')[0]
#     test_bid_player_pos_list.append(player_pos)
# #     print(player_pos)
# #     for pos in position_abbrevs_list:
# #         if pos in player_pos:
# #             test_bid_player_pos_list.append(player_pos)
#     winning_offer = int(element.text.split('\n')[1].replace(' Winning Offer', '').replace('$',''))

#     # Retrieve info about other bids for this player
#     other_bids = str(element.text.split('\n')[2:]).replace(' (Lower Offer)', '').replace(
#         ' (Lower waiver priority)', '').replace(' (Invalid claim due to previous transaction)', '').replace(
#         '[', '').replace(']', '').replace("'", "").replace('"', '')
#     bids_split = other_bids.split(',')
#     bids_split_cleaned = []
#     teams_making_bids = []
#     for bid in bids_split:
#         bid = bid.replace('$', '')
#         for team in non_apostraphe_team_names:
#             if team in bid:
#                 teams_making_bids.append(apostraphe_lookup_dict[team])
#                 bid = bid.replace(team, '')
#         bid = bid.replace(' ', '')
#         bids_split_cleaned.append(int(bid))
#     max_bid = max(bids_split_cleaned) # Find max bid and associated team
#     index_max_bid = bids_split_cleaned.index(max_bid)
#     team_max_bid = teams_making_bids[index_max_bid]
#     second_offers_list.append(max_bid)
#     second_team_list.append(team_max_bid)
#     min_bid = min(bids_split_cleaned) # Find min bid and associated team
#     index_min_bid = bids_split_cleaned.index(min_bid)
#     team_min_bid = teams_making_bids[index_min_bid]
#     last_offers_list.append(min_bid)
#     last_team_list.append(team_min_bid)

#     # Append other elements to list
#     test_bid_player_names_list.append(player_name)
#     test_bid_player_teams_list.append(player_team)
#     test_winning_offers_list.append(winning_offer)
#     test_other_bids_list.append(other_bids)

# # Loop to retrieve team and date of winning bids
# test_winning_teams = []
# test_winning_dates = []
# elements = driver.find_elements_by_class_name('Grid-h-top')
# for element in elements:
#     team = element.text.split('\n')[0]
#     trans_date = element.text.split('\n')[1].split(',')[0] + ' 2020'
#     test_winning_teams.append(team)
#     test_winning_dates.append(trans_date)

# # # Loop to retrieve team and date of winning bids
# # winning_teams = []
# # winning_dates = []
# # elements = driver.find_elements_by_class_name('Grid-h-top')
# # for element in elements:
# #     team = element.text.split('\n')[0]
# #     trans_date = element.text.split('\n')[1].split(',')[0] + ' 2020'
# #     winning_teams.append(team)
# #     winning_dates.append(trans_date)

# print(len(test_bid_player_names_list))
# print(len(test_bid_player_teams_list))
# print(len(test_bid_player_pos_list))
# print(len(test_winning_dates))
# print(len(test_winning_teams))
# print(len(test_winning_offers_list))
# print(len(test_second_offers_list))
# print(len(test_second_team_list))
# print(len(test_last_offers_list))
# print(len(test_last_team_list))
# print(len(test_other_bids_list))

In [39]:
# Navigate to transactions page
transactions_url = league_url + 'transactions'
driver.get(transactions_url) # navigate to each team's roster per week

# Navigate to faab bids history
driver.find_element_by_partial_link_text('FAB Offers').click()
time.sleep(1) # force wait for loading

In [40]:
# Loop to retrieve transactions
bid_player_names_list = []
bid_player_teams_list = []
bid_player_pos_list = []
winning_offers_list = []
other_bids_list = []
elements = driver.find_elements_by_class_name('No-pstart')
for element in elements:
    player_name_team = element.text.split('\n')[0].split(' - ')[0]
    last_space_in_player_name = max(
        [pos for pos, char in enumerate(player_name_team) if char == ' '])
    player_name = player_name_team[0:last_space_in_player_name]
    player_team = player_name_team[last_space_in_player_name + 1:]
    player_pos = element.text.split('\n')[0].split(' - ')[1].split(' ')[0]
    for pos in position_abbrevs_list:
        if pos in player_pos:
            bid_player_pos_list.append(pos)
    winning_offer = int(element.text.split('\n')[1].replace(' Winning Offer', '').replace('$',''))
    other_bids = str(element.text.split('\n')[2:]).replace(' (Lower Offer)','').replace(
        ' (Lower waiver priority)','').replace(' (Invalid claim due to previous transaction)','').replace(
        '[','').replace(']','').replace("'","").replace('"','')
    bid_player_names_list.append(player_name)
    bid_player_teams_list.append(player_team)
    winning_offers_list.append(winning_offer)
    other_bids_list.append(other_bids)

In [48]:
print(bid_player_names_list[2])
print(bid_player_teams_list[2])
print(bid_player_pos_list[2])
print(winning_offers_list[2])
print(other_bids_list[2])

Ty Johnson
NYJ
WR
15
Bidet to You Sir $5, CeeDeeZ Rugged Nuts $3, Doctor Steal Yo Girl $0


In [52]:
team_ints_to_names.values()

dict_values(['Child Butt Doctor', 'CreamTeam Curdlers', 'First Round Exit', 'OOO Till Playoffs', 'JohnDavidBootyShorts', 'Bidet to You Sir', 'Doctor Steal Yo Girl', 'Betty Boop Would', 'OnlyFans Cam Girl', 'My Dick Sucks', 'Florida Man', 'North Source Boys'])

In [49]:
bids_split = other_bids_list[2].split(',')
bids_split

bids_split_cleaned = []
teams_making_bids = []
for bid in bids_split:
    bid = bid.replace('$','')
    for team in list(team_ints_to_names.values()):
        if team in bid:
            teams_making_bids.append(team)
            bid = bid.replace(team, '')
    bid = bid.replace(' ', '')
    bids_split_cleaned.append(int(bid))

# Find max bid and associated team
max_bid = max(bids_split_cleaned)
index_max_bid = bids_split_cleaned.index(max_bid)
team_max_bid = teams_making_bids[index_max_bid]

# Find min bid and associated team
min_bid = min(bids_split_cleaned)
index_min_bid = bids_split_cleaned.index(min_bid)
team_min_bid = teams_making_bids[index_min_bid]

ValueError: invalid literal for int() with base 10: 'CeeDeeZRuggedNuts3'

In [50]:
bid

'CeeDeeZRuggedNuts3'

In [45]:
print(max_bid)
print(team_max_bid)
print(min_bid)
print(team_min_bid)

0
First Round Exit
0
First Round Exit


In [46]:
bids_split_cleaned = []
teams_making_bids = []
for bid in bids_split:
    bid = bid.replace('$','')
    for team in list(team_ints_to_names.values()):
        if team in bid:
            teams_making_bids.append(team)
            bid = bid.replace(team, '')
    bid = bid.replace(' ', '')
    bids_split_cleaned.append(int(bid))

# Find max bid and associated team
max_bid = max(bids_split_cleaned)
index_max_bid = bids_split_cleaned.index(max_bid)
team_max_bid = teams_making_bids[index_max_bid]

# Find min bid and associated team
min_bid = min(bids_split_cleaned)
index_min_bid = bids_split_cleaned.index(min_bid)
team_min_bid = teams_making_bids[index_min_bid]

In [47]:
print(max_bid)
print(index_max_bid)
print(team_max_bid)
print(min_bid)
print(index_min_bid)
print(team_min_bid)

0
0
First Round Exit
0
0
First Round Exit


In [32]:
# Loop to retrieve team and date of winning bids
winning_teams = []
winning_dates = []
elements = driver.find_elements_by_class_name('Grid-h-top')
for element in elements:
    team = element.text.split('\n')[0]
    trans_date = element.text.split('\n')[1].split(',')[0] + ' 2020'
    winning_teams.append(team)
    winning_dates.append(trans_date)

In [33]:
bids_split_cleaned

[0]

In [35]:
# print(winning_teams[0:5])
# print(winning_dates[0:5])

In [36]:
faab_df = pd.DataFrame(columns=['Player Name', 'Player Team', 'Player Position',
                                'Winning Team', 'Winning Bid', 'Bid Date', 'Other Bids'])
faab_df['Player Name'] = bid_player_names_list
faab_df['Player Team'] = bid_player_teams_list
faab_df['Player Position'] = bid_player_pos_list
faab_df['Winning Team'] = winning_teams
faab_df['Winning Bid'] = winning_offers_list
faab_df['Bid Date'] = winning_dates
faab_df['Other Bids'] = other_bids_list

ValueError: Length of values does not match length of index

In [37]:
faab_df.head()

Unnamed: 0,Player Name,Player Team,Player Position,Winning Team,Winning Bid,Bid Date,Other Bids
0,John Brown,Buf,,,,,
1,Lynn Bowden Jr.,Mia,,,,,
2,Ty Johnson,NYJ,,,,,
3,Jakobi Meyers,NE,,,,,
4,Los Angeles,LAR,,,,,


## <font color='teal'>Test Individual Team/Week</font>

In [None]:
# test_df = rosterExtract(urls_list[38], team_ints_to_names)

In [None]:
# test_df

In [None]:
# control = 0
# print(len(test_df[control]))
# test_df[control]

## <font color='teal'>Original Component Code</font>

In [None]:
# # Navigate to first team's webpage
# driver.get(team_urls_dict['Team1'])

In [None]:
# # navigate to week 1 for this team
# driver.get(team_urls_dict['Team1']+'/team?&week=1')

In [None]:
# # roster table
# roster_xpath = '//*[@id="statTable0"]'
# driver.find_element_by_xpath(roster_xpath).click()

In [None]:
# # find by partial href url tutorial
# elements = driver.find_elements_by_xpath("//a[contains(@href, '/mathscinet/search/mscdoc.html')]")
# for element in elements:
#     print(element.text)

In [None]:
# # Loop through and retrieve players by href
# players_list = []
# elements = driver.find_elements_by_xpath('//a[contains(@href, "https://sports.yahoo.com/nfl/")]')
# # elements = driver.find_elements_by_xpath('//a[contains(@href, "https://sports.yahoo.com/nfl/players/")]')
# for element in elements:
#     if element.text != '' and\
#     '@' not in element.text and\
#     'StatTracker' not in element.text and\
#     'vs' not in element.text:
#         players_list.append(element.text)

In [None]:
# players_list

In [None]:
# find_elements_by_tag_name
# find_elements_by_class_name

In [None]:
# # Loop through and retrieve game statuses by class
# game_status_list = []
# win_loss_list = []
# elements = driver.find_elements_by_class_name('ysf-game-status')
# for element in elements:
#     if 'Bye' in element.text:
#         game_status_list.append('Bye')
#     elif 'vs' in element.text:
#         game_status_list.append('Home')
#     elif '@' in element.text:
#         game_status_list.append('Away')
#     else:
#         game_status_list.append('N/A')
# for element in elements:
#     if 'Bye' in element.text:
#         win_loss_list.append('Bye')
#     elif 'Final W' in element.text:
#         win_loss_list.append('W')
#     elif 'Final L' in element.text:
#         win_loss_list.append('L')
#     elif 'Final T' in element.text:
#         win_loss_list.append('T')
#     else:
#         win_loss_list.append('N/A')

In [None]:
# game_status_list
# win_loss_list

In [None]:
# # Loop through and retrieve empty roster spots (e.g., played only 1RB, left 2RB empty) by class
# empty_spot_list = []
# elements = driver.find_elements_by_class_name('Nowrap emptyplayer')
# for element in elements:
#     empty_spot_list.append(element.text)

In [None]:
# empty_spot_list

In [None]:
# # Insert "Empty" tag to player roster where Bye week is Null
# empty_spot_indexes = []
# counter = 0
# for status in game_status_list:
#     counter += 1
#     if status == 'N/A':
#         empty_spot_indexes.append(counter-1)
# for i in empty_spot_indexes:
#     players_list.insert(i, 'Empty')

In [None]:
# empty_spot_indexes

In [None]:
# # Loop through and retrieve positions by class
# positions_list = []
# elements = driver.find_elements_by_class_name('pos-label')
# for element in elements:
# #     if element.text != '':
#     positions_list.append(element.text)

In [None]:
# positions_list

In [None]:
# # Loop through and retrieve player teams and positions by class
# player_team_list = []
# player_pos_list = []
# elements = driver.find_elements_by_class_name('Fz-xxs')
# for element in elements:
#     if ' - ' in element.text:
#         element_tuple = element.text.split(' - ', 1)
#         team = element_tuple[0]
#         pos = element_tuple[1]
#         if pos in ['QB', 'RB', 'WR', 'TE', 'DEF', 'K',
#                    'WR,RB', 'RB,WR', 'QB,WR', 'QB,RB',
#                    'QB,TE', 'WR,TE', 'TE,WR', 'TE,RB']:
#             player_team_list.append(team)
#             player_pos_list.append(pos)

In [None]:
# player_team_list
# player_pos_list

In [None]:
# # Loop through and retrieve positions by class
# actual_points = []
# elements = driver.find_elements_by_class_name('pps Fw-b has-stat-note ')
# for element in elements:
# #     if element.text != '':
#     actual_points.append(element.text)

In [None]:
# actual_points

In [None]:
# # Loop through and retrieve actual points by href
# points_as_strings = []
# # counter = 0
# elements = driver.find_elements_by_xpath('//a[contains(@href, "#pps-")]')
# for element in elements:
# #     counter += 1
# #     if game_status_list[counter-1] == 'Bye':
# #         points_as_strings.append('0')
#     if element.text != '':
#         points_as_strings.append(element.text)

# # Convert strings to floats
# points_as_floats = [float(pt) for pt in points_as_strings]

# # Insert zero floats where player on bye
# game_status_indexes = []
# counter = 0
# for status in game_status_list:
#     counter += 1
#     if status == 'Bye':
#         game_status_indexes.append(counter-1)
# for i in game_status_indexes:
#     points_as_floats.insert(i, 0.00)

In [None]:
# points_as_floats

In [None]:
# # Build table from lists, write to DataFrame
# roster_df = pd.DataFrame(columns=['Player', 'Status', 'Player Team', 'Position', 'Points'])
# roster_df['Player'] = players_list
# roster_df['Status'] = positions_list
# roster_df['Player Team'] = player_team_list
# roster_df['Position'] = player_pos_list
# roster_df['Points'] = points_as_floats

In [38]:
# roster_df

In [None]:
# print(len(players_list))
# print(len(positions_list))
# print(len(player_team_list))
# print(len(player_pos_list))
# print(len(points_as_floats))