In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

import constants
import utils

Scrape the list of all ranked MLB players from FantasyPros.

In [2]:
# Page the player rankings are scraped from.
PLAYERS_URL = 'https://www.fantasypros.com/mlb/rankings/ros-overall.php'

# Column labels applied, in order, to the cells of each scraped table row.
HEADERS = [
    'Rank',
    'Player',
    'Best',
    'Worst',
    'Avg',
    'Std Dev',
    'ADP',
    'vs. ADP',
    'Notes',
]

In [3]:
# Download the rankings page. The timeout stops a stalled connection from
# hanging the kernel, and raise_for_status() reports an HTTP error here rather
# than as a confusing parse failure further down.
response = requests.get(PLAYERS_URL, timeout=30)
response.raise_for_status()
site_data = response.text

# Name the parser explicitly: without it BeautifulSoup uses whichever parser
# happens to be installed (and emits GuessedAtParserWarning), so the parsed
# tree can differ between environments.
soup = BeautifulSoup(site_data, 'html.parser')

# The first <table> on the page is taken to be the rankings table; later
# cells iterate over the rows of its <tbody>, so fail fast if either is absent.
table = soup.find('table')
if table is None or table.tbody is None:
    raise ValueError('No rankings table found at ' + PLAYERS_URL)

In [4]:
# One entry per <tr> in the table body; each entry is the list of that row's
# <td> texts with surrounding whitespace removed.
players = [
    [cell.text.strip() for cell in table_row.find_all('td')]
    for table_row in table.tbody.find_all('tr')
]

In [5]:
# Label the scraped rows with HEADERS, then discard the 'Notes' column, which
# is not used afterwards.
df = pd.DataFrame(players, columns=HEADERS).drop(columns=['Notes'])


In [6]:
# Parse out player, team and positions.
# NOTE(review): assumes each 'Player' entry looks like "Name (TEAM - POS)";
# confirm against the live page.

# Team: the text after the first '(' up to the next space.
df['Team'] = df['Player'].str.split('(', n=1).str[1].str.split(' ', n=1).str[0]
# Positions: the last space-separated token before the first ')'.
df['Positions'] = df['Player'].str.split(')', n=1).str[0].str.rsplit(' ', n=1).str[1]
# Player Name: everything before the first " (". The pattern is a raw string
# because '\(' is not a valid escape sequence in an ordinary string literal
# and triggers a warning; the pattern's value is unchanged.
df['Player Name'] = df['Player'].str.split(r' \(', n=1).str[0]

In [7]:
# Match players to an MLB ID.
# The search key is the player name with every space removed; the space is
# matched literally rather than as a regular expression.
player_names = df['Player Name']
df['SearchValue'] = player_names.str.replace(' ', '', regex=False)

In [8]:
# Run the project's player-search helper on every search key and keep the raw
# result; the player ID is extracted from it in a later cell.
search_keys = df['SearchValue']
df['Player JSON'] = search_keys.apply(utils.search_for_player)

In [12]:
def _player_id_or_missing(player_dict):
    """Return the 'id' entry of a search result, or -1 when there is no result."""
    if player_dict is None:
        return -1
    return player_dict.get('id')


df['Player ID'] = df['Player JSON'].apply(_player_id_or_missing)
# The raw search result and the search key were only needed to derive the ID.
df = df.drop(columns=['Player JSON', 'SearchValue'])

In [13]:
# Hand the finished player table to the project's save helper, using the
# filename configured in constants.
output_filename = constants.FANTASYPROS_PLAYERS_FILENAME
utils.save_dataframe(df, output_filename)