In [36]:
import re
import time
import requests
from bs4 import BeautifulSoup

In [37]:
class ScrapPlayer():
    """Scrape per-game log rows for players from ESPN's MLB game-log pages.

    `run` walks a list of player dicts, downloads the batting and/or pitching
    game-log page for each one and converts every table row into a flat dict
    of strings via `transform`.
    """

    # Stat names in the order their cells follow the DATE / OPP / RESULT cells
    # of a game-log row, i.e. the first name maps to cell index 3.
    _BATTING_STATS = (
        'AB', 'R', 'H', '2B', '3B', 'HR', 'RBI', 'BB',
        'HBP', 'SO', 'SB', 'CS', 'AVG', 'OBP', 'SLG', 'OPS',
    )
    _PITCHING_STATS = (
        'IP', 'H', 'R', 'ER', 'HR', 'BB', 'K', 'GB',
        'FB', 'P', 'TBF', 'GSC', 'DEC', 'REL', 'ERA',
    )

    # Upper bound for a single HTTP request so one stalled connection cannot
    # block the whole run.
    _REQUEST_TIMEOUT_IN_SECONDS = 30

    def get_url(self, player_id, category_type, year=2022):
        """Build the game-log URL for one player.

        Args:
            player_id: Player id placed in the URL path.
            category_type: 'BATTER' selects the batting log; any other value
                selects the pitching log.
            year: Season placed in the URL path (defaults to 2022).

        Returns:
            The URL as a string.
        """
        category = 'batting' if category_type == 'BATTER' else 'pitching'
        return f'https://www.espn.com/mlb/player/gamelog/_/id/{player_id}/year/{year}/category/{category}'

    def transform(self, player, cols, category_type):
        """Convert the cells of one game-log row into a flat record.

        Args:
            player: Mapping with at least the keys 'id' and 'name'.
            cols: Sequence of cell objects exposing a `.text` attribute, in
                page order: date, opponent, result, then the stat cells.
            category_type: 'BATTER' for the batting layout; any other value
                uses the pitching layout.

        Returns:
            A dict with 'ID', 'NAME', 'DATE', 'OPP', 'RESULT' followed by one
            entry per stat column. Every stat value is the raw cell text.

        Raises:
            IndexError: If `cols` has fewer cells than the layout requires.
        """
        stat_names = self._BATTING_STATS if category_type == 'BATTER' else self._PITCHING_STATS

        record = {
            'ID': player['id'],
            'NAME': player['name'],
            # Drop whatever precedes the numeric date: 'Sun 5/1' -> '5/1'.
            'DATE': re.sub(r'.+?([\d/]+)', r'\g<1>', cols[0].text),
            # Cell text has no separators: 'vsCHC' -> 'vs CHC', '@LAD' -> '@ LAD'.
            'OPP': re.sub(r'(@|vs)(.+)', r'\g<1> \g<2>', cols[1].text),
            # 'L2-0' -> 'L 2-0'.
            'RESULT': re.sub(r'(W|L)(.+)', r'\g<1> \g<2>', cols[2].text),
        }
        # Explicit indexing (rather than zip) so a short row still raises
        # IndexError instead of silently producing a partial record.
        for index, stat_name in enumerate(stat_names, start=3):
            record[stat_name] = cols[index].text
        return record

    def _scrape_game_log(self, player, url, category_type):
        """Download one game-log page and return its rows as records.

        Returns None when the page contains no `.gamelog` section.
        """
        content = requests.get(url, timeout=self._REQUEST_TIMEOUT_IN_SECONDS).content
        bs = BeautifulSoup(content, features='html.parser')

        sections = bs.select('.gamelog')
        if len(sections) == 0:
            return None

        rows = sections[0].select('.mb5 .Table__TBODY tr')
        return [
            self.transform(player, row.find_all('td'), category_type)
            for row in rows
            # A row without a class attribute yields None from .get(); treat
            # it as "no classes" rather than failing the membership test.
            if 'totals_row' not in (row.get('class') or [])
        ]

    def run(self, players, sleep_time_in_seconds = 5, year=2022):
        """Scrape the game logs of every player in `players`.

        Args:
            players: Iterable of mappings with the keys 'id', 'name' and
                'is_a' (an iterable of category types such as 'BATTER').
            sleep_time_in_seconds: Pause after each request, successful or
                not, to throttle traffic to the site.
            year: Season to scrape (defaults to 2022).

        Returns:
            A `(pitchers, batters)` tuple of lists of records as produced by
            `transform`. Pages that cannot be fetched or parsed are reported
            on stdout and skipped.
        """
        pitchers, batters = [], []
        for player in players:
            player_id = player['id']
            for is_a in player['is_a']:
                url = self.get_url(player_id, is_a, year)
                try:
                    stats = self._scrape_game_log(player, url, is_a)
                except Exception as error:
                    # Best effort: report the cause and move on to the next
                    # page. KeyboardInterrupt is not an Exception, so the
                    # user can still abort the run.
                    print(f'error: {url} ({error!r})')
                else:
                    if stats is None:
                        print(f'error: {url}')
                    else:
                        (batters if is_a == 'BATTER' else pitchers).extend(stats)

                # Throttle after every request, including failed ones.
                time.sleep(sleep_time_in_seconds)

        return pitchers, batters

In [38]:
import json

# Index every team roster in the JSON file by team name.
rosters = {}
# JSON text is UTF-8; say so explicitly instead of relying on the platform's
# default locale encoding, which may not be UTF-8.
with open('../../data/mlb/roster-853208662.json', 'r', encoding='utf-8') as roster_json:
    for team_roster in json.load(roster_json):
        rosters[team_roster['name']] = team_roster

# Shallow copy of one team's player list.
# NOTE: `all` shadows the builtin of the same name; the name is kept because
# later cells read it.
all = list(rosters['Snoring Eeyores']['roster'])

In [39]:
# Peek at the first roster entry to check the shape of a player record.
all[:1]

[{'id': 39878,
  'name': 'Corbin Burnes',
  'is_a': ['PITCHER'],
  'is_on': 'Snoring Eeyores'}]

In [40]:
# Trial run of the scraper on just the first roster entry.
pitchers, batters = ScrapPlayer().run(all[:1])
# Trailing expression: display the scraped pitcher records as the cell output.
pitchers

[<tr class="Table__TR Table__TR--sm Table__even" data-idx="0"><td class="Table__TD">Sun 5/1</td><td class="Table__TD"><span class="flex"><span class="pr2">vs</span><span class="pr2 TeamLink__Logo"><a class="AnchorLink v-mid" data-clubhouse-uid="s:1~l:10~t:16" href="/mlb/team/_/name/chc/chicago-cubs" tabindex="0" title="Team - Chicago Cubs"></a></span><span><a class="AnchorLink v-mid" data-clubhouse-uid="s:1~l:10~t:16" href="/mlb/team/_/name/chc/chicago-cubs" tabindex="0" title="Team - Chicago Cubs">CHC</a></span></span></td><td class="Table__TD"><a class="AnchorLink" data-game-link="true" href="http://www.espn.com/mlb/game/_/gameId/401354569" tabindex="0"><div class="inline flex tl"><div class="inline pr2"><div class="ResultCell tl loss-stat">L</div></div><span>2-0</span></div></a></td><td class="Table__TD">7.0</td><td cl

[{'ID': 39878,
  'NAME': 'Corbin Burnes',
  'DATE': '5/1',
  'OPP': 'vs CHC',
  'RESULT': 'L 2-0',
  'IP': '7.0',
  'H': '4',
  'R': '2',
  'ER': '2',
  'HR': '1',
  'BB': '1',
  'K': '10',
  'GB': '8',
  'FB': '7',
  'P': '97',
  'TBF': '26',
  'GSC': '70.0',
  'DEC': 'L(1-1)',
  'REL': '-',
  'ERA': '1.93'},
 {'ID': 39878,
  'NAME': 'Corbin Burnes',
  'DATE': '4/25',
  'OPP': 'vs SF',
  'RESULT': 'L 4-2',
  'IP': '6.2',
  'H': '2',
  'R': '0',
  'ER': '0',
  'HR': '0',
  'BB': '2',
  'K': '11',
  'GB': '5',
  'FB': '5',
  'P': '106',
  'TBF': '24',
  'GSC': '79.0',
  'DEC': '-',
  'REL': '-',
  'ERA': '1.75'},
 {'ID': 39878,
  'NAME': 'Corbin Burnes',
  'DATE': '4/19',
  'OPP': 'vs PIT',
  'RESULT': 'W 5-2',
  'IP': '7.0',
  'H': '4',
  'R': '2',
  'ER': '2',
  'HR': '2',
  'BB': '0',
  'K': '10',
  'GB': '11',
  'FB': '4',
  'P': '107',
  'TBF': '25',
  'GSC': '71.0',
  'DEC': 'W(1-0)',
  'REL': '-',
  'ERA': '2.37'},
 {'ID': 39878,
  'NAME': 'Corbin Burnes',
  'DATE': '4/13',
  'OP

In [41]:
# Fetch one pitching game-log page by hand so its markup can be inspected.
# The URL comes from ScrapPlayer.get_url so this cell cannot drift from the
# address the scraper itself requests; the timeout keeps a stalled connection
# from hanging the kernel.
content = requests.get(ScrapPlayer().get_url(39878, 'PITCHER'), timeout=30).content
bs = BeautifulSoup(content, features='html.parser')

In [42]:
# Every element matching the `.gamelog` selector on the fetched page.
section = bs.select('.gamelog')
# List the table-body rows inside the first match; raises IndexError if nothing matched.
section[0].select('.mb5 .Table__TBODY tr')

[<tr class="Table__TR Table__TR--sm Table__even" data-idx="0"><td class="Table__TD">Sun 5/1</td><td class="Table__TD"><span class="flex"><span class="pr2">vs</span><span class="pr2 TeamLink__Logo"><a class="AnchorLink v-mid" data-clubhouse-uid="s:1~l:10~t:16" href="/mlb/team/_/name/chc/chicago-cubs" tabindex="0" title="Team - Chicago Cubs"></a></span><span><a class="AnchorLink v-mid" data-clubhouse-uid="s:1~l:10~t:16" href="/mlb/team/_/name/chc/chicago-cubs" tabindex="0" title="Team - Chicago Cubs">CHC</a></span></span></td><td class="Table__TD"><a class="AnchorLink" data-game-link="true" href="http://www.espn.com/mlb/game/_/gameId/401354569" tabindex="0"><div class="inline flex tl"><div class="inline pr2"><div class="ResultCell tl loss-stat">L</div></div><span>2-0</span></div></a></td><td class="Table__TD">7.0</td><td cl