This notebook pulls the list of probable starting pitchers from the Baseball-Reference (b-ref) previews page.

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import matplotlib.pyplot as plt
import pandas as pd
import requests
import urllib.request
import numpy as np
import math
from datetime import date

from bs4 import BeautifulSoup

In [2]:
# Request setup: download the Baseball-Reference previews page and parse it
url = "https://www.baseball-reference.com/previews/"

# Pass an explicit timeout (in seconds) so a slow or unreachable site raises
# an error instead of blocking the kernel; without it, urlopen falls back to
# the global socket timeout, which is normally unset.
with urllib.request.urlopen(url, timeout=30) as response:
    html = response.read()

# Parse the raw bytes with the stdlib-backed HTML parser
soup = BeautifulSoup(html, 'html.parser')


In [3]:
# Get the list of probables
# Every matchup block found in `soup` produces two rows, one per starting
# pitcher, each recording the pitcher's name, code, team and opponent.


def _pitcher_code(anchor):
    """Return the player code parsed from a pitcher link, or NaN if unavailable.

    The code is the sixth "/"-separated piece of the link's href with
    everything from the first "." onward removed.
    """
    try:
        return anchor.get('href').split("/")[5].split(".")[0]
    except (AttributeError, IndexError):
        # AttributeError: the tag has no href (`.get` returned None).
        # IndexError: the href has fewer path segments than expected.
        return np.nan


# We're going to loop through this list of matchups
matchups = soup.find_all('div', class_='game_summary nohover')

# Evaluate the date once so every row carries the same value
today = date.today()

# Collect plain dicts and build the DataFrame once after the loop.
# DataFrame.append was removed in pandas 2.0, and it copied the whole frame
# on every call.
records = []

# For each matchup in the full list of matchups
for matchup in matchups:
    # Search the matchup once for links and once for bold tags
    links = matchup.find_all('a')
    strong_tags = matchup.find_all('strong')

    # The links at positions 3 and 4 are read as the two probable pitchers
    p1_link, p2_link = links[3], links[4]

    # The first bold tag holds the first pitcher's team
    team1 = strong_tags[0].text

    # Get the second pitcher's team.
    # A debut in the matchup adds to the 'strong' tags, so the second team is
    # taken from the midpoint of the list rather than from a fixed index.
    # NOTE(review): if only the second pitcher carries an extra 'strong' tag,
    # the midpoint may land on that tag instead of the team - confirm against
    # the page markup.
    debut_adj = math.ceil(len(strong_tags) / 2)
    team2 = strong_tags[debut_adj].text

    # One record for the first pitcher and their matchup
    records.append({
        'date': today,
        'name': p1_link.text,
        'code': _pitcher_code(p1_link),
        'for': team1,
        'against': team2,
    })

    # One record for the second pitcher, with the teams swapped
    records.append({
        'date': today,
        'name': p2_link.text,
        'code': _pitcher_code(p2_link),
        'for': team2,
        'against': team1,
    })

# Naming the columns keeps the schema stable even when no matchups were found
probables = pd.DataFrame(records, columns=['date', 'name', 'code', 'for', 'against'])

probables

Unnamed: 0,date,name,code,for,against
0,2023-09-13,Taj Bradley,bradlta01,TBR,MIN
1,2023-09-13,Dallas Keuchel,keuchda01,MIN,TBR
2,2023-09-13,Jameson Taillon,taillja01,CHC,COL
3,2023-09-13,Ty Blach,blachty01,COL,CHC
4,2023-09-13,Logan Allen,allenlo02,CLE,SFG
5,2023-09-13,Kyle Harrison,harriky01,SFG,CLE
6,2023-09-13,Andrew Wantz,wantzan01,LAA,SEA
7,2023-09-13,Luis Castillo,castilu02,SEA,LAA
8,2023-09-13,Drew Rom,romdr01,STL,BAL
9,2023-09-13,Kyle Gibson,gibsoky01,BAL,STL


In [4]:
# Write the probables table to probables.csv, leaving out the row index
probables.to_csv(path_or_buf='probables.csv', index=False)