# A04. Bullpens
Sources: 
- Team pages on mlb.com
- Wayback Machine (if necessary)

In [21]:
# Scrape each team's bullpen depth chart from its MLB.com page (or from a Wayback Machine snapshot).
# The first reliever listed is treated as the closer; other high-leverage pitchers usually follow.
# Browser-like request headers so the site does not treat the request as a scrape.
header = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}

# Extract bullpen information from MLB.com depth charts
def _leverage_for_rank(rank):
    """Return the leverage tier for a reliever at 0-based depth-chart position *rank*."""
    if rank == 0:
        return 4  # top of the chart: closer
    if rank < 4:
        return 3  # next three: set-up / high leverage
    if rank < 11:
        return 2  # next seven: low leverage
    return 0  # anyone deeper is assumed never to come into a game


def _active_relievers(table):
    """Drop relievers flagged as on the IL or in the minors and renumber rows from 0."""
    label = table["Bullpen.1"]
    # na=True counts rows with a missing label as unavailable, matching the
    # earlier `contains(...) == False` filter, which also dropped them.
    on_il = label.str.contains("IL-", na=True)
    in_minors = label.str.contains(" Minors", na=True)
    # drop=True avoids leaking the old index into stray 'index'/'level_0' columns.
    return table[~(on_il | in_minors)].reset_index(drop=True)


def scrape_bullpen(mlburl: str, header: dict, abbrev: str, date=None):
    """Scrape a team's bullpen depth chart from MLB.com and assign leverage tiers.

    The depth chart lists relievers in order; the first available reliever is
    treated as the closer (leverage 4), the next three as high leverage (3),
    the next seven as low leverage (2) and everyone else as 0.

    Args:
        mlburl: Team slug used in the MLB.com URL (``https://www.mlb.com/{mlburl}/...``).
        header: HTTP request headers passed to ``requests.get``.
        abbrev: Team abbreviation stored in the ``BBREFTEAM`` column.
        date: Value stored in the ``date`` column. It is not used to build the
            URL; see the Wayback Machine note in the body.

    Returns:
        DataFrame with columns ``Name``, ``B/T``, ``Leverage``, ``date`` and
        ``BBREFTEAM``, one row per available reliever.

    Raises:
        IndexError, KeyError, AttributeError: If neither candidate table is a
            usable bullpen table.
    """
    url = f"https://www.mlb.com/{mlburl}/roster/depth-chart"
    # For a historical snapshot, the Wayback Machine equivalent is:
    # url = f"https://web.archive.org/web/{date}/https://www.mlb.com/{mlburl}/roster/depth-chart"

    # A timeout keeps one unresponsive request from hanging the whole scrape.
    r = requests.get(url, headers=header, timeout=30)
    tables = pd.read_html(r.text, encoding='iso-8859-1')

    # The bullpen can be one of two tables: try the third table first and fall
    # back to the second when it is missing or has no usable "Bullpen.1" column.
    try:
        df = _active_relievers(tables[2])
    except (IndexError, KeyError, AttributeError):
        df = _active_relievers(tables[1])

    # Assign leverage by position in one shot; chained `df[col][i] = x`
    # assignment is unreliable and does nothing under pandas Copy-on-Write.
    df['Leverage'] = [_leverage_for_rank(rank) for rank in range(len(df))]

    # The name is everything before the "B/T" marker in column Bullpen.1.
    names = df['Bullpen.1'].str.split("B/T", n=1).str[0]
    # Remove numbers (regex must be requested explicitly on pandas >= 2.0).
    names = names.str.replace(r'\d+', '', regex=True)
    # Remove closer tag (literal match).
    names = names.str.replace("(CL)", '', regex=False)
    # Clean name: remove accents, then surrounding whitespace.
    df['Name'] = names.map(remove_accents).str.strip()

    # Keep Name, Bats/Throws, Leverage; copy so the columns added below are
    # written to an independent frame rather than a slice.
    df = df[['Name', 'B/T', 'Leverage']].copy()

    # Date
    df['date'] = date
    # Baseball Reference team abbreviation
    df['BBREFTEAM'] = abbrev

    return df