In [1]:
#Import libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import csv
import time

In [None]:
def scrape_baseball_reference(url, writer):
    """Download one awards page and write its AL and NL MVP voting rows.

    Args:
        url: Address of the awards page to fetch. It is also forwarded to
            ``scrape_table``, which derives the year from it.
        writer: Object exposing ``writerow`` (e.g. a ``csv.writer``); it is
            passed through to ``scrape_table`` unchanged.

    Returns:
        None. When the request fails (network error, timeout, or a non-200
        status) a message is printed and nothing is written. When one of the
        two tables is absent from the page, a message is printed and only
        the other table is written.
    """
    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # Treat transport errors like a bad status: report and move on, so a
        # single bad page does not abort a multi-year scrape.
        print(f"Failed to fetch page: {url} ({exc})")
        return
    if response.status_code != 200:
        print(f"Failed to fetch page: {url} (HTTP {response.status_code})")
        return

    soup = BeautifulSoup(response.text, 'html.parser')  # Parse HTML

    # AL first, then NL, matching the order rows were originally written in.
    for table_id in ('AL_MVP_voting', 'NL_MVP_voting'):
        mvp_table = soup.find('table', {'id': table_id})
        if mvp_table:
            scrape_table(mvp_table, writer, url)
        else:
            print(f"No table with id {table_id!r} found at {url}")

def scrape_table(table, writer, year_url):
    """Write one CSV row per data row of ``table``, prefixed with the year.

    Args:
        table: Parsed table element; only ``find_all('tr')`` is called on it.
        writer: Object exposing ``writerow`` (e.g. a ``csv.writer``).
        year_url: URL of the page the table came from. The year is taken as
            the text between the last ``_`` and the first ``.`` after it.

    Returns:
        None. Rows are emitted through ``writer.writerow``.

    Only ``<td>`` cells are collected. A row that has no ``<td>`` cells at
    all (for example an extra header row made purely of ``<th>`` cells) is
    skipped, so the output never contains rows holding nothing but the year.
    """
    year = year_url.split('_')[-1].split('.')[0]  # Extract year from URL
    for tr in table.find_all('tr')[1:]:  # Skip the first row (header)
        cells = [td.text.strip() for td in tr.find_all('td')]
        if not cells:
            # Nothing but non-<td> cells here: not a data row.
            continue
        writer.writerow([year, *cells])  # Year is always the first column

def gen_base_urls(start_yr, end_yr):
    """Build the list of awards-page URLs for every year in a range.

    Args:
        start_yr: First year to include.
        end_yr: Last year to include (inclusive).

    Returns:
        A list with one URL string per year, in ascending order. The list is
        empty when ``start_yr`` is greater than ``end_yr``.
    """
    years = range(start_yr, end_yr + 1)  # +1 so that end_yr itself is covered
    return [
        f"https://www.baseball-reference.com/awards/awards_{year}.shtml"
        for year in years
    ]

# Ask for the inclusive year range; the output filename is derived from it
start_year = int(input("Enter start year: "))
end_year = int(input("Enter end year: "))
output_file = f'mvp_voting_{start_year}_{end_year}.csv'

# Create (or overwrite) the CSV and fill it one awards page at a time
with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)

    # Header row: "Year" is the value scrape_table prepends to every row
    column_headers = [
        "Year", "Name", "Tm", "Vote Pts", "1st Place", "Share", "WAR",
        "G", "AB", "R", "H", "HR", "RBI", "SB", "BB",
        "BA", "OBP", "SLG", "OPS",
        "W", "L", "ERA", "WHIP",
        "G", "GS", "SV", "IP", "H", "HR", "BB", "SO",
    ]
    writer.writerow(column_headers)

    base_urls = gen_base_urls(start_year, end_year)

    # One request per year, with a pause after each
    for base_url in base_urls:
        scrape_baseball_reference(base_url, writer)

        # Same year token scrape_table derives: after the last "_" and
        # before the first "." that follows it
        year = base_url.rsplit('_', 1)[-1].partition('.')[0]
        print(f"Data scraped for {year}")

        time.sleep(2)  # Pause between requests to avoid being flagged

Enter start year:  1993
Enter end year:  2023


Data scraped for 1993
Data scraped for 1994
Data scraped for 1995
Data scraped for 1996
Data scraped for 1997
