In [24]:
import pandas as pd
import requests
import re
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import numpy as np


In [25]:
class Player:
    """Career statistics for one player, scraped from baseball-reference.com.

    Constructing an instance performs one HTTP request for the player's page
    and parses it. The instance then exposes:

    name  -- the player name exactly as passed to the constructor.
    url   -- the page URL derived from ``name`` (see ``gen_url``).
    soup  -- the parsed page (a ``BeautifulSoup`` object).
    stats -- list of seasons; each season is a list of cell strings whose
             first element is the text of the row's header cell.
    """

    # URL template: first placeholder is the directory letter, second the
    # player id built by gen_url().
    BASE_URL = 'https://www.baseball-reference.com/players/{}/{}.shtml'

    # Seconds to wait for the server. requests never times out unless a
    # timeout is given, which would let a stalled connection hang the cell.
    REQUEST_TIMEOUT = 30

    def __init__(self, name):
        """Build the URL for ``name``, download the page and extract stats.

        Raises ValueError (from ``gen_url``) if ``name`` has fewer than two
        whitespace-separated words.
        """
        self.name = name
        self.url = self.gen_url()
        # Download and parse the page exactly once; career_stats() and
        # gen_stats_table() both read from this parsed copy.
        self.soup = self.gen_soup()
        self.stats = self.career_stats()

    def gen_soup(self):
        """Download ``self.url`` and return it parsed with 'html.parser'."""
        page = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        return BeautifulSoup(page.text, 'html.parser')

    def gen_url(self):
        """Return the player-page URL derived from ``self.name``.

        The name is lower-cased and split on whitespace. The player id is the
        first five characters of the second word, followed by the first two
        characters of the first word, followed by the literal '01'. The
        directory letter is the first character of the second word.

        NOTE(review): the '01' suffix is hard-coded, so presumably a player
        whose page uses a different suffix will not be found -- verify.

        Raises:
            ValueError: if the name has fewer than two words.
        """
        parts = self.name.lower().split()
        if len(parts) < 2:
            raise ValueError(
                'Expected a first and last name, got {!r}'.format(self.name))
        first, last = parts[0], parts[1]
        player_id = last[:5] + first[:2] + '01'
        return self.BASE_URL.format(last[0], player_id)

    def _column_names(self):
        """Return the text of every <th> whose class matches 'poptip'."""
        header_cells = self.soup.find_all('th', {'class': re.compile('poptip')})
        return [cell.get_text() for cell in header_cells]

    def career_stats(self):
        """Extract one list of cell strings per ``<tr class="full">`` row.

        Reads from ``self.soup`` (no network access). Each returned row
        starts with the text of the row's first <th>, followed by the text of
        each of its <td> cells in document order.
        """
        rows = []
        for season in self.soup.find_all('tr', {'class': 'full'}):
            row = [season.find('th').get_text()]
            row.extend(cell.get_text() for cell in season.find_all('td'))
            rows.append(row)
        return rows

    def stats_by_year(self, year):
        """Return the first season row whose first cell equals ``year``.

        ``year`` may be a string or an int; it is compared as a string
        because the scraped cells are strings. If no row matches, the string
        'No stats found for that year' is returned instead of a row.
        """
        wanted = str(year)
        for season in self.stats:
            if season[0] == wanted:
                return season
        return 'No stats found for that year'

    def gen_stats_table(self):
        """Return ``self.stats`` as a DataFrame labelled with the page headers.

        The frame is built in a single constructor call rather than by
        concatenating one-row frames in a loop. All values are the scraped
        strings; no numeric conversion is done.

        Raises:
            ValueError: if a season row's length differs from the number of
                column headers found on the page.
        """
        return pd.DataFrame(self.stats, columns=self._column_names())

In [26]:
# Constructing the Player requests and parses the player's page; the table
# is left as the cell's last expression so the notebook renders it.
judge = Player(name='Aaron Judge')
judge.gen_stats_table()

Unnamed: 0,Year,Age,Tm,Lg,G,PA,AB,R,H,2B,...,OPS,OPS+,TB,GDP,HBP,SH,SF,IBB,Pos,Awards
0,2016,24,NYY,AL,27,95,84,10,15,2,...,0.608,61,29,2,1,0,1,0,9,
1,2017,25,NYY,AL,155,678,542,128,154,24,...,1.049,171,340,15,5,0,4,11,*9D,"AS,MVP-2,RoY-1,SS"
2,2018,26,NYY,AL,112,498,413,77,115,22,...,0.919,151,218,10,4,0,5,3,9D/8,"AS,MVP-12"
3,2019,27,NYY,AL,50,222,183,33,52,7,...,0.905,141,92,8,1,0,1,3,9/D,


In [27]:
# Same steps for a second player: build the Player (requests and parses the
# page), then leave the table as the last expression so it is rendered.
colon = Player(name='Bartolo Colon')
colon.gen_stats_table()


Unnamed: 0,Year,Age,Tm,Lg,W,L,W-L%,ERA,G,GS,...,BF,ERA+,FIP,WHIP,H9,HR9,BB9,SO9,SO/W,Awards
0,1997,24,CLE,AL,4,7,0.364,5.65,19,17,...,427,83,4.9,1.617,10.2,1.1,4.3,6.3,1.47,
1,1998,25,CLE,AL,14,9,0.609,3.71,31,31,...,883,128,3.75,1.392,9.0,0.7,3.5,7.0,2.0,AS
2,1999,26,CLE,AL,18,5,0.783,3.95,32,32,...,858,126,4.3,1.273,8.1,1.1,3.3,7.1,2.12,CYA-4
3,2000,27,CLE,AL,15,8,0.652,3.88,30,30,...,807,127,3.96,1.388,7.8,1.0,4.7,10.1,2.16,
4,2001,28,CLE,AL,14,12,0.538,4.09,34,34,...,947,110,4.0,1.394,8.9,1.1,3.6,8.1,2.23,
5,2002,29,TOT,MLB,20,8,0.714,2.93,33,33,...,966,147,3.72,1.239,8.4,0.8,2.7,5.7,2.13,CYA-6
6,2003,30,CHW,AL,15,13,0.536,3.87,34,34,...,984,120,4.11,1.198,8.3,1.1,2.5,6.4,2.58,
7,2004,31,ANA,AL,18,12,0.6,5.01,34,34,...,897,89,4.97,1.373,9.3,1.6,3.1,6.8,2.23,
8,2005,32,LAA,AL,21,8,0.724,3.48,33,33,...,906,122,3.75,1.159,8.7,1.1,1.7,6.3,3.65,"AS,CYA-1,MVP-23"
9,2006,33,LAA,AL,1,5,0.167,5.11,10,10,...,251,89,5.33,1.456,11.3,1.8,1.8,5.0,2.82,
