In [181]:
import random
from bs4 import BeautifulSoup
import re
import requests
import pandas as pd
import math

# Browser-like User-Agent string sent with every scraping request below.
header_name = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'

# Team abbreviation -> display name. The abbreviation is spliced into the
# baseball-reference.com team URL; the display name becomes the key of the
# per-team result dictionaries built by the scraping cells.
acronym_to_name_dict = {
    # National League
    'ATL': 'Atlanta',
    'WSN': 'Washington',
    'NYM': 'New York Mets',
    'PHI': 'Philadelphia',
    'MIA': 'Miami',
    'STL': 'St. Louis',
    'MIL': 'Milwaukee',
    'CHC': 'Chicago Cubs',
    'CIN': 'Cincinnati',
    'PIT': 'Pittsburgh',
    'LAD': 'Los Angeles Dodgers',
    'ARI': 'Arizona',
    'SFG': 'San Francisco',
    'COL': 'Colorado',
    'SDP': 'San Diego',
    # American League
    'NYY': 'New York Yankees',
    'TBR': 'Tampa Bay',
    'BOS': 'Boston',
    'TOR': 'Toronto',
    'BAL': 'Baltimore',
    'MIN': 'Minnesota',
    'CLE': 'Cleveland',
    'CHW': 'Chicago White Sox',
    'KCR': 'Kansas City',
    'DET': 'Detroit',
    'HOU': 'Houston',
    'OAK': 'Oakland',
    'TEX': 'Texas',
    'LAA': 'Los Angeles Angels',
    'SEA': 'Seattle',
}
# NOTE(review): this rebinds the name and discards the 30-team mapping built
# just above, so every scraping cell below only processes Atlanta.
# Presumably a temporary limit to keep scraping runs short -- confirm and
# remove this line to process the whole league.
acronym_to_name_dict = {'ATL': 'Atlanta'}

In [272]:
class Batter:
    """Outcome model and running stat line for a single hitter.

    The five per-plate-appearance rates given to the constructor are stored
    as a running sum in the order single, double, triple, home run, walk.
    Each stored value is therefore an upper threshold: a random draw below
    ``singles`` is a single, a draw below ``doubles`` (but not ``singles``)
    is a double, and so on.

    Attributes:
        team: team name of player
        name: player name
        singles: threshold for a single (the single rate itself)
        doubles: threshold for a double (single + double rates)
        triples: threshold for a triple (single + double + triple rates)
        home_runs: threshold for a home run (sum of the four hit rates)
        walks: threshold for a walk (sum of all five rates)
        cum_singles, cum_doubles, cum_triples, cum_home_runs, cum_walks:
            counts of each outcome recorded so far
        cum_pa: plate appearances recorded so far
        cum_runs: runs scored so far
        cum_rbi: runs batted in so far
    """

    def __init__(self, team, name, singles, doubles, triples, home_runs, walks):
        """Stores identity, builds the cumulative thresholds, zeroes the counters"""
        self.team = team
        self.name = name

        # Every threshold extends the previous one by the next outcome's rate.
        self.singles = singles
        self.doubles = self.singles + doubles
        self.triples = self.doubles + triples
        self.home_runs = self.triples + home_runs
        self.walks = self.home_runs + walks

        for counter in ("cum_singles", "cum_doubles", "cum_triples",
                        "cum_home_runs", "cum_walks", "cum_pa",
                        "cum_runs", "cum_rbi"):
            setattr(self, counter, 0)

    def avg(self):
        """Calculates batting average: hits per plate appearance that was not a walk

        Returns:
            float: batting average in decimal form, or 0 when the player has
            no plate appearances other than walks
        """
        at_bats = self.cum_pa - self.cum_walks
        if at_bats <= 0:
            return 0
        hits = self.cum_singles + self.cum_doubles + self.cum_triples + self.cum_home_runs
        return float(hits / at_bats)

    def obp(self):
        """Calculates on base percentage: hits plus walks per plate appearance

        Returns:
            float: obp in decimal form, or 0 without any plate appearance
        """
        if self.cum_pa <= 0:
            return 0
        times_on_base = (self.cum_singles + self.cum_doubles + self.cum_triples
                         + self.cum_home_runs + self.cum_walks)
        return times_on_base / self.cum_pa

    def slg(self):
        """Calculates slugging percentage: total bases per non-walk plate appearance

        Returns:
            float: slg in decimal form, or 0 when the player has no plate
            appearances other than walks
        """
        at_bats = self.cum_pa - self.cum_walks
        if at_bats <= 0:
            return 0
        total_bases = (self.cum_singles + 2 * self.cum_doubles
                       + 3 * self.cum_triples + 4 * self.cum_home_runs)
        return total_bases / at_bats

    def ops(self):
        """Calculates on base plus slugging

        Returns:
            float: ops in decimal form
        """
        slugging = self.slg()
        on_base = self.obp()
        return slugging + on_base

    def print_stats(self):
        """Prints the rate stats rounded to three decimals, then the home run count"""
        rate_stats = (("Avg", self.avg), ("OBP", self.obp),
                      ("SLG", self.slg), ("OPS", self.ops))
        for label, compute in rate_stats:
            print(f"{label}: {round(compute(), 3)}")
        print(f"HR: {self.cum_home_runs}")

        
class Pitcher(Batter):
    """Class that holds information for a pitcher.

    Adds nothing of its own: construction is delegated unchanged to Batter,
    so a Pitcher carries exactly the attributes and methods of a Batter.

    Attributes:
        team: team name of player
        name: player name
        singles, doubles, triples, home_runs, walks: outcome rates, stored
            by Batter as cumulative thresholds
    """

    def __init__(self, team, name, singles, doubles, triples, home_runs, walks):
        """Forwards every argument to the Batter initializer"""
        super().__init__(team, name, singles, doubles, triples, home_runs, walks)

        
class Team:
    """Class that holds information for a team.

    Attributes:
        name: team name
        pitching_staff: the pitchers passed to the constructor, stored as given
        lineup: batting order, at most nine batters, highest ``walks`` value first
        roster: the same list object as ``lineup``; Game reads the batting
            order through this name
        index: position in batting order of current batter
        runners: [first, second, third, pending runs]; each base holds the
            runner occupying it or None, the last slot counts runs scored on
            the current play
        runs: number of runs for a team in the current game
        wins: number of wins for a team
        losses: number of losses for a team
        cum_runs: total runs for a team over a span of games
    """

    def __init__(self, name, batters, pitchers):
        """Initializes values for this class

        Args:
            name: team name
            batters: iterable of hashable batter objects exposing ``walks``
            pitchers: pitching staff, stored without further processing
        """
        self.name = name
        self.pitching_staff = pitchers
        self.index = 0
        self.runners = [None, None, None, 0]    # first base-home plate
        self.runs = 0
        self.wins = 0
        self.losses = 0
        self.cum_runs = 0

        self.lineup = self._make_lineup(batters)
        # The simulation and the class documentation both call the batting
        # order "roster"; expose it under that name too so team.roster works.
        self.roster = self.lineup

    @staticmethod
    def _make_lineup(batters):
        """Returns up to nine distinct batters ordered by descending ``walks``.

        ``walks`` is the last cumulative threshold stored on a batter, i.e.
        the sum of all on-base outcome rates. Ties keep their input order
        because the sort is stable.
        """
        distinct = dict.fromkeys(batters)  # drop repeats, keep first-seen order
        ranked = sorted(distinct, key=lambda batter: batter.walks, reverse=True)
        return ranked[:9]

    def restart(self):
        """Resets values for position in lineup, runners on base, and runs if a game is to be started"""
        self.index = 0
        self.runners = [None, None, None, 0]
        self.runs = 0

    def reset_runners(self):
        """Resets baserunners after each half inning"""
        self.runners = [None, None, None, 0]

        
class Game:
    """Class that simulates one game between two teams.

    Attributes:
        away_team: team that bats first in every inning
        home_team: team that bats second in every inning
    """

    def __init__(self, away_team, home_team):
        """Initializes values for this class"""
        self.home_team = home_team
        self.away_team = away_team

    @staticmethod
    def _batting_order(team):
        """Returns the batting order list of a team.

        Reads ``team.roster`` when the team has that attribute and falls back
        to ``team.lineup`` otherwise, so teams exposing either name work.
        """
        order = getattr(team, "roster", None)
        if order is None:
            order = team.lineup
        return order

    @staticmethod
    def _score(team, runner, batter):
        """Credits one run: a run to the runner, an RBI to the batter, one pending team run"""
        runner.cum_runs += 1
        batter.cum_rbi += 1
        team.runners[3] += 1

    def get_result(self, player, num):
        """Using player attributes and a random number, a result is created for a plate appearance

        The player's outcome attributes are cumulative thresholds, so the
        first threshold that exceeds ``num`` decides the result. The plate
        appearance and the matching outcome counter are recorded on the player.

        Args:
            player: instance of a player class
            num: random number between 0 and 1
        Returns:
            str: refers to result based on random number (ie "walk")
        """
        player.cum_pa += 1

        if num < player.singles:
            player.cum_singles += 1
            return "single"
        if num < player.doubles:
            player.cum_doubles += 1
            return "double"
        if num < player.triples:
            player.cum_triples += 1
            return "triple"
        if num < player.home_runs:
            player.cum_home_runs += 1
            return "home_run"
        if num < player.walks:
            player.cum_walks += 1
            return "walk"
        return "out"

    def move_runners(self, team, result):
        """Based on the result of a batter's plate appearance, runners are moved and runs may be added to team's total

        Runners advance exactly as many bases as the batter on a hit; on a
        walk only forced runners advance. Any other result leaves the bases
        untouched.

        Args:
            team: instance team that the batter was on
            result: str of result for batter (ie "walk")
        """
        batter = self._batting_order(team)[team.index]
        bases = team.runners

        if result == "single":
            if bases[2] is not None:
                self._score(team, bases[2], batter)
            bases[2] = bases[1]
            bases[1] = bases[0]
            bases[0] = batter

        elif result == "double":
            for base in (2, 1):
                if bases[base] is not None:
                    self._score(team, bases[base], batter)
            bases[2] = bases[0]
            bases[1] = batter
            bases[0] = None

        elif result == "triple":
            for base in (2, 1, 0):
                if bases[base] is not None:
                    self._score(team, bases[base], batter)
            bases[2] = batter
            bases[1] = None
            bases[0] = None

        elif result == "home_run":
            # The batter scores too and is credited with his own RBI.
            self._score(team, batter, batter)
            for base in (2, 1, 0):
                if bases[base] is not None:
                    self._score(team, bases[base], batter)
            bases[2] = None
            bases[1] = None
            bases[0] = None

        elif result == "walk":
            on_first, on_second, on_third = bases[0], bases[1], bases[2]
            bases[0] = batter
            # A runner only moves when the base behind him is occupied.
            if on_first is not None:
                bases[1] = on_first
                if on_second is not None:
                    bases[2] = on_second
                    if on_third is not None:
                        # Bases loaded: the forced-in run is credited like
                        # every other run (run for the runner, RBI for batter).
                        self._score(team, on_third, batter)

        # Move the runs scored on this play into the team's game total.
        if bases[3] > 0:
            team.runs += bases[3]
            bases[3] = 0

    def print_result(self):
        """Prints the current run totals of both teams.

        NOTE(review): play() resets each team's runs when it finishes, so
        calling this after play() prints zeros -- confirm intended usage.
        """
        away, home = self.away_team, self.home_team
        print(f"final score: {away.name} {away.runs} {home.name} {home.runs}")

    def print_record(self):
        """Prints the win-loss record of both teams"""
        for team in (self.away_team, self.home_team):
            print(f"{team.name}: {team.wins}-{team.losses}")

    def play(self):
        """A game between the two teams of this instance is played

        Nine innings are played, followed by extra innings for as long as
        the score is tied. Afterwards the winner's and loser's records are
        updated, each team's runs are added to its cumulative total and both
        teams are reset for the next game. The loop does not end if neither
        lineup can ever score.
        """
        away, home = self.away_team, self.home_team

        inning = 1
        while inning < 10 or away.runs == home.runs:
            for side in (away, home):
                order = self._batting_order(side)
                outs = 0

                while outs < 3:
                    num = random.uniform(0, 1)
                    result = self.get_result(order[side.index], num)
                    if result == "out":
                        outs += 1
                    else:
                        self.move_runners(side, result)

                    # Wrap around the whole batting order so every batter,
                    # including the last one, gets his turn.
                    side.index = (side.index + 1) % len(order)

                side.reset_runners()
            inning += 1

        if away.runs > home.runs:
            away.wins += 1
            home.losses += 1
        elif away.runs < home.runs:
            away.losses += 1
            home.wins += 1

        away.cum_runs += away.runs
        away.restart()
        home.cum_runs += home.runs
        home.restart()

        
class Season(Game):
    """Class that simulates a series of games between the same two teams.

    Attributes:
        away_team: team that bats first in every game
        home_team: team that bats second in every game
    """

    def __init__(self, away_team, home_team):
        """Initializes values for this class"""
        self.home_team = home_team
        self.away_team = away_team

    def simulate_season(self, games):
        """A season of games is simulated

        Every game is played between this season's own two teams, so wins,
        losses and cumulative counters accumulate on those team objects.

        Args:
            games: number of games to be played
        """
        for _ in range(games):
            # Use the teams stored on the instance instead of module-level
            # variables that merely happen to share their names.
            Game(self.away_team, self.home_team).play()

            
# Driver: simulate 1000 games, then print both teams' records and one
# player's stat line.
# NOTE(review): `s` and `a1` are not defined in the visible code --
# presumably a Season and a Batter created in a cell that is not shown; confirm.
s.simulate_season(1000)
s.print_record()
a1.print_stats()            

Away: 9509-9491
Home: 9491-9509
Avg: 0
OBP: 0
SLG: 0
OPS: 0
HR: 0


In [9]:
def convert_int(row, column):
    """Converts one cell of a row to an integer.

    Args:
        row: labelled row (has ``index`` and supports ``row[column]``)
        column: label of the cell to convert
    Returns:
        int: the converted value, 0 for an empty string; None when the row
        has no such column
    """
    if column not in row.index:
        return None
    return int(row[column]) if row[column] != '' else 0
        
def convert_float(row, column):
    """Converts one cell of a row to a float.

    Args:
        row: labelled row (has ``index`` and supports ``row[column]``)
        column: label of the cell to convert
    Returns:
        float: the converted value, 0.0 for an empty string; None when the
        row has no such column
    """
    if column not in row.index:
        return None
    return float(row[column]) if row[column] != '' else 0.0

In [10]:
# Scrape every team's 2019 batting table from baseball-reference.com into one
# DataFrame per team, stored in batting_dict under the team's display name.
headers = {'User-Agent': header_name}
batting_dict = {}
URL_dict = {v: 'https://www.baseball-reference.com/teams/'+ k + '/2019.shtml' for k, v in acronym_to_name_dict.items()}
# Header cells are matched by their visible text ('Pos', 'Name', 'Age') and
# data cells by their data-stat attribute ('pos', 'player', 'age'), so both
# spellings are listed.
data_columns = ['Pos', 'pos', 'Name', 'player', 'Age', 'age', 'G', 'PA', 'AB', 'R', 'H', '2B', '3B', 'HR', 'SB', 'CS', 'BB', 'SO', 'HBP']
normal_link_start = 'https://www.baseball-reference.com'


for name, URL in URL_dict.items():
    source = requests.get(URL, headers=headers)
    soup = BeautifulSoup(source.content, 'html.parser')
    batting_columns = []
    # Start each team with an empty frame so a page without the table can
    # never store the previous team's data (or fail on an undefined name).
    df = pd.DataFrame()

    for table in soup.find_all('table', attrs={'id': 'team_batting'}):
        for labels in table.find_all('thead'):
            for header_value in labels.find_all('th'):
                label = header_value.get_text()
                if label in data_columns:
                    batting_columns.append(label)

                    if label == 'Name':  # additional column for player url
                        batting_columns.append('Link')

        # Rows are collected in a list and the frame is built once:
        # DataFrame.append no longer exists in pandas 2.x.
        player_rows = []
        for player in table.find_all('tbody'):
            for row in player.find_all('tr'):
                row_classes = row.get('class') or []
                if row_classes and row_classes[0] == 'thead':
                    continue  # repeated header row inside the table body

                player_values = []
                for column_value in row.find_all('td'):
                    stat = column_value.get('data-stat')
                    if stat in data_columns:
                        player_values.append(column_value.get_text())

                        if stat == 'player':
                            player_values.append(normal_link_start + column_value.a['href'])  # adding player url normal stats

                if len(player_values) != len(batting_columns):
                    raise ValueError('row for ' + name + ' has ' + str(len(player_values))
                                     + ' values but ' + str(len(batting_columns)) + ' columns were found')
                player_rows.append(player_values)

        df = pd.DataFrame(player_rows, columns=batting_columns)

    batting_dict.update({name: df})

# Print once after all teams are scraped instead of after every team.
print(batting_dict)

{'Atlanta':    Pos                Name                                               Link  \
0    C       Tyler Flowers  https://www.baseball-reference.com/players/f/f...   
1   1B    Freddie Freeman*  https://www.baseball-reference.com/players/f/f...   
2   2B       Ozzie Albies#  https://www.baseball-reference.com/players/a/a...   
3   SS      Dansby Swanson  https://www.baseball-reference.com/players/s/s...   
4   3B      Josh Donaldson  https://www.baseball-reference.com/players/d/d...   
5   LF        Austin Riley  https://www.baseball-reference.com/players/r/r...   
6   CF    Ronald Acuna Jr.  https://www.baseball-reference.com/players/a/a...   
7   RF      Nick Markakis*  https://www.baseball-reference.com/players/m/m...   
8    C       Brian McCann*  https://www.baseball-reference.com/players/m/m...   
9   IF      Johan Camargo#  https://www.baseball-reference.com/players/c/c...   
10  RF      Matthew Joyce*  https://www.baseball-reference.com/players/j/j...   
11  CF     Ender

In [11]:
# Scrape every team's 2019 pitching table from baseball-reference.com into one
# DataFrame per team, stored in pitching_dict under the team's display name.
headers = {'User-Agent': header_name}
pitching_dict = {}
URL_dict = {v: 'https://www.baseball-reference.com/teams/'+ k + '/2019.shtml' for k, v in acronym_to_name_dict.items()}

# Header cells are matched by their visible text and data cells by their
# data-stat attribute, so both spellings of a column are listed.
data_columns = ['Pos', 'pos', 'Name', 'player', 'Age', 'age', 'G', 'GS', 'CG', 'SV', 'H', 'R',
               'ER', 'HR', 'BB', 'IBB', 'SO', 'HBP', 'batters_faced', 'BK', 'WP', 'BF', 'ERA', 
                'earned_run_avg', 'IP', 'FIP', 'fip', 'WHIP', 'whip']

br_link_start = 'https://www.baseball-reference.com'

for name, URL in URL_dict.items():
    source = requests.get(URL, headers=headers)
    soup = BeautifulSoup(source.content, 'html.parser')
    pitching_columns = []
    # Start each team with an empty frame so a page without the table can
    # never store the previous team's data (or fail on an undefined name).
    df = pd.DataFrame()

    for table in soup.find_all('table', attrs={'id': 'team_pitching'}):
        for labels in table.find_all('thead'):
            for header_value in labels.find_all('th'):
                label = header_value.get_text()
                if label in data_columns:
                    pitching_columns.append(label)

                    if label == 'Name':  # additional column for player url
                        pitching_columns.append('Link')

        # Rows are collected in a list and the frame is built once:
        # DataFrame.append no longer exists in pandas 2.x.
        player_rows = []
        for player in table.find_all('tbody'):
            for row in player.find_all('tr'):
                row_classes = row.get('class') or []
                if row_classes and row_classes[0] == 'thead':
                    continue  # repeated header row inside the table body

                player_values = []
                for column_value in row.find_all('td'):
                    stat = column_value.get('data-stat')
                    if stat in data_columns:
                        player_values.append(column_value.get_text())

                        if stat == 'player':
                            player_values.append(br_link_start + column_value.a['href'])  # adding player url

                if len(player_values) != len(pitching_columns):
                    raise ValueError('row for ' + name + ' has ' + str(len(player_values))
                                     + ' values but ' + str(len(pitching_columns)) + ' columns were found')
                player_rows.append(player_values)

        df = pd.DataFrame(player_rows, columns=pitching_columns)

    pitching_dict.update({name: df})

# Print once after all teams are scraped instead of after every team.
print(pitching_dict)

{'Atlanta':    Pos               Name                                               Link  \
0   SP        Mike Soroka  https://www.baseball-reference.com/players/s/s...   
1   SP      Julio Teheran  https://www.baseball-reference.com/players/t/t...   
2   SP         Max Fried*  https://www.baseball-reference.com/players/f/f...   
3   SP   Mike Foltynewicz  https://www.baseball-reference.com/players/f/f...   
4   SP    Dallas Keuchel*  https://www.baseball-reference.com/players/k/k...   
5   SP      Kevin Gausman  https://www.baseball-reference.com/players/g/g...   
6   CL       Luke Jackson  https://www.baseball-reference.com/players/j/j...   
7   RP        Josh Tomlin  https://www.baseball-reference.com/players/t/t...   
8   RP      Sean Newcomb*  https://www.baseball-reference.com/players/n/n...   
9   RP    Anthony Swarzak  https://www.baseball-reference.com/players/s/s...   
10  RP     Jerry Blevins*  https://www.baseball-reference.com/players/b/b...   
11        Touki Toussaint  h

In [12]:
# Column groups of the team batting frames; columns in neither numeric list
# are left as scraped.
string_columns = ['Pos', 'Name']
int_columns = ['Age', 'G', 'PA', 'AB', 'R', 'H', '2B', '3B', 'HR',
               'RBI', 'SB', 'CS', 'BB', 'SO', 'OPS+', 'TB', 'GDP', 'HBP', 'SH', 'SF', 'IBB']
float_columns = ['BA', 'OBP', 'SLG', 'OPS']

# Replace every numeric column of every team frame with converted values,
# picking the converter first and applying it row by row.
for k, df in batting_dict.items():
    for column in df.columns:
        if column in int_columns:
            to_number = convert_int
        elif column in float_columns:
            to_number = convert_float
        else:
            continue
        df[column] = df.apply(lambda row: to_number(row, column), axis=1)

In [13]:
# Column groups of the team pitching frames; columns in neither numeric list
# are left as scraped.
string_columns = ['Pos', 'Name']
int_columns = ['Age', 'W', 'L', 'G', 'GS', 'GF', 'CG', 'SHO', 'SV', 'H', 'R',
               'ER', 'HR', 'BB', 'IBB', 'SO', 'HBP', 'BK', 'WP', 'BF', 'ERA+']
float_columns = ['W-L%', 'ERA', 'IP', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9',
                'SO/W']

# Replace every numeric column of every team frame with converted values,
# picking the converter first and applying it row by row.
for k, df in pitching_dict.items():
    for column in df.columns:
        if column in int_columns:
            to_number = convert_int
        elif column in float_columns:
            to_number = convert_float
        else:
            continue
        df[column] = df.apply(lambda row: to_number(row, column), axis=1)

In [25]:
# Scrape the year-by-year batting table of every batter found in
# batting_dict. batter_dict maps player name -> DataFrame (one row per
# season plus a link to that season's splits page); player_id_dict maps
# player name -> the id taken from the player's page URL.
headers = {'User-Agent': header_name}
url_dict = {}
batter_dict = {}
player_id_dict = {}
# Header cells are matched by their visible text and data cells by their
# data-stat attribute, so both spellings of a column are listed.
data_columns = ['Year', 'Tm', 'team_ID', 'Pos', 'POS', 'pos_season', 'Name', 'player', 'Age', 'age', 'G', 'PA', 'AB', 
                'R', 'H', '2B', '3B', 'HR', 'SB', 'CS', 'BB', 'SO', 'HBP']
platoon_link_start = 'https://www.baseball-reference.com/players/split.fcgi?id='
platoon_link_middle = '&year='
platoon_link_end = '&t=b'

for k, v in batting_dict.items():
    for name, url in zip(v.Name, v.Link):
        url_dict.update({name: url})

for name, URL in url_dict.items():
    source = requests.get(URL, headers=headers)
    soup = BeautifulSoup(source.content, 'html.parser')
    batting_columns = []
    # Last path segment of the player URL without its file extension.
    player_ID = URL.split('/')[5].split('.')[0]
    # Start each player with an empty frame so a page without the table can
    # never store the previous player's data under this player's name.
    df = pd.DataFrame()

    for table in soup.find_all('table', attrs={'id': 'batting_standard'}):
        for labels in table.find_all('thead'):
            for header_value in labels.find_all('th'):
                label = header_value.get_text()
                if label in data_columns:
                    batting_columns.append(label)

            batting_columns.append('ID')  # column for the splits-page link

        # Rows are collected in a list and the frame is built once:
        # DataFrame.append no longer exists in pandas 2.x.
        season_rows = []
        for player in table.find_all('tbody'):
            for row in player.find_all('tr'):
                row_classes = row.get('class') or []
                if row_classes and row_classes[0] == 'minors_table':
                    continue

                player_values = []
                year = row.th.get_text()  # the season sits in the row's header cell
                player_values.append(year)

                for column_value in row.find_all('td'):
                    if column_value.get('data-stat') in data_columns:
                        player_values.append(column_value.get_text())

                player_values.append(platoon_link_start+player_ID+platoon_link_middle+year+platoon_link_end)

                if len(player_values) != len(batting_columns):
                    raise ValueError('row for ' + name + ' has ' + str(len(player_values))
                                     + ' values but ' + str(len(batting_columns)) + ' columns were found')
                season_rows.append(player_values)

        df = pd.DataFrame(season_rows, columns=batting_columns)

    batter_dict.update({name: df})
    player_id_dict.update({name: player_ID})

print(batter_dict)

{'Tyler Flowers':     Year Age   Tm    G   PA   AB   R   H  2B 3B  HR SB CS  BB   SO HBP    Pos  \
0   2009  23  CHW   10   20   16   3   3   1  0   0  0  0   3    8   1    /2D   
1   2010  24  CHW    8   15   11   2   1   0  0   0  0  0   4    5   0     /2   
2   2011  25  CHW   38  129  110  13  23   5  1   5  0  1  14   38   3   2/3D   
3   2012  26  CHW   52  153  136  19  29   6  0   7  2  1  12   56   4   2/3D   
4   2013  27  CHW   84  275  256  24  50  11  0  10  0  1  14   94   4      2   
5   2014  28  CHW  127  442  407  42  98  16  1  15  0  1  25  159   8     *2   
6   2015  29  CHW  112  361  331  21  79  12  0   9  0  1  21  104   6  *2/3D   
7   2016  30  ATL   83  325  281  27  76  18  0   8  0  0  29   91  11      2   
8   2017  31  ATL   99  370  317  41  89  16  0  12  0  1  31   82  20    2/D   
9   2018  32  ATL   82  296  251  34  57   9  0   8  0  0  35   76   9    2/D   
10  2019  33  ATL   85  310  271  36  62  11  3  11  0  0  31  105   6      2   

         

In [41]:
# Experimental cell: for every scraped batter, fetch the career batting
# splits page and try to parse the element with id 'div_plato' into a
# DataFrame stored in platoon_dict. The recorded run of this cell ended with
# a KeyboardInterrupt.
headers = {'User-Agent': header_name}

platoon_dict = {}
data_columns = ['Split', 'Year', 'Tm', 'team_ID', 'Pos', 'POS', 'pos_season', 'Name', 'player', 'Age', 'age', 'G', 'PA', 'AB', 
                'R', 'H', '2B', '3B', 'HR', 'SB', 'CS', 'BB', 'SO', 'HBP']
platoon_link_start = 'https://www.baseball-reference.com/players/split.fcgi?id='
platoon_link_end = '&year=Career&t=b'
# Example URL for one player; not used anywhere in this cell.
link = 'https://www.baseball-reference.com/players/split.fcgi?id=flowety01&year=Career&t=b'

for name, player_ID in player_id_dict.items():
    URL = platoon_link_start+player_ID+platoon_link_end
    source = requests.get(URL, headers=headers)
    soup = BeautifulSoup(source.content, 'html.parser')
    batting_columns = []
   
    for table in soup.findAll('div', attrs={'id': 'div_plato'}):
        print(table.get_text())  # debug output of the raw element text
        for labels in table.findAll('thead'):
            for header_value in labels.findAll('th'):
                if(header_value.get_text() in data_columns):
                    batting_columns.append(header_value.get_text())

            print(batting_columns)
            df = pd.DataFrame(columns=batting_columns)

        for player in table.findAll('tbody'):
            for row in player.findAll('tr'):
                # NOTE(review): subscripting a tag looks up an attribute, so
                # row.td[0] presumably raises KeyError instead of returning
                # the first cell -- confirm.
                print(row.td[0].get_text())
                # NOTE(review): if 'data-row' is a string, [0] is a single
                # character and can never equal 'minors_table', so every row
                # would be skipped here -- confirm the intended filter.
                if(row.td['data-row'][0]!='minors_table'):
                    continue

                player_values = []
                year = row.th.get_text()
                player_values.append(year)

                for column_value in row.findAll('td'):

                    if(column_value['data-stat'] in data_columns):
                        player_values.append(column_value.get_text())

                # platoon_link_middle is not assigned in this cell; it has to
                # exist from an earlier cell. platoon_link_end already holds a
                # '&year=' part here, so the built link contains two of them.
                player_values.append(platoon_link_start+player_ID+platoon_link_middle+year+platoon_link_end)
                df = df.append(pd.Series(player_values, index=df.columns), ignore_index=True)

        # df is only (re)assigned above when the element contains a thead.
        platoon_dict.update({name: df})

print(platoon_dict)

KeyboardInterrupt: 

In [17]:
# Scrape the year-by-year pitching table of every pitcher found in
# pitching_dict. pitcher_dict maps player name -> DataFrame with one row
# per season.
headers = {'User-Agent': header_name}
url_dict = {}
pitcher_dict = {}
# Header cells are matched by their visible text and data cells by their
# data-stat attribute, so both spellings of a column are listed.
data_columns = ['Year', 'Pos', 'pos', 'Name', 'player', 'Age', 'age', 'G', 'GS', 'CG', 'SV', 'H', 'R',
                'ER', 'HR', 'BB', 'IBB', 'SO', 'HBP', 'batters_faced', 'BK', 'WP', 'BF', 'ERA', 
                'earned_run_avg', 'IP', 'FIP', 'fip', 'WHIP', 'whip']

for k, v in pitching_dict.items():
    for name, url in zip(v.Name, v.Link):
        url_dict.update({name: url})

for name, URL in url_dict.items():
    source = requests.get(URL, headers=headers)
    soup = BeautifulSoup(source.content, 'html.parser')
    pitching_columns = []
    # Start each player with an empty frame so a page without the table can
    # never store the previous player's data under this player's name.
    df = pd.DataFrame()

    for table in soup.find_all('table', attrs={'id': 'pitching_standard'}):
        for labels in table.find_all('thead'):
            for header_value in labels.find_all('th'):
                label = header_value.get_text()
                if label in data_columns:
                    pitching_columns.append(label)

        # Rows are collected in a list and the frame is built once:
        # DataFrame.append no longer exists in pandas 2.x.
        season_rows = []
        for player in table.find_all('tbody'):
            for row in player.find_all('tr'):
                row_classes = row.get('class') or []
                if row_classes and row_classes[0] == 'minors_table':
                    continue

                player_values = []
                player_values.append(row.th.get_text())  # season from the row's header cell

                for column_value in row.find_all('td'):
                    if column_value.get('data-stat') in data_columns:
                        player_values.append(column_value.get_text())

                if len(player_values) != len(pitching_columns):
                    raise ValueError('row for ' + name + ' has ' + str(len(player_values))
                                     + ' values but ' + str(len(pitching_columns)) + ' columns were found')
                season_rows.append(player_values)

        df = pd.DataFrame(season_rows, columns=pitching_columns)

    pitcher_dict.update({name: df})

print(pitcher_dict)

{'Mike Soroka':    Year Age   ERA   G  GS CG SV     IP    H   R  ...  HR  BB IBB   SO HBP BK  \
0  2018  20  3.51   5   5  0  0   25.2   30  14  ...   1   7   0   21   0  0   
1  2019  21  2.68  29  29  0  0  174.2  153  56  ...  14  41   1  142   7  0   

  WP   BF   FIP   WHIP  
0  2  113  2.85  1.442  
1  3  701  3.45  1.111  

[2 rows x 21 columns], 'Julio Teheran':    Year Age   ERA   G  GS CG SV     IP    H    R  ...  HR  BB IBB   SO HBP BK  \
0  2011  20  5.03   5   3  0  0   19.2   21   11  ...   4   8   0   10   0  0   
1  2012  21  5.68   2   1  0  0    6.1    5    4  ...   0   1   0    5   0  0   
2  2013  22  3.20  30  30  0  0  185.2  173   69  ...  22  45   4  170  13  0   
3  2014  23  2.89  33  33  4  0  221.0  188   82  ...  22  51   4  186   4  1   
4  2015  24  4.04  33  33  0  0  200.2  189   99  ...  27  73   3  171   9  0   
5  2016  25  3.21  30  30  1  0  188.0  157   70  ...  22  41   2  167   9  1   
6  2017  26  4.49  32  32  0  0  188.1  186  103  ...  31  7

In [55]:
# Experimental cell: fetch one player's 2019 splits page and try to parse the
# table with id 'plato' into platoon_dict. The recorded output of this cell
# was an empty dictionary, i.e. the loop over matching tables never ran.
# NOTE(review): presumably the parser does not see the table in the served
# HTML -- confirm by inspecting the page source.
headers = {'User-Agent': header_name}

platoon_dict = {}
data_columns = ['Split', 'Year', 'Tm', 'team_ID', 'Pos', 'POS', 'pos_season', 'Name', 'player', 'Age', 'age', 'G', 'PA', 'AB', 
                'R', 'H', '2B', '3B', 'HR', 'SB', 'CS', 'BB', 'SO', 'HBP']

link = 'https://www.baseball-reference.com/players/split.fcgi?id=flowety01&year=2019'



source = requests.get(link, headers=headers)
soup = BeautifulSoup(source.content, 'html.parser')
#print(soup.get_text())
batting_columns = []

for table in soup.findAll('table', attrs={'id': 'plato'}):
    print(table.get_text())  # debug output of the raw table text
    for labels in table.findAll('thead'):
        for header_value in labels.findAll('th'):
            if(header_value.get_text() in data_columns):
                batting_columns.append(header_value.get_text())

        print(batting_columns)
        df = pd.DataFrame(columns=batting_columns)

    for player in table.findAll('tbody'):
        for row in player.findAll('tr'):
            # NOTE(review): subscripting a tag looks up an attribute, so
            # row.td[0] presumably raises KeyError instead of returning the
            # first cell -- confirm.
            print(row.td[0].get_text())
            # NOTE(review): if 'data-row' is a string, [0] is a single
            # character and can never equal 'minors_table', so every row
            # would be skipped here -- confirm the intended filter.
            if(row.td['data-row'][0]!='minors_table'):
                continue

            player_values = []
            year = row.th.get_text()
            player_values.append(year)

            for column_value in row.findAll('td'):

                if(column_value['data-stat'] in data_columns):
                    player_values.append(column_value.get_text())

            # platoon_link_start, player_ID, platoon_link_middle and
            # platoon_link_end are not assigned in this cell; they have to
            # exist from earlier cells.
            player_values.append(platoon_link_start+player_ID+platoon_link_middle+year+platoon_link_end)
            df = df.append(pd.Series(player_values, index=df.columns), ignore_index=True)

    # `name` is not assigned in this cell either; it is whatever an earlier
    # cell left behind.
    platoon_dict.update({name: df})

print(platoon_dict)

{}


In [108]:
# Scrape one ESPN batting-splits page and flatten its "vs. Left" and
# "vs. Right" rows into a single-row DataFrame. Stat columns are suffixed with
# 'L' (left block) or 'R' (right block); 'Name' and 'Bats' lead the row.
url = 'http://www.espn.com/mlb/player/splits/_/id/31283/type/batting3'

headers = {'User-Agent': header_name}

# Not read below; kept so the names stay defined for any cell that uses them.
platoon_dict = {}
data_columns = ['Split', 'Year', 'Tm', 'team_ID', 'Pos', 'POS', 'pos_season', 'Name', 'player', 'Age', 'age', 'G', 'PA', 'AB',
                'R', 'H', '2B', '3B', 'HR', 'SB', 'CS', 'BB', 'SO', 'HBP']

source = requests.get(url, headers=headers)
soup = BeautifulSoup(source.content, 'html.parser')
left_columns = ['Name', 'Bats']
right_columns = []
name = soup.h1.get_text()

# `bats` is only assigned if the bio markup is found; the value is the third
# character following the literal text 'Bats' in the general-info list.
for bio in soup.findAll('div', attrs={'class': 'player-bio'}):
    for info in bio.findAll('ul', attrs={'class': 'general-info'}):
        bats = info.get_text().split('Bats')[1][2]

for table in soup.findAll('table', attrs={'class': 'tablehead'}):
    labels = table.find('tr', attrs={'class': 'colhead'})

    # Every header cell yields one 'L' column and one 'R' column.
    for header_value in labels.findAll('td'):
        stat = header_value.get_text()
        left_columns.append(stat + 'L')
        right_columns.append(stat + 'R')

    # Values are appended in page order, so they line up with
    # left_columns + right_columns only when the 'vs. Left' row precedes the
    # 'vs. Right' row.
    player_values = [name, bats]
    for split_row in table.findAll('tr', attrs={'class': ['oddrow', 'evenrow']}):
        if split_row.td.get_text() in ('vs. Left', 'vs. Right'):
            player_values.extend(cell.get_text() for cell in split_row.findAll('td'))

    # Build the one-row frame directly: DataFrame.append was removed in
    # pandas 2.0. A length mismatch still raises ValueError, as before.
    df = pd.DataFrame([player_values], columns=left_columns + right_columns)

# The first cell of each split row is its label ('vs. Left' / 'vs. Right'),
# which lands in the 'Overall*' columns and carries no stat.
df.drop(['OverallL', 'OverallR'], axis=1, inplace=True)
print(df)

               Name Bats  ABL  RL   HL 2BL 3BL HRL RBIL BBL  ... RBIR  BBR  \
0  Christian Yelich    L  503  82  148  30   3  25   75  53  ...  213  175   

  HBPR  SOR SBR CSR  AVGR  OBPR  SLGR  OPSR  
0   12  282  51   5  .318  .410  .582  .992  

[1 rows x 34 columns]


In [158]:
# Download the ESPN 2019 batting-leaders table, 50 players per page, into
# `batter_stats`. All values are kept as the strings scraped from the page;
# the first column is replaced by the numeric player id taken from the row's
# CSS class.
url = 'http://www.espn.com/mlb/history/leaders/_/breakdown/season/year/2019/start/'
links = [url+str(i) for i in range(1, 350, 50)]
headers = {'User-Agent': header_name}

batter_stats = pd.DataFrame()
column_headers = []
page_frames = []
# Player rows carry a class of the form '<odd|even>row player-10-<id>'.
player_row_class = re.compile('row player-10-')

for (num, link) in enumerate(links):
    source = requests.get(link, headers=headers)
    soup = BeautifulSoup(source.content, 'html.parser')
    table = soup.find('table', attrs={'class': 'tablehead'})
    if num == 0:
        # Column names are read once, from the first page's header row.
        table_headers = table.find('tr', attrs={'class': 'colhead'})
        column_headers = [cell.get_text() for cell in table_headers.findAll('td')]
        column_headers[0] = 'ID'

    page_rows = []
    for player in table.find_all('tr', attrs={'class': player_row_class}):
        # Second class token is 'player-10-<id>'; keep the id part.
        player_id = player['class'][1].split('-')[2]
        # Skip the first cell: its column is repurposed as 'ID'.
        stat_cells = player.findAll('td')[1:]
        page_rows.append([player_id] + [cell.get_text() for cell in stat_cells])

    # One frame per page instead of one DataFrame.append per row:
    # DataFrame.append was removed in pandas 2.0 and copied the frame on
    # every call.
    df = pd.DataFrame(page_rows, columns=column_headers)
    page_frames.append(df)

batter_stats = pd.concat(page_frames, ignore_index=True)

print(batter_stats)

        ID            PLAYER YRS    G   AB    R    H  2B 3B  HR RBI  BB   SO  \
0     6524    Howie Kendrick  14  121  334   61  115  23  1  17  62  27   49   
1    33184      Tim Anderson   4  123  498   81  167  32  0  18  56  15  109   
2    39572       Luis Arraez   1   92  326   54  109  20  1   4  28  36   29   
3    29703    Donovan Solano   6   81  215   27   71  13  1   4  23  10   49   
4    31283  Christian Yelich   7  130  489  100  161  29  3  44  97  80  118   
..     ...               ...  ..  ...  ...  ...  ...  .. ..  ..  ..  ..  ...   
341  32819     Lewis Brinson   3   75  226   15   39   9  1   0  15  13   74   
342  30755      Keon Broxton   5  100  204   24   34   4  0   6  16  20  104   
343  32657       Mike Zunino   7   90  266   30   44  10  1   9  32  20   98   
344   5921       Jeff Mathis  15   86  228   17   36   9  0   2  12  15   87   
345  32890       Travis Shaw   5   86  230   22   36   5  0   7  16  36   89   

     SB CS    BA  
0     2  1  .344  
1

In [159]:
# Download the ESPN 2019 pitching-leaders table (sorted by wins), 50 players
# per page, into `pitcher_stats`. All values are kept as the strings scraped
# from the page; the first column is replaced by the numeric player id taken
# from the row's CSS class.
url = 'http://www.espn.com/mlb/history/leaders/_/type/pitching/breakdown/season/year/2019/sort/wins/start/'
links = [url+str(i) for i in range(1, 500, 50)]
headers = {'User-Agent': header_name}

pitcher_stats = pd.DataFrame()
column_headers = []
page_frames = []
# Player rows carry a class of the form '<odd|even>row player-10-<id>'.
player_row_class = re.compile('row player-10-')

for (num, link) in enumerate(links):
    source = requests.get(link, headers=headers)
    soup = BeautifulSoup(source.content, 'html.parser')
    table = soup.find('table', attrs={'class': 'tablehead'})
    if num == 0:
        # Column names are read once, from the first page's header row.
        table_headers = table.find('tr', attrs={'class': 'colhead'})
        column_headers = [cell.get_text() for cell in table_headers.findAll('td')]
        column_headers[0] = 'ID'

    page_rows = []
    for player in table.find_all('tr', attrs={'class': player_row_class}):
        # Second class token is 'player-10-<id>'; keep the id part.
        player_id = player['class'][1].split('-')[2]
        # Skip the first cell: its column is repurposed as 'ID'.
        stat_cells = player.findAll('td')[1:]
        page_rows.append([player_id] + [cell.get_text() for cell in stat_cells])

    # One frame per page instead of one DataFrame.append per row:
    # DataFrame.append was removed in pandas 2.0 and copied the frame on
    # every call.
    df = pd.DataFrame(page_rows, columns=column_headers)
    page_frames.append(df)

pitcher_stats = pd.concat(page_frames, ignore_index=True)

print(pitcher_stats)

        ID             PLAYER YRS   G  GS CG SH     IP    H  ER  BB   SO   W  \
0     6341   Justin Verlander  15  34  34  2  1  223.0  137  64  42  300  21   
1    32081        Gerrit Cole   7  33  33  0  0  212.1  142  59  48  326  20   
2    32675  Eduardo Rodriguez   5  34  34  0  0  203.1  195  86  75  213  19   
3     5883       Zack Greinke  16  33  33  0  0  208.2  175  68  30  187  18   
4    30373  Stephen Strasburg  10  33  33  0  0  209.0  161  77  56  251  18   
..     ...                ...  ..  ..  .. .. ..    ...  ...  ..  ..  ...  ..   
495  35009        Paul Sewald   3  17   0  0  0   19.2   18  10   3   22   1   
496  35098     Ryan Carpenter   2   9   9  0  0   40.2   61  42  13   25   1   
497  35135       Tanner Scott   3  28   0  0  0   26.1   28  14  19   37   1   
498  35328       Pedro Payano   1   6   4  0  0   22.0   26  14  15   17   1   
499  35391       Luke Farrell   3   9   1  0  0   13.1    6   4   3   12   1   

     L SV   ERA  
0    6  0  2.58  
1  

In [223]:
# Re-type the scraped batting table in place. Columns named in `int_columns`
# go through `convert_int`, those in `float_columns` through `convert_float`;
# every other column is left untouched.
# `convert_int` / `convert_float` are defined in another cell and are called
# as converter(row, column_name) - presumably returning the parsed cell.
string_columns = ['Pos', 'Name', 'PLAYER']
int_columns = [
    'ID', 'Age', 'G', 'PA', 'AB', 'R', 'H', '2B', '3B', 'HR', 'RBI',
    'SB', 'CS', 'BB', 'SO', 'OPS+', 'TB', 'GDP', 'HBP', 'SH', 'SF', 'IBB',
]
float_columns = ['BA', 'OBP', 'SLG', 'OPS']

for column in batter_stats.columns:
    # Pick the converter first; integer columns take precedence.
    if column in int_columns:
        converter = convert_int
    elif column in float_columns:
        converter = convert_float
    else:
        continue

    # Row-wise apply: the converter receives the full row plus the column name.
    batter_stats[column] = batter_stats.apply(
        lambda record: converter(record, column), axis=1)

In [202]:
# Re-type the scraped pitching table in place. Columns named in `int_columns`
# go through `convert_int`, those in `float_columns` through `convert_float`;
# every other column is left untouched.
# `convert_int` / `convert_float` are defined in another cell and are called
# as converter(row, column_name) - presumably returning the parsed cell.
string_columns = ['Pos', 'Name', 'PLAYER']
int_columns = [
    'ID', 'Age', 'W', 'L', 'G', 'GS', 'GF', 'CG', 'SHO', 'SV', 'H', 'R',
    'ER', 'HR', 'BB', 'IBB', 'SO', 'HBP', 'BK', 'WP', 'BF', 'ERA+',
]
float_columns = [
    'W-L%', 'ERA', 'IP', 'FIP', 'WHIP', 'H9', 'HR9', 'BB9', 'SO9', 'SO/W',
]

for column in pitcher_stats.columns:
    # Pick the converter first; integer columns take precedence.
    if column in int_columns:
        converter = convert_int
    elif column in float_columns:
        converter = convert_float
    else:
        continue

    # Row-wise apply: the converter receives the full row plus the column name.
    pitcher_stats[column] = pitcher_stats.apply(
        lambda record: converter(record, column), axis=1)

In [273]:
# Build the Astros `Team` from ESPN's roster page: map each rostered player to
# an ESPN id, look the id up in `pitcher_stats` / `batter_stats`, and turn the
# season totals into per-plate-appearance outcome rates.
def _innings_to_outs(innings_pitched):
    """Convert baseball innings notation to a count of outs.

    The fractional digit counts outs, not tenths: 174.2 means 174 innings
    plus 2 outs. The digit is rounded rather than truncated because binary
    floats store e.g. 174.2 slightly low, which made int() drop an out.
    """
    partial_outs, full_innings = math.modf(innings_pitched)
    return int(full_innings) * 3 + round(partial_outs * 10)


url = 'https://www.espn.com/mlb/team/roster/_/name/hou/houston-astros'
headers = {'User-Agent': header_name}
source = requests.get(url, headers=headers)
soup = BeautifulSoup(source.content, 'html.parser')
pitcher_id_dict = {}  # never read in this cell; kept so the name stays defined
# The loops below use `player_id_dict`, which this cell never initialised;
# start from an empty dict so only this roster page contributes players.
player_id_dict = {}
astros_pitchers = []
astros_batters = []

for player in soup.findAll('td', attrs={'class': 'Table__TD'}):
    row = player.find('a', attrs={'class': 'AnchorLink'})
    if row and row.get_text() != '':
        # The player id is the last path segment of the profile link.
        player_id_dict[row.get_text()] = int(row['href'].split('/')[-1])

# Build the id lookups once instead of re-listing the column per player.
pitcher_ids = set(pitcher_stats['ID'].tolist())
batter_ids = set(batter_stats['ID'].tolist())

for name, ID in player_id_dict.items():
    if ID in pitcher_ids:
        stats = pitcher_stats.loc[pitcher_stats['ID'] == ID]
        hits_allowed = stats['H'].values[0]
        walks_allowed = stats['BB'].values[0]
        # Pass the scalar, not the one-row Series, to the float maths.
        outs = _innings_to_outs(stats['IP'].values[0])

        # Batters faced is approximated as outs + walks + hits.
        total_batters = outs + walks_allowed + hits_allowed
        hit_ratio = hits_allowed / total_batters
        # Hits allowed are split by fixed shares: 64% singles, 20% doubles,
        # 2% triples, 14% home runs.
        singles = round(hit_ratio*.64, 3)
        doubles = round(hit_ratio*.2, 3)
        triples = round(hit_ratio*.02, 3)
        home_runs = round(hit_ratio*.14, 3)
        walks = round(walks_allowed/total_batters, 3)

        astros_pitchers.append(Pitcher('Astros', name, singles, doubles, triples, home_runs, walks))

    elif ID in batter_ids:
        stats = batter_stats.loc[batter_stats['ID'] == ID]
        hits = stats['H'].values[0]
        num_doubles = stats['2B'].values[0]
        num_triples = stats['3B'].values[0]
        num_home_runs = stats['HR'].values[0]
        num_walks = stats['BB'].values[0]

        num_singles = hits - num_doubles - num_triples - num_home_runs
        # Plate appearances are approximated as at-bats plus walks.
        pa = stats['AB'].values[0] + num_walks
        singles = round(num_singles/pa, 3)
        doubles = round(num_doubles/pa, 3)
        triples = round(num_triples/pa, 3)
        home_runs = round(num_home_runs/pa, 3)
        walks = round(num_walks/pa, 3)

        astros_batters.append(Batter('Astros', name, singles, doubles, triples, home_runs, walks))

Astros = Team('Astros', astros_batters, astros_pitchers)
Astros2 = Team('Astros', astros_batters, astros_pitchers)
for n in Astros.lineup:
    print(n.name)


Alex Bregman
Yordan Alvarez
George Springer
Michael Brantley
Carlos Correa
Jose Altuve
Aledmys Diaz
Yuli Gurriel
Josh Reddick
