In [1]:
import requests
import re
import json
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize
import unicodedata

In [2]:
def box_score(year,month,day):
    r = requests.get('http://gd2.mlb.com/components/game/mlb/year_'+ str(year) +'/month_0' + str(month) +'/day_' + str(day) +'/master_scoreboard.json')
    game_data = r.json()['data']['games']['game']
    games = []
    for n in range(0,len(game_data)):
        if game_data[n]['status']['status'] != 'Postponed':
            games.append(game_data[n]['game_data_directory'])
    return games

def parse_boxscore(year,month,day):
    game_list = box_score(year,month,day)
    daily_stat_dict = {}
    for m in range(0,len(game_list)):
        if game_list[m] > 0:
            batter = requests.get('http://gd2.mlb.com'+ game_list[m] +'/boxscore.json').json()['data']['boxscore']['batting']
            pitcher = requests.get('http://gd2.mlb.com'+ game_list[m] +'/boxscore.json').json()['data']['boxscore']['pitching']

            for n in range(0,len(batter[0]['batter'])):
                daily_stat_dict[batter[0]['batter'][n]['id']] = {'name': batter[0]['batter'][n]['name_display_first_last'],
                                                                 'ab': int(batter[0]['batter'][n]['ab']),
                                                                 'h_b': int(batter[0]['batter'][n]['h']),
                                                                 'r': int(batter[0]['batter'][n]['r']),
                                                                 'hr': int(batter[0]['batter'][n]['hr']),
                                                                 'rbi': int(batter[0]['batter'][n]['rbi']),
                                                                 'sb': int(batter[0]['batter'][n]['sb']),
                                                                 'pos': batter[0]['batter'][n]['pos']
                                                                }


            for n in range(0,len(batter[1]['batter'])):
                daily_stat_dict[batter[1]['batter'][n]['id']] = {'name': batter[1]['batter'][n]['name_display_first_last'],
                                                                 'ab': int(batter[1]['batter'][n]['ab']),
                                                                 'h_b': int(batter[1]['batter'][n]['h']),
                                                                 'r': int(batter[1]['batter'][n]['r']),
                                                                 'hr': int(batter[1]['batter'][n]['hr']),
                                                                 'rbi': int(batter[1]['batter'][n]['rbi']),
                                                                 'sb': int(batter[1]['batter'][n]['sb']),
                                                                 'pos': batter[1]['batter'][n]['pos']
                                                                }

            for n in range(0,len(pitcher[0]['pitcher'])):
                daily_stat_dict[pitcher[0]['pitcher'][n]['id']] = {'name': pitcher[0]['pitcher'][n]['name_display_first_last'],
                                                                   'so': int(pitcher[0]['pitcher'][n]['so']),
                                                                   'h_p': int(pitcher[0]['pitcher'][n]['h']),
                                                                   'bb': int(pitcher[0]['pitcher'][n]['bb']),
                                                                   'out': int(pitcher[0]['pitcher'][n]['out']),
                                                                   'er': int(pitcher[0]['pitcher'][n]['er']),
                                                                   'pos': pitcher[0]['pitcher'][n]['pos']
                                                                  }
                if 'note' in pitcher[0]['pitcher'][n]:
                    if 'W' in pitcher[0]['pitcher'][n]['note']:
                        daily_stat_dict[pitcher[0]['pitcher'][n]['id']].update({'w': 1})
                    if '(S,' in pitcher[0]['pitcher'][n]['note']:
                        daily_stat_dict[pitcher[0]['pitcher'][n]['id']].update({'sv': 1})
            for n in range(0,len(pitcher[1]['pitcher'])):
                daily_stat_dict[pitcher[1]['pitcher'][n]['id']] = {'name': pitcher[1]['pitcher'][n]['name_display_first_last'],
                                                                   'so': int(pitcher[1]['pitcher'][n]['so']),
                                                                   'h_p': int(pitcher[1]['pitcher'][n]['h']),
                                                                   'bb': int(pitcher[1]['pitcher'][n]['bb']),
                                                                   'out': int(pitcher[1]['pitcher'][n]['out']),
                                                                   'er': int(pitcher[1]['pitcher'][n]['er']),
                                                                   'pos': pitcher[1]['pitcher'][n]['pos']
                                                                  }
                if 'note' in pitcher[1]['pitcher'][n]:
                    if 'W' in pitcher[1]['pitcher'][n]['note']:
                        daily_stat_dict[pitcher[1]['pitcher'][n]['id']].update({'w': 1})
                    if '(S,' in pitcher[1]['pitcher'][n]['note']:
                        daily_stat_dict[pitcher[1]['pitcher'][n]['id']].update({'sv': 1})
    
    return daily_stat_dict

def player_stats(year,month,day,team_dataframe):
    today = parse_boxscore(year,month,day)
    stat_list= ['ab',
                'h_b',
                'r',
                'hr',
                'rbi',
                'sb',
                'w',
                'sv',
                'so',
                'h_p',
                'bb',
                'out',
                'er',
                'pos'
               ]
    for index, row in team_dataframe.iterrows():
        if row['id'] in today:
            for n in range(0,len(stat_list)):
                if stat_list[n] in today[row['id']]:
                    team_dataframe.loc[index, stat_list[n]] = today[row['id']][stat_list[n]]
    team_dataframe = team_dataframe.fillna(value = 0)
    return team_dataframe 

def point_calc(year,month,day,team_dataframe):
    player_points = player_stats(year,month,day,team_dataframe)
    for index, row in player_points.iterrows():
        player_points.loc[index, 'points'] = row['w']*6 + row['sv']*6 + row['out'] + row['so'] - (row['h_p'] + row['bb'] + row['er']*2) + row['hr']*6 + row['sb']*5 + row['h_b']*4 + row['r']*2 + row['rbi']*2 - row['ab']
        
    return player_points
    
def rem_acnt(u_string):
    data = u_string.decode('utf-8')
    normal = unicodedata.normalize('NFKD', data).encode('ASCII', 'ignore')
    return normal

def points(dataframe):
    points_dict = {}
    pos_list = []
    for i in set(dataframe.yahoo_pos):
        pos_list.append(i)
    teams = []
    for i in set(dataframe.Team):
        teams.append(i)
    for n in range(0,len(teams)):
        points_dict[teams[n]] = {'C': [],
                                 '1B': [],
                                 '2B': [],
                                 '3B': [],
                                 'SS': [],
                                 'OF': [],
                                 'SP': [],
                                 'RP': []
                                }

    for index, row in dataframe.iterrows():
        for i in points_dict[row['Team']]:
            if i in row['yahoo_pos']:
                points_dict[row['Team']][i].append([row['name'],row['yahoo_pos'],row['points']])
    
    def takethird(list):
        return list[2]
    
    pos = ['C','1B','2B','3B','SS','OF','SP','RP']
    
    for i in points_dict:
        for n in range(0,len(pos)):
            points_dict[i][pos[n]].sort(key = takethird, reverse = True)

    return points_dict

def pos_clean(x):
    x = str(x)
    if 'F' in x:
        x = 'OF'
    return x




In [34]:
team_list = pd.read_csv('team_list.csv')

In [35]:
team_list['id'] = team_list['id'].apply(lambda x : str(x).split('.')[0])

In [36]:
team_list.head()

Unnamed: 0.1,Unnamed: 0,Team,name,id,yahoo_pos
0,0,Smoak Me A Kingery,Gary Sanchez,596142,C
1,1,Smoak Me A Kingery,Cody Bellinger,641355,"1B, OF"
2,2,Smoak Me A Kingery,Whit Merrifield,593160,"2B, OF"
3,3,Smoak Me A Kingery,Mike Moustakas,519058,3B
4,4,Smoak Me A Kingery,Corey Seager,608369,SS


In [37]:
del team_list['Unnamed: 0']

In [23]:
stats5112018 = point_calc(2018,5,11,team_list)

In [9]:
stats5112018

Unnamed: 0,Team,name,id,yahoo_pos,ab,h_b,r,hr,rbi,sb,pos,so,h_p,bb,out,er,w,sv,points
0,Smoak Me A Kingery,Gary Sanchez,596142,C,1.0,0.0,0.0,0.0,0.0,0.0,PH-C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
1,Smoak Me A Kingery,Cody Bellinger,641355,"1B, OF",3.0,1.0,0.0,0.0,0.0,0.0,1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,Smoak Me A Kingery,Whit Merrifield,593160,"2B, OF",4.0,2.0,1.0,0.0,0.0,0.0,2B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0
3,Smoak Me A Kingery,Mike Moustakas,519058,3B,5.0,3.0,1.0,0.0,1.0,0.0,3B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0
4,Smoak Me A Kingery,Corey Seager,608369,SS,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Smoak Me A Kingery,Mitch Haniger,571745,OF,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Smoak Me A Kingery,Kris Bryant,592178,"3B, OF",3.0,2.0,3.0,1.0,2.0,0.0,3B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.0
7,Smoak Me A Kingery,Michael Conforto,624424,OF,3.0,2.0,1.0,1.0,2.0,0.0,RF-LF,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0
8,Smoak Me A Kingery,Scott Kingery,664068,"2B, 3B, OF",4.0,2.0,0.0,0.0,0.0,0.0,SS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
9,Smoak Me A Kingery,Justin Smoak,475253,1B,5.0,0.0,0.0,0.0,0.0,0.0,1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5.0


In [45]:
import pymysql
user = 'root'
passw = 'p@ssw0rd'
host =  'localhost'
port = 3306
database = 'baseball_project'



In [46]:
conn = pymysql.connect(host=host,
                       port=port,
                       user=user, 
                       passwd=passw,  
                       db=database,
                       charset='utf8')



In [None]:
team_list.to_sql(name='team_list', con=conn, if_exists = 'replace', index=False, flavor = 'mysql')

In [40]:
cur = conn.cursor()
cur.execute("SELECT * FROM baseball_project.team_list")

70

In [None]:
new_list = pd.read_sql(table_name = 'team_list', con = conn)

In [28]:
stats5112018.to_sql(name=database, con=conn, if_exists = 'replace', index=False, flavor = 'mysql')

In [None]:
points5112018 = points(stats5112018)
points5112018

In [28]:
for i in points5112018:
    for j in points5112018[i]:
        if len(points5112018[i][j]) == 1:
            print points5112018[i][j]
        

[['Gary Sanchez', 'C', -1.0]]
[['Corey Seager', 'SS', 0.0]]
[['Welington Castillo', 'C', -4.0]]
[['Ozzie Albies', '2B', 17.0]]
[['Trea Turner', 'SS', 10.0]]


In [282]:
total_points = {}
teams = []
for i in points5112018:
    teams.append(i)
pos = ['C','1B','2B','3B','SS','OF','SP','RP']
for i in points5112018:
    for n in range(0,len(pos)):
        if len(points5112018[i][pos[n]]) == 1:
            total_points[i] = {pos[n] : points5112018[i][pos[n]][0][2]}

In [283]:
pos

['C',
 'RP',
 'SS',
 'OF',
 'SP',
 '1B',
 '2B',
 '3B',
 'C',
 'RP',
 'SS',
 'OF',
 'SP',
 '1B',
 '2B',
 '3B',
 'C',
 'RP',
 'SS',
 'OF',
 'SP',
 '1B',
 '2B',
 '3B']

In [234]:
len(points5112018['Smoak Me A Kingery']['C']) == 1

True

In [270]:
total_points

{'Flyin Castillo Bros.': {'SS': 10.0}, 'Smoak Me A Kingery': {'SS': 0.0}}

In [29]:
def points2(dataframe):
    points_dict = {}
    teams = []
    for i in set(dataframe.Team):
        teams.append(i)
    for n in range(0,len(teams)):
        points_dict[teams[n]] = {'players': []} 
    for index, row in dataframe.iterrows():
        points_dict[row['Team']]['players'].append([row['name'], row['yahoo_pos'], row['points']])
    def takethird(list):
        return list[2]
    for i in points_dict:
        points_dict[i]['players'].sort(key = takethird, reverse = True)
    return points_dict

In [30]:
pointstry = points2(stats5112018)
pointstry

{'Flyin Castillo Bros.': {'players': [['Luke Weaver', 'SP', 20.0],
   ['Ozzie Albies', '2B', 17.0],
   ['Trea Turner', 'SS', 10.0],
   ['Joey Votto', '1B', 7.0],
   ['Teoscar Hernandez', 'OF', 1.0],
   ['Kyle Seager', '3B', 0.0],
   ['Ronald Acuna', 'OF', 0.0],
   ['Carlos Carrasco', 'SP', 0.0],
   ['Bud Norris', 'SP, RP', 0.0],
   ['Brad Boxberger', 'RP', 0.0],
   ['Chad Green', 'SP, RP', 0.0],
   ['Walker Buehler', 'SP', 0.0],
   ['J.A. Happ', 'SP', 0.0],
   ['Luis Castillo', 'SP', 0.0],
   ['Zack Godley', 'SP', 0.0],
   ['Garrett Richards', 'SP', 0.0],
   ['Luiz Gohara', 'SP', 0.0],
   ['Madison Bumgarner', 'SP', 0.0],
   ['Alex Reyes', 'SP, RP', 0.0],
   ['Rhys Hoskins', '1B, OF', -2.0],
   ['Kenta Maeda', 'SP', -2.0],
   ['Freddie Freeman', '1B, 3B', -3.0],
   ['Jose Martinez', '1B, OF', -3.0],
   ['Welington Castillo', 'C', -4.0]]},
 'Red Bat and Blue Bat': {'players': [['Manny Machado', '3B, SS', 32.0],
   ['Salvador Perez', 'C', 13.0],
   ['Jameson Taillon', 'SP', 12.0],
   ['D

In [136]:
pointstry['Smoak Me A Kingery']['players'][0]

['Kris Bryant', '3B, OF', 21.0]

In [167]:
total_points = {}
teams = []
for i in pointstry:
    teams.append(i)
    total_points[i] = {'points': {'C': [],
                                  '1B': [],
                                  '2B': [],
                                  '3B': [],
                                  'SS': [],
                                  'OF': [],
                                  'SP': [],
                                  'RP':[]
                                 },
                      }
pos = ['C','1B','2B','3B','SS','OF','SP','RP']
used_players = []
for i in pointstry:
    for m in range(0,len(pointstry[i]['players'])):
        for n in range(0,len(pos)):
            if pos[n] in pointstry[i]['players'][m][1] and len(points5112018[i][pos[n]]) == 1:
                total_points[i]['points'][pos[n]].append([pointstry[i]['players'][m][0], pointstry[i]['players'][m][2]])
                used_players.append(pointstry[i]['players'][m])
            elif pos[n] in pointstry[i]['players'][m][1] and pointstry[i]['players'][m] not in used_players and len(pointstry[i]['players'][m][1]) <= 2:
                total_points[i]['points'][pos[n]].append([pointstry[i]['players'][m][0], pointstry[i]['players'][m][2]])
                used_players.append(pointstry[i]['players'][m])
            #elif pos[n] in pointstry[i]['players'][m][1] and pointstry[i]['players'][m] not in used_players and poointstry[i]['players'][m][] 
                    
            else:
                print 'no'


no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
no
n

In [168]:
total_points

{'Flyin Castillo Bros.': {'points': {'1B': [['Joey Votto', 7.0]],
   '2B': [['Ozzie Albies', 17.0]],
   '3B': [['Kyle Seager', 0.0]],
   'C': [['Welington Castillo', -4.0]],
   'OF': [['Teoscar Hernandez', 1.0], ['Ronald Acuna', 0.0]],
   'RP': [['Brad Boxberger', 0.0]],
   'SP': [['Luke Weaver', 20.0],
    ['Carlos Carrasco', 0.0],
    ['Walker Buehler', 0.0],
    ['J.A. Happ', 0.0],
    ['Luis Castillo', 0.0],
    ['Zack Godley', 0.0],
    ['Garrett Richards', 0.0],
    ['Luiz Gohara', 0.0],
    ['Madison Bumgarner', 0.0],
    ['Kenta Maeda', -2.0]],
   'SS': [['Trea Turner', 10.0]]}},
 'Red Bat and Blue Bat': {'points': {'1B': [['Paul Goldschmidt', 0.0],
    ['Carlos Santana', -4.0],
    ['Hanley Ramirez', -4.0]],
   '2B': [['Derek Dietrich', 10.0], ['DJ LeMahieu', 3.0]],
   '3B': [['Justin Turner', 0.0]],
   'C': [['Salvador Perez', 13.0]],
   'OF': [['Kevin Pillar', 8.0], ['Bryce Harper', 0.0]],
   'RP': [['Keone Kela', 9.0],
    ['Brandon Kintzler', 4.0],
    ['Addison Reed', 0.0

In [146]:
pointstry

{'Flyin Castillo Bros.': {'players': [['Luke Weaver', 'SP', 20.0],
   ['Ozzie Albies', '2B', 17.0],
   ['Trea Turner', 'SS', 10.0],
   ['Joey Votto', '1B', 7.0],
   ['Teoscar Hernandez', 'OF', 1.0],
   ['Kyle Seager', '3B', 0.0],
   ['Ronald Acuna', 'OF', 0.0],
   ['Carlos Carrasco', 'SP', 0.0],
   ['Bud Norris', 'SP, RP', 0.0],
   ['Brad Boxberger', 'RP', 0.0],
   ['Chad Green', 'SP, RP', 0.0],
   ['Walker Buehler', 'SP', 0.0],
   ['J.A. Happ', 'SP', 0.0],
   ['Luis Castillo', 'SP', 0.0],
   ['Zack Godley', 'SP', 0.0],
   ['Garrett Richards', 'SP', 0.0],
   ['Luiz Gohara', 'SP', 0.0],
   ['Madison Bumgarner', 'SP', 0.0],
   ['Alex Reyes', 'SP, RP', 0.0],
   ['Rhys Hoskins', '1B, OF', -2.0],
   ['Kenta Maeda', 'SP', -2.0],
   ['Freddie Freeman', '1B, 3B', -3.0],
   ['Jose Martinez', '1B, OF', -3.0],
   ['Welington Castillo', 'C', -4.0]]},
 'Red Bat and Blue Bat': {'players': [['Manny Machado', '3B, SS', 32.0],
   ['Salvador Perez', 'C', 13.0],
   ['Jameson Taillon', 'SP', 12.0],
   ['D

In [157]:
avail_players = []
for i in pointstry:
    for j in pointstry[i]['players']:
        avail_players.append([j][0][0])
avail_players

['Kris Bryant',
 'Michael Conforto',
 'Mike Moustakas',
 'Whit Merrifield',
 'Scott Kingery',
 'Cody Bellinger',
 'Corey Seager',
 'Mitch Haniger',
 'Corey Dickerson',
 'Gerrit Cole',
 'Edwin Diaz',
 'Ken Giles',
 'Zach Davies',
 'Sean Newcomb',
 'James Paxton',
 'Jake Junis',
 'Jose Quintana',
 'Zack Wheeler',
 'Jack Flaherty',
 'Gary Sanchez',
 'Lance Lynn',
 'Justin Smoak',
 'Manny Machado',
 'Salvador Perez',
 'Jameson Taillon',
 'Derek Dietrich',
 'Keone Kela',
 'Kevin Pillar',
 'Buster Posey',
 'Brandon Kintzler',
 'DJ LeMahieu',
 'Marwin Gonzalez',
 'Paul Goldschmidt',
 'Bryce Harper',
 'Justin Turner',
 'Alex Wood',
 'Miles Mikolas',
 'Addison Reed',
 'Archie Bradley',
 'Mike Clevinger',
 'Marcus Stroman',
 'Jeff Samardzija',
 'Chris Archer',
 'Jimmy Nelson',
 'Carlos Santana',
 'Hanley Ramirez',
 'Luke Weaver',
 'Ozzie Albies',
 'Trea Turner',
 'Joey Votto',
 'Teoscar Hernandez',
 'Kyle Seager',
 'Ronald Acuna',
 'Carlos Carrasco',
 'Bud Norris',
 'Brad Boxberger',
 'Chad Gree

70

In [7]:
#save for later for furture player ID look ups

#playerid_list = pd.read_csv('playerid_list.csv')
#for index, row in team_list.iterrows():
#    if playerid_list['mlb_name'].str.contains(row['name']).any():
#        team_list.loc[index, 'id'] = str(playerid_list[playerid_list['mlb_name'] == row['name']]['mlb_id']).split(' ')[4].split('\nName:')[0]
#        team_list.loc[index, 'yahoo_pos'] = str(playerid_list[playerid_list['mlb_name'] == row['name']]['yahoo_pos']).split(' ')[4].split('\nName:')[0]
