In [4]:
import requests # HTTP client library, used to download the XML feeds
import xml.etree.ElementTree as ET # XML parsing
import os # file and system level

import pandas as pd  # dataframes, structures, analysis
import numpy as np   # used by pandas

#enable inline notebook plotting 
%matplotlib inline  

from termcolor import colored  # colored output using print
# Smoke test: one plain line, then one ANSI-colored line, to confirm that
# termcolor output renders in this notebook.
for message, tint in (("Hello world.", None), ("Hello back.", "red")):
    print(colored(message, tint) if tint else message)

Hello world.
[31mHello back.[0m


## Pull Players into Dictionary

In [5]:
# Download the Gameday players feed for the 2014-06-18 Colorado / Los Angeles
# game, save it to disk, and build playerDict: player id (str) -> "First Last".
url = 'https://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_18/gid_2014_06_18_colmlb_lanmlb_1/players.xml'
# Alternate players feed (2018-08-12 game), kept for experimentation:
#url = "https://gd2.mlb.com/components/game/mlb/year_2018/month_08/day_12/gid_2018_08_12_lanmlb_colmlb_1/players.xml"
resp = requests.get(url, timeout=30)  # bound the wait so a stalled server cannot hang the kernel
print(colored(resp,"blue"))
# Stop on a 4xx/5xx response instead of saving an error page to disk and
# failing later with a confusing XML ParseError.
resp.raise_for_status()

# Keep a local copy of the feed and report its size.
xmlfile = "myplayers.xml"
with open(xmlfile, 'wb') as f:
    f.write(resp.content)
statinfo = os.stat(xmlfile)
print(colored(xmlfile + ": " + str(round(statinfo.st_size/1024)) + " KB\n", "blue"))

# Pull players into tree structure <game><team><player><player></team><team><player><player></team></game>
tree = ET.parse(xmlfile)
game = tree.getroot()
teams = game.findall("./team")
playerDict = {}

# Print each team's roster while filling playerDict.
for team in teams:
    print(team.attrib.get("name"))
    players = team.findall("./player")
    for player in players:
        first = player.attrib.get("first")
        last = player.attrib.get("last")
        print("   ", player.attrib.get("id"), first, last)
        # Join only the name parts that are present; plain string concatenation
        # would raise TypeError if either attribute were missing from the feed.
        playerDict[ player.attrib.get("id") ] = " ".join(part for part in (first, last) if part)

[34m<Response [200]>[0m
[34mmyplayers.xml: 16 KB
[0m
Colorado Rockies
    518934 DJ LeMahieu
    572816 Corey Dickerson
    279571 Matt Belisle
    453211 Drew Stubbs
    554431 Tyler Matzek
    408047 Justin Morneau
    462985 Franklin Morales
    592621 Kyle Parker
    571521 Rex Brothers
    548357 Christian Bergman
    493603 Adam Ottavino
    501647 Wilin Rosario
    434665 Nick Masset
    453568 Charlie Blackmon
    488681 Brandon Barnes
    502374 Michael McKenry
    115629 LaTroy Hawkins
    518586 Charlie Culberson
    592454 Tommy Kahnle
    592710 Josh Rutledge
    453064 Troy Tulowitzki
    407822 Jorge De La Rosa
    468504 Jhoulys Chacin
    572253 Ryan Wheeler
    455119 Chris Martin
Los Angeles Dodgers
    508892 Carlos Triunfel
    475100 Scott Van Slyke
    453198 Chris Perez
    454560 A.J. Ellis
    461314 Matt Kemp
    425844 Zack Greinke
    444843 Andre Ethier
    434181 Brandon League
    460077 Drew Butera
    124604 Jamey Wright
    434442 J.P. Howell
    

In [8]:
# Spot-check the lookup built above: keys are the feed's player id strings,
# values are "First Last" display names.
playerDict["624577"]

'Yasiel Puig'

## Clayton Kershaw No-Hitter - June 18, 2014, Dodgers vs. Rockies, 8-0

Jun 18, 2014 - Kershaw no-hit the Rockies while striking out 15 and not walking a single batter. There have been 283 no-hitters in baseball history, but Kershaw is the only one to have at least 15 strikeouts and no walks. Only one other pitcher has even thrown a no-hitter with at least 15 strikeouts. ... He needed only 107 pitches.

#### Get Innings_All Data

In [13]:
# Download the inning_all pitch feed, save it to disk, and print an outline of
# its inning / half-inning structure.
url = 'https://www.powerchalk.com/downloads/pitchfx/inning_all.xml'
resp = requests.get(url, timeout=30)  # bound the wait so a stalled server cannot hang the kernel
print(colored(resp,"blue"))
# Stop on a 4xx/5xx response instead of saving an error page to disk and
# failing later with a confusing XML ParseError.
resp.raise_for_status()

# Keep a local copy of the feed and report its size.
xmlfile = "mygame.xml"
with open(xmlfile, 'wb') as f:
    f.write(resp.content)
statinfo = os.stat(xmlfile)
print(colored(xmlfile + ": " + str(round(statinfo.st_size/1024)) + " KB\n", "blue"))

# Pull XML into tree structure; `root` is reused by the cells that follow.
tree = ET.parse(xmlfile)
root = tree.getroot()
print("Tree.root.tag = " + root.tag)

# Unpack the game to find innings, then list the child elements of each
# (top / bottom in this feed) together with their attributes.
for child in root:
    print (child.tag, child.attrib.get("num"))
    for frame in child:
        print ("   ", frame.tag, frame.attrib)

[34m<Response [200]>[0m
[34mmygame.xml: 196 KB
[0m
Tree.root.tag = game
inning 1
    top {}
    bottom {}
inning 2
    top {}
    bottom {}
inning 3
    top {}
    bottom {}
inning 4
    top {}
    bottom {}
inning 5
    top {}
    bottom {}
inning 6
    top {}
    bottom {}
inning 7
    top {}
    bottom {}
inning 8
    top {}
    bottom {}
inning 9
    top {}


In [14]:
# Walk every half-inning of the parsed game (`root`) and print, per at-bat,
# the batter's name, the pitch sequence, and the at-bat's event.
# Relies on `root` (inning feed) and `playerDict` (id -> name) from earlier cells.
frames = ["top", "bottom"]
frameColors = {"top": "green", "bottom": "yellow"}  # show top of inning in green; bottom in yellow
pitchDictionary = { "FA":"fastball", "FF":"4-seam fb", "FT": "2-seam fb", "FC": "fb-cutter", "":"unknown", None: "none",
                    "FS":"fb-splitter", "SL":"slider", "CH":"changeup","CU":"curveball","KC":"knuckle-curve",
                    "KN":"knuckleball","EP":"eephus", "UN":"unidentified", "PO":"pitchout", "SI":"sinker", "SF":"split-finger"
                    }
totalPitchCount = 0

innings = root.findall("./inning")
for inning in innings:
    for frameName in frames:
        color = frameColors[frameName]
        # str() keeps the heading printable even if the "num" attribute is absent.
        # The heading is printed even when the half-inning itself is missing
        # from the feed (e.g. a bottom of the ninth that was never played).
        print(colored("\nInning " + str(inning.attrib.get("num")) + " (" + frameName + ")", color, attrs=['reverse']))
        fr = inning.find(frameName)
        if fr is None:
            continue
        for ab in fr.iter('atbat'):
            # Fall back to the raw id when the batter is not in the players
            # feed, rather than aborting the whole report with a KeyError.
            batterId = ab.get('batter')
            battername = playerDict.get(batterId, "Unknown batter (" + str(batterId) + ")")
            print (colored("   " + battername, color, attrs=['bold']))

            abPitchCount = 0  # stays 0 when the at-bat has no <pitch> children
            for abPitchCount, p in enumerate(ab.findall("pitch"), start=1):
                # Pitch codes not listed in pitchDictionary are shown verbatim
                # instead of raising KeyError.
                pitchCode = p.get("pitch_type")
                verbosePitch = pitchDictionary.get(pitchCode, "unrecognized (" + str(pitchCode) + ")")
                print(colored("      pitch " + str(abPitchCount) + ": " + verbosePitch, color))
            totalPitchCount += abPitchCount
            print("      " + colored( ab.attrib.get("event"), color, attrs=['underline']))
print("Total pitches: " + str(totalPitchCount))

[7m[32m
Inning 1 (top)[0m
[1m[32m   Corey Dickerson[0m
[32m      pitch 1: 4-seam fb[0m
[32m      pitch 2: 4-seam fb[0m
[32m      pitch 3: 4-seam fb[0m
[32m      pitch 4: slider[0m
[32m      pitch 5: 4-seam fb[0m
[32m      pitch 6: slider[0m
      [4m[32mStrikeout[0m
[1m[32m   Brandon Barnes[0m
[32m      pitch 1: 4-seam fb[0m
      [4m[32mFlyout[0m
[1m[32m   Troy Tulowitzki[0m
[32m      pitch 1: 4-seam fb[0m
[32m      pitch 2: slider[0m
[32m      pitch 3: curveball[0m
[32m      pitch 4: slider[0m
[32m      pitch 5: 4-seam fb[0m
      [4m[32mGroundout[0m
[7m[33m
Inning 1 (bottom)[0m
[1m[33m   Dee Strange-Gordon[0m
[33m      pitch 1: 4-seam fb[0m
[33m      pitch 2: 4-seam fb[0m
[33m      pitch 3: 4-seam fb[0m
[33m      pitch 4: 4-seam fb[0m
[33m      pitch 5: 4-seam fb[0m
[33m      pitch 6: 4-seam fb[0m
[33m      pitch 7: 4-seam fb[0m
[33m      pitch 8: 4-seam fb[0m
      [4m[33mWalk[0m
[1m[33m   Hanley Ramirez[0m
[

## Load Inning Detail into a DataFrame

#### Gameday Pitch Field Definitions:
https://fastballs.wordpress.com/2007/08/02/glossary-of-the-gameday-pitch-fields/