In [857]:
import xmltodict
import numpy as np
from sklearn import linear_model
import os, zipfile
import enum
import bokeh

In [850]:
class ImportOpta(object):
    """Collect Opta f24 event files under a root folder and build ``Shot`` objects from them.

    Layout handled by the scan: ``path`` contains one sub-folder per match; each
    sub-folder holds an ``f24`` file directly, a ``.zip`` archive containing it, or
    a nested folder whose name contains ``'Opta'`` (or equals ``'opta_import'``)
    that holds the f24 file(s).

    Attributes:
        path: root folder that is scanned.
        file_paths: paths of the f24 files found by ``get_file_paths``.
        instances: ``Shot`` objects appended by ``create_instances``.
    """

    # Event ``@type_id`` values that are turned into Shot objects.
    SHOT_TYPE_IDS = ('13', '14', '15', '16')

    def __init__(self, path, run=True):
        """Store the root folder and, unless ``run`` is false, scan and parse immediately.

        Args:
            path: root folder containing one sub-folder per match.
            run: when true (default) call ``get_file_paths`` then ``create_instances``.
        """
        self.path = path
        self.file_paths = []
        self.instances = []

        if run:
            self.get_file_paths()
            self.create_instances()

    @staticmethod
    def _extract_archives(folder_path):
        """Unpack every ``.zip`` file directly inside ``folder_path``, then delete the archive."""
        for name in sorted(os.listdir(folder_path)):
            zip_file_path = os.path.join(folder_path, name)
            if not (name.endswith('.zip') and os.path.isfile(zip_file_path)):
                continue
            # Context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
                zip_ref.extractall(folder_path)
            os.remove(zip_file_path)  # unchanged behaviour: the archive is removed once extracted

    def get_file_paths(self):
        """Scan ``self.path`` and append every f24 file found to ``self.file_paths``.

        Side effects: zip archives inside a match folder are extracted in place and
        then deleted. Calling this twice appends the same paths twice.
        """
        # os.listdir order is arbitrary, so sort for a reproducible result and skip
        # non-directories explicitly instead of dropping "the first entry".
        for folder_name in sorted(os.listdir(self.path)):
            folder_path = os.path.join(self.path, folder_name)
            if not os.path.isdir(folder_path):
                continue
            self._extract_archives(folder_path)
            for file_name in sorted(os.listdir(folder_path)):
                entry_path = os.path.join(folder_path, file_name)
                if 'f24' in file_name:
                    self.file_paths.append(entry_path)
                    break  # stop at the first f24 entry found directly in the match folder
                is_opta_folder = 'Opta' in file_name or file_name == 'opta_import'
                if is_opta_folder and os.path.isdir(entry_path):
                    for sub_name in sorted(os.listdir(entry_path)):
                        if 'f24' in sub_name:
                            self.file_paths.append(os.path.join(entry_path, sub_name))

    def create_instances(self):
        """Parse each file in ``self.file_paths`` and append one ``Shot`` per shot event."""
        for file_path in self.file_paths:
            with open(file_path) as fd:
                events = xmltodict.parse(fd.read())['Games']['Game']['Event']
            # xmltodict returns a single mapping rather than a list when the game
            # contains exactly one <Event> element.
            if isinstance(events, dict):
                events = [events]
            self.instances.extend(
                Shot(item) for item in events if item['@type_id'] in self.SHOT_TYPE_IDS)

In [851]:
@enum.unique
class BodyPart(enum.Enum):
    """Body part used for a shot, as resolved from event qualifiers by ``Shot.body_part``."""

    head = 0        # qualifier id 15
    left_foot = 1   # qualifier id 72
    right_foot = 2  # qualifier id 20
    other = 3       # qualifier id 21
    none = 4        # fallback when none of the ids above is present

In [852]:
 @enum.unique
 class ShotPitchLocation(enum.Enum):
    """Pitch zone a shot was taken from, as resolved by ``Shot.shot_pitch_location``."""

    small_box_center = 0        # qualifier id 16
    box_center = 1              # qualifier id 17
    out_of_box_center = 2       # qualifier id 18
    center_35_plus = 3          # qualifier id 19
    small_box_right = 4         # qualifier id 60
    small_box_left = 5          # qualifier id 61
    box_deep_right = 6          # qualifier id 62
    box_right = 7               # qualifier id 63
    box_left = 8                # qualifier id 64
    box_deep_left = 9           # qualifier id 65
    out_of_box_deep_right = 10  # qualifier id 66
    out_of_box_right = 11       # qualifier id 67
    out_of_box_left = 12        # qualifier id 68
    out_of_box_deep_left = 13   # qualifier id 69
    right_35_plus = 14          # qualifier id 70
    left_35_plus = 15           # qualifier id 71
    none = 16                   # fallback when none of the ids above is present

In [853]:
@enum.unique
class ShotGoalLocation(enum.Enum):
    """Where a shot ended up relative to the goal, as resolved by ``Shot.shot_goal_location``."""

    left = 0                   # qualifier id 73
    high = 1                   # qualifier id 74
    right = 2                  # qualifier id 75
    low_left = 3               # qualifier id 76
    high_left = 4              # qualifier id 77
    low_center = 5             # qualifier id 78
    high_center = 6            # qualifier id 79
    low_right = 7              # qualifier id 80
    high_right = 8             # qualifier id 81
    blocked = 9                # qualifier id 82
    close_left = 10            # qualifier id 83
    close_right = 11           # qualifier id 84
    close_high = 12            # qualifier id 85
    close_left_and_high = 13   # qualifier id 86
    close_right_and_high = 14  # qualifier id 87
    none = 15                  # fallback when none of the ids above is present

In [854]:
@enum.unique
class PatternOfPlay(enum.Enum):
    """Phase of play a shot came from, as resolved by ``Shot.pattern_of_play``."""

    regular_play = 0  # qualifier id 22
    fast_break = 1    # qualifier id 23
    set_piece = 2     # qualifier id 24
    from_corner = 3   # qualifier id 25
    from_kick = 4     # qualifier id 26
    none = 5          # fallback when none of the ids above is present

In [855]:
class Shot(object):
    """Read-only view over one parsed shot event.

    ``item`` is the mapping ``xmltodict`` produces for a single ``<Event>``:
    XML attributes are keyed with an ``@`` prefix (``'@x'``, ``'@type_id'`` ...)
    and the child qualifier elements are stored under ``'Q'``.

    Attributes:
        item: the raw event mapping.
        pitch_length: length used to scale ``x_raw`` (105.0; presumably metres - TODO confirm).
        pitch_width: width used to scale ``y_raw`` (68.0; presumably metres - TODO confirm).
    """

    # Ordered (qualifier id, enum member name) tables. Order matters: the first id
    # present on the event wins. Member names are resolved lazily so the enum
    # classes only need to exist when a property is actually read.
    _BODY_PARTS = (
        ('15', 'head'), ('72', 'left_foot'), ('20', 'right_foot'), ('21', 'other'),
    )
    _PITCH_LOCATIONS = (
        ('16', 'small_box_center'), ('17', 'box_center'),
        ('18', 'out_of_box_center'), ('19', 'center_35_plus'),
        ('60', 'small_box_right'), ('61', 'small_box_left'),
        ('62', 'box_deep_right'), ('63', 'box_right'),
        ('64', 'box_left'), ('65', 'box_deep_left'),
        ('66', 'out_of_box_deep_right'), ('67', 'out_of_box_right'),
        ('68', 'out_of_box_left'), ('69', 'out_of_box_deep_left'),
        ('70', 'right_35_plus'), ('71', 'left_35_plus'),
    )
    _GOAL_LOCATIONS = (
        ('73', 'left'), ('74', 'high'), ('75', 'right'),
        ('76', 'low_left'), ('77', 'high_left'),
        ('78', 'low_center'), ('79', 'high_center'),
        ('80', 'low_right'), ('81', 'high_right'),
        ('82', 'blocked'), ('83', 'close_left'), ('84', 'close_right'),
        ('85', 'close_high'), ('86', 'close_left_and_high'),
        ('87', 'close_right_and_high'),
    )
    _PATTERNS_OF_PLAY = (
        ('22', 'regular_play'), ('23', 'fast_break'), ('24', 'set_piece'),
        ('25', 'from_corner'), ('26', 'from_kick'),
    )

    def __init__(self, item):
        """Wrap the event mapping ``item``; nothing is parsed until a property is read."""
        self.item = item
        self.pitch_length = 105.0
        self.pitch_width = 68.0

    def _first_match(self, table):
        """Return the member name of the first ``table`` entry whose id is on the event, else ``'none'``."""
        present = set(self.qualifiers)  # build the qualifier list once per lookup
        for qualifier_id, member_name in table:
            if qualifier_id in present:
                return member_name
        return 'none'

    @property
    def x_raw(self):
        """The ``'@x'`` attribute as a float."""
        return float(self.item['@x'])

    @property
    def y_raw(self):
        """The ``'@y'`` attribute as a float."""
        return float(self.item['@y'])

    @property
    def x(self):
        """``x_raw`` rescaled from a 0-100 range to ``pitch_length``."""
        return self.x_raw / 100.0 * self.pitch_length

    @property
    def y(self):
        """``y_raw`` rescaled from a 0-100 range to ``pitch_width``."""
        return self.y_raw / 100.0 * self.pitch_width

    @property
    def minute(self):
        """The ``'@min'`` attribute, returned unconverted (a string in parsed XML)."""
        return self.item['@min']

    @property
    def second(self):
        """The ``'@sec'`` attribute, returned unconverted (a string in parsed XML)."""
        return self.item['@sec']

    @property
    def team(self):
        """The ``'@team_id'`` attribute, returned unconverted."""
        return self.item['@team_id']

    @property
    def made(self):
        """True when the event's ``'@type_id'`` is ``'16'``."""
        return self.item['@type_id'] == '16'

    @property
    def distance_raw(self):
        """Euclidean distance from (``x_raw``, ``y_raw``) to the point (100, 50) in raw units."""
        return np.sqrt((self.y_raw - 50) ** 2 + (100 - self.x_raw) ** 2)

    @property
    def distance(self):
        """Euclidean distance from (``x``, ``y``) to (``pitch_length``, ``pitch_width`` / 2)."""
        return np.sqrt((self.y - self.pitch_width * .5) ** 2 +
                       (self.pitch_length - self.x) ** 2)

    @property
    def qualifiers(self):
        """List of ``'@qualifier_id'`` strings attached to the event (empty if there are none)."""
        raw = self.item.get('Q')
        if raw is None:
            # xmltodict omits the key entirely when the event has no <Q> children.
            return []
        if isinstance(raw, dict):
            # A single <Q> child is parsed as one mapping rather than a one-item list.
            raw = [raw]
        return [qualifier['@qualifier_id'] for qualifier in raw]

    @property
    def body_part(self):
        """``BodyPart`` member for the first matching qualifier (15, 72, 20, 21), else ``none``."""
        return BodyPart[self._first_match(self._BODY_PARTS)]

    @property
    def assisted(self):
        """True when qualifier ``'29'`` is present."""
        return '29' in self.qualifiers

    @property
    def individual_play(self):
        """True when qualifier ``'215'`` is present."""
        return '215' in self.qualifiers

    @property
    def shot_pitch_location(self):
        """``ShotPitchLocation`` member for the first matching qualifier (16-19, 60-71), else ``none``."""
        return ShotPitchLocation[self._first_match(self._PITCH_LOCATIONS)]

    @property
    def shot_goal_location(self):
        """``ShotGoalLocation`` member for the first matching qualifier (73-87), else ``none``."""
        return ShotGoalLocation[self._first_match(self._GOAL_LOCATIONS)]

    @property
    def pattern_of_play(self):
        """``PatternOfPlay`` member for the first matching qualifier (22-26), else ``none``."""
        return PatternOfPlay[self._first_match(self._PATTERNS_OF_PLAY)]

In [856]:
# Root folder with one sub-folder per match. Set the OPTA_DATA_DIR environment
# variable to point elsewhere; the default is the original local path.
OPTA_DATA_DIR = os.environ.get('OPTA_DATA_DIR', '/Users/jason.katz/Downloads/AllOpta')
instances = ImportOpta(OPTA_DATA_DIR)

In [848]:
# Pattern of play of the first parsed shot. ImportOpta keeps its Shot objects in
# `.instances`, and the importer built above is bound to `instances`.
instances.instances[0].pattern_of_play.name

'set_piece'

In [849]:
# Number of Shot objects the importer produced (stored on its `.instances` list).
len(instances.instances)

11584

In [822]:
# Shot objects for the shot events (type ids 13-16) of the single match held in `doc`.
# NOTE(review): `doc` is assigned by the xmltodict.parse cell further down the
# notebook, so on a fresh kernel that cell has to run before this one.
objs3 = [Shot(item) for item in doc['Games']['Game']['Event']
         if item['@type_id'] in ['13', '14', '15', '16']]

In [832]:
# Raw x coordinate (the '@x' attribute converted to float) of the first shot in objs3.
objs3[0].x_raw

80.2

In [796]:
# Print the decoded body part for the first 25 shots.
# NOTE(review): `objs2` is not assigned in any cell visible here - confirm where it
# comes from before re-running on a fresh kernel.
for item in objs2[0:25]:
    print(item.body_part.name)

left_foot
right_foot
right_foot
right_foot
head
left_foot
right_foot
right_foot
head
left_foot
left_foot
head
right_foot
left_foot
right_foot
head
left_foot
right_foot
right_foot
right_foot
right_foot
left_foot
left_foot
right_foot
left_foot


In [458]:
# Inspect one raw event mapping (index 41 of the parsed match) to see its attribute layout.
doc['Games']['Game']['Event'][41]

OrderedDict([('@id', '580874171'),
             ('@event_id', '17'),
             ('@type_id', '13'),
             ('@period_id', '1'),
             ('@min', '2'),
             ('@sec', '4'),
             ('@player_id', '60270'),
             ('@team_id', '21'),
             ('@outcome', '1'),
             ('@x', '80.2'),
             ('@y', '72.3'),
             ('@timestamp', '2016-09-25T16:02:20.805'),
             ('@last_modified', '2016-09-25T16:03:23'),
             ('@version', '1474815803342'),
             ('Q',
              [OrderedDict([('@id', '307609787'),
                            ('@qualifier_id', '55'),
                            ('@value', '16')]),
               OrderedDict([('@id', '1992560218'), ('@qualifier_id', '72')]),
               OrderedDict([('@id', '367448276'), ('@qualifier_id', '215')]),
               OrderedDict([('@id', '1829415703'), ('@qualifier_id', '29')]),
               OrderedDict([('@id', '792447111'), ('@qualifier_id', '18')]),
          

In [332]:
# Qualifier entries ('Q') of event 41; each carries '@qualifier_id' and some also an '@value'.
doc['Games']['Game']['Event'][41]['Q']

[OrderedDict([('@id', '307609787'),
              ('@qualifier_id', '55'),
              ('@value', '16')]),
 OrderedDict([('@id', '1992560218'), ('@qualifier_id', '72')]),
 OrderedDict([('@id', '367448276'), ('@qualifier_id', '215')]),
 OrderedDict([('@id', '1829415703'), ('@qualifier_id', '29')]),
 OrderedDict([('@id', '792447111'), ('@qualifier_id', '18')]),
 OrderedDict([('@id', '1134107520'), ('@qualifier_id', '22')]),
 OrderedDict([('@id', '881618325'), ('@qualifier_id', '73')]),
 OrderedDict([('@id', '2139138198'),
              ('@qualifier_id', '103'),
              ('@value', '2.8')]),
 OrderedDict([('@id', '927881923'),
              ('@qualifier_id', '56'),
              ('@value', 'Center')]),
 OrderedDict([('@id', '1365435949'),
              ('@qualifier_id', '102'),
              ('@value', '61.3')])]

In [231]:
# Ordinary least-squares fit of shot outcome (`made`) on shot distance.
# NOTE(review): `distance` and `made` are not assigned in any cell visible here -
# presumably per-shot sequences built from Shot.distance / Shot.made; confirm.
reg = linear_model.LinearRegression()

# scikit-learn wants a 2-D (n_samples, n_features) ndarray. np.matrix input is
# rejected by current releases, so reshape the 1-D distances into one column.
reg.fit(np.asarray(distance).reshape(-1, 1), made)

# The coefficients
print('Coefficients: \n', reg.coef_)

Coefficients: 
 [-0.01370518]


In [7]:
# Parse a single match's f24 event file into nested mappings; other cells index the
# result as doc['Games']['Game']['Event']. The path is relative, so it is resolved
# against the kernel's current working directory.
with open('f24-8-2016-855231-eventdetails.xml') as fd:
    doc = xmltodict.parse(fd.read())

In [765]:
# Collect the f24 event file paths by reusing ImportOpta's folder scan instead of
# keeping a second copy of that loop in this cell.
extension = ".zip"  # retained for any other cell that reads it; the scan looks for ".zip" archives
# Set the OPTA_DATA_DIR environment variable to point elsewhere; the default is the original path.
dir_name = os.environ.get('OPTA_DATA_DIR', '/Users/jason.katz/Downloads/AllOpta')
_scanner = ImportOpta(dir_name, run=False)  # run=False: only gather paths, do not parse
_scanner.get_file_paths()
file_paths = _scanner.file_paths
file_paths

['/Users/jason.katz/Downloads/AllOpta copy/56d875256bcfc73788d2d985/f24-8-2015-803378-eventdetails.xml',
 '/Users/jason.katz/Downloads/AllOpta copy/56d8b77bca45ea00349a40da/f24-8-2015-803418-eventdetails.xml',
 '/Users/jason.katz/Downloads/AllOpta copy/56f0146f1c374d31a4109265/f24-8-2015-803443-eventdetails.xml',
 '/Users/jason.katz/Downloads/AllOpta copy/56f0152e234c803008d7560f/f24-8-2015-803434-eventdetails.xml',
 '/Users/jason.katz/Downloads/AllOpta copy/570fb8de1c374d0dc80a2b57/OptaUpload/f24-8-2015-803424-eventdetails.xml',
 '/Users/jason.katz/Downloads/AllOpta copy/570fb8f0234c803b18b8ee27/OptaUpload/f24-8-2015-803436-eventdetails.xml',
 '/Users/jason.katz/Downloads/AllOpta copy/570fb8fd1c374d0dc80a2b5d/OptaUpload/f24-8-2015-803444-eventdetails.xml',
 '/Users/jason.katz/Downloads/AllOpta copy/570fb993b621e93308efc2c5/OptaUpload/f24-8-2015-803458-eventdetails.xml',
 '/Users/jason.katz/Downloads/AllOpta copy/570fb9d763538e24bc8285e4/OptaUpload/f24-8-2015-803464-eventdetails.xml',


In [781]:
# Build one Shot per shot event (type ids 13-16) across every collected f24 file.
objs = []
for file_path in file_paths:
    with open(file_path) as fd:
        events = xmltodict.parse(fd.read())['Games']['Game']['Event']
    # xmltodict yields a single mapping rather than a list when a game has one <Event>.
    if isinstance(events, dict):
        events = [events]
    objs.extend(Shot(item) for item in events
                if item['@type_id'] in ['13', '14', '15', '16'])