In [1]:
import joblib
import json
import os
import pandas as pd
xg_model = joblib.load('xg_model.sav')
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

In [2]:
def shot_matrix(eventdata):
    with open(eventdata) as f:
        data = json.load(f)

    # Create an empty list to store shot data
    shots_list = []

    # Iterate through the 'shotmap' list
    for shot_data in data.get('shotmap', []):
        # Create a dictionary to store shot attributes
        shot_dict = {}

        # Extract attributes from the shot_data dictionary
        shot_dict['Goal'] = 1 if shot_data['shotType'] == 'goal' else 0
        shot_dict['header'] = 1 if shot_data['bodyPart'] == 'head' else 0
        shot_dict['x'] =  shot_data['playerCoordinates']['y']-(37.66) 
        shot_dict['y'] = shot_data['playerCoordinates']['x']
        shot_dict['Center_dis'] = abs(shot_dict['x'] - 34)
        shot_dict['teamid'] = 'shels' if shot_data['isHome'] == True else 'pats'
        
        x_dummy = shot_dict['x']

        # Calculate Distance
        shot_dict['Distance']=(np.sqrt(shot_dict['x']**2+shot_dict['y']**2))
        x = shot_dict['x']
        y = shot_dict['y']
        # Calculate Angle Radians
        width = 7.32
        a = np.sqrt((y - width / 2) ** 2 + x ** 2)
        b = np.sqrt((y + width / 2) ** 2 + x ** 2)
        k = (width ** 2 - a ** 2 - b ** 2) / (-2 * a * b)
        gamma = np.arcsin(k)
        if gamma < 0:
            gamma = np.pi + gamma
        shot_dict['Angle Radians'] = (np.pi/2)-(gamma)
        shot_dict['Angle Degrees'] = (gamma * 180 / np.pi)

        # Add identifiers for player, team, and match
        
        
       

        # Append the shot data to the list
        shots_list.append(shot_dict)

    # Create a DataFrame from the list of shot data
    shots_dataset = pd.DataFrame(shots_list)

    return shots_dataset
         

In [3]:
directory = '/Users/adambrowne/Desktop/Personal /LOI Project/Data/League of Ireland/'
shot_list = []
jsonfiles = []
for path in os.listdir(directory):
    jsonfiles.append(os.path.join(directory,path))
for file in jsonfiles:
    shot_list.append(shot_matrix(file))
df = pd.concat(shot_list)
df['header']=pd.to_numeric(df['header'])
df['Goal']= pd.to_numeric(df['Goal'])

In [4]:
df

Unnamed: 0,Goal,header,x,y,Center_dis,teamid,Distance,Angle Radians,Angle Degrees
0,0,0,7.44,54.4,26.56,shels,54.906408,0.018144,88.960446
1,1,1,12.24,12.8,21.76,shels,17.710381,0.289984,73.385139
2,0,1,5.94,13.9,28.06,shels,15.116005,0.199456,78.571991
3,0,1,19.64,10.5,14.36,shels,22.270599,0.289535,73.410876
4,1,0,11.34,4.9,22.66,shels,12.353364,0.537683,59.193021
5,0,0,12.24,25.1,21.76,shels,27.925393,0.116373,83.33232
6,0,0,2.44,10.2,31.56,shels,10.487783,0.182833,79.52442
7,0,0,23.44,26.5,10.56,shels,35.379141,0.137686,82.111161
8,0,0,9.54,14.0,24.46,shels,16.941417,0.249887,75.68251
9,0,0,-2.76,8.4,36.76,shels,8.84181,0.302306,72.679158


In [19]:
xg_model = joblib.load('xg_lgm5_model.sav')
# Transform your input data to match the feature set used during model training
x_test_features = df[['Distance', 'Angle Radians', 'header']].copy()

# Make predictions for xG
print(x_test_features)
y_pred = xg_model.predict_proba(x_test_features)[:,1]
df['xG'] = y_pred+0.1
df
filename = 'patsvshels.csv'
o_path = '/Users/adambrowne/Desktop/Personal /LOI Project/Data/' + filename

 # Set column headers
delimiter = '\t'  # Use tab as the delimiter
decimal_format = '%.2f'  # Format numeric columns with two decimal places
encoding = 'utf-8'  # Specify encoding
headers = 'Goal','header',	'x',	'y',	'Center_dis',	'teamid',	'Distance',	'Angle Radians',	'Angle Degrees', 'xG'
df.to_csv(o_path, sep=delimiter, header=headers, index=False , float_format=decimal_format, encoding=encoding)



     Distance  Angle Radians  header
0   54.906408       0.018144       0
1   17.710381       0.289984       1
2   15.116005       0.199456       1
3   22.270599       0.289535       1
4   12.353364       0.537683       0
5   27.925393       0.116373       0
6   10.487783       0.182833       0
7   35.379141       0.137686       0
8   16.941417       0.249887       0
9    8.841810       0.302306       0
10  32.786028       0.216788       0
11  33.068468       0.193667       0
12  11.986809       0.080728       0
13  30.871501       0.204624       0
14  34.925028       0.125189       0
15  44.480036       0.105547       0
16  28.094903       0.135392       0
17  26.794992       0.239854       0
18  23.303253       0.204943       0
19  14.903879       0.011923       0
20  36.656426       0.141778       0
21  24.701166       0.076393       0
22  27.309551       0.245221       0
23  27.942469       0.014689       0
24  10.900165       0.004166       0
25  29.558681       0.066608       0
2