In [2]:
import joblib
import json
import os
import pandas as pd
xg_model = joblib.load('xg_model.sav')
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

#### This code imports the model from the prior script and creates a similar shot_matrix function to find distance from goal and angle to goal for each individual shot event using sofascore data ####
#### It cleanses sofascore data into a format that is compatible with our xG model ####
#### It will also index whether a shot is from the home team or the away team and label them accordingly ####

In [3]:
def shot_matrix(eventdata):
    with open(eventdata) as f:
        data = json.load(f)

    # Create an empty list to store shot data
    shots_list = []

    # Iterate through the 'shotmap' list
    for shot_data in data.get('shotmap', []):
        # Create a dictionary to store shot attributes
        shot_dict = {}

        # Extract attributes from the shot_data dictionary
        shot_dict['Goal'] = 1 if shot_data['shotType'] == 'goal' else 0
        shot_dict['header'] = 1 if shot_data['bodyPart'] == 'head' else 0
        shot_dict['x'] =  shot_data['playerCoordinates']['y']-(37.66) 
        shot_dict['y'] = shot_data['playerCoordinates']['x']
        shot_dict['Center_dis'] = abs(shot_dict['x'] - 34)
        shot_dict['teamid'] = 'shels' if shot_data['isHome'] == True else 'pats'
        
        x_dummy = shot_dict['x']

        # Calculate Distance
        shot_dict['Distance']=(np.sqrt(shot_dict['x']**2+shot_dict['y']**2))
        x = shot_dict['x']
        y = shot_dict['y']
        # Calculate Angle Radians
        width = 7.32
        a = np.sqrt((y - width / 2) ** 2 + x ** 2)
        b = np.sqrt((y + width / 2) ** 2 + x ** 2)
        k = (width ** 2 - a ** 2 - b ** 2) / (-2 * a * b)
        gamma = np.arcsin(k)
        if gamma < 0:
            gamma = np.pi + gamma
        shot_dict['Angle Radians'] = (np.pi/2)-(gamma)
        shot_dict['Angle Degrees'] = (gamma * 180 / np.pi)

        # Add identifiers for player, team, and match
        
        
       

        # Append the shot data to the list
        shots_list.append(shot_dict)

    # Create a DataFrame from the list of shot data
    shots_dataset = pd.DataFrame(shots_list)

    return shots_dataset
         

#### Now we will create a dataframe using arbitrary game data from sofascore ####

In [6]:
directory = '/Users/adambrowne/Desktop/Personal /LOI Project/Data/League of Ireland/'
shot_list = []
jsonfiles = []
for path in os.listdir(directory):
    jsonfiles.append(os.path.join(directory,path))
for file in jsonfiles:
    shot_list.append(shot_matrix(file))
df = pd.concat(shot_list)
df['header']=pd.to_numeric(df['header'])
df['Goal']= pd.to_numeric(df['Goal'])
df

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x87 in position 23: invalid start byte

#### Now the xG model from the previous script is loaded and applied to the data frame to add a column for xG ####

In [None]:
xg_model = joblib.load('xg_lgm4_model.sav')
# Transform sofascore input data to match the feature set used during model training
x_test_features = df[['Distance', 'Angle Radians', 'header']].copy()

# Make predictions for xG
print(x_test_features)
y_pred = xg_model.predict_proba(x_test_features)[:,1]
df['xG'] = y_pred+0.1

filename = 'patsvshels.csv'
o_path = '/Users/adambrowne/Desktop/Personal /LOI Project/Data/' + filename

 # Set column headers
delimiter = '\t'  # Use tab as the delimiter
decimal_format = '%.2f'  # Format numeric columns with two decimal places
encoding = 'utf-8'  # Specify encoding
headers = 'Goal','header',	'x',	'y',	'Center_dis',	'teamid',	'Distance',	'Angle Radians',	'Angle Degrees', 'xG'
df.to_csv(o_path, sep=delimiter, header=headers, index=False , float_format=decimal_format, encoding=encoding)
df


NameError: name 'joblib' is not defined