### Import packages

In [1]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from pathlib import Path
import pandas as pd
import numpy as np
import random
import glob
import os

from add_features import add_xy_future, add_velocity_xy, add_acceleration_xy, add_average_velocity, add_orientation, add_ball_in_motion, add_distance_to_ball, add_angle_to_ball, add_offside, load_FM_data, add_FM_data, add_tiredness, add_tiredness_short_term
from visualize_game import visualize_game_animation, visualize_prediction_animation
from utils import google_sheet_to_df, load_processed_frames, extract_variables, load_tf_model, prepare_EL_input_data, prepare_df, total_error_loss, smooth_predictions_xy, run_model
from settings import *

2024-04-18 14:37:11.767828: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


### Necessary columns
| Column Name    | Description                                        |
|----------------|----------------------------------------------------|
| player         | The name of the player                             |
| jersey_number  | The jersey number of the player                    |
| team           | 'home_team', 'away_team', or 'ball'                |
| team_name      | The team of the player                             |
| period         | The period of the game (1 or 2)                    |
| minute         | The minute of the game                             |
| second         | The second within the current minute               |
| frame          | The frame of the game                              |
| distance_ran   | The cumulative distance covered by the player      |
| x              | The x-coordinate of the player                     |
| y              | The y-coordinate of the player                     |

## Define make prediction

In [2]:
# Add all features
def add_all_features(frames_df):
    # Add the following features
    frames_df = add_xy_future(frames_df, FPS * seconds_into_the_future)
    frames_df = add_velocity_xy(frames_df, 1, smooth=True)
    frames_df = add_acceleration_xy(frames_df, 1, smooth=True)
    frames_df = add_average_velocity(frames_df)
    frames_df = add_orientation(frames_df)
    frames_df = add_ball_in_motion(frames_df)
    frames_df = add_distance_to_ball(frames_df)
    frames_df = add_angle_to_ball(frames_df)
    frames_df = add_offside(frames_df)
    frames_df = add_FM_data(frames_df, load_FM_data())
    frames_df = add_tiredness(frames_df)
    frames_df = add_tiredness_short_term(frames_df, window=FPS*20)

    return frames_df

In [3]:
# Example usage: run_model(test_frames_dfs, "NN_model_v1") 
def run_model(frames_dfs, model_name):
    # Load varibles
    numerical_cols, categorical_cols, positions, sequence_length = extract_variables(model_name)

    # Load model
    model = load_tf_model(f"models/{model_name}.h5", euclidean_distance_loss=True)

    # Prepare the input data for LSTM model
    if "LSTM" in model_name:
        # Prepare data for LSTM
        prepared_frames_df = prepare_LSTM_df(frames_dfs, numerical_cols, categorical_cols, sequence_length, positions)

        # # Only keep rows that can be sequentialized
        # print(prepared_frames_df)

        # # Sort the DataFrame by 'team', 'match_id', and most importantly 'player'
        # prepared_frames_df = prepared_frames_df.sort_values(by=['team', 'match_id', 'player'])

    # Prepare the input data for non-LSTM model
    else:
        X_test_input, y_test = prepare_EL_input_data(frames_dfs, numerical_cols, categorical_cols, positions, downsampling_factor=1)

        # Prepared the DataFrames and concatenate into a single large DataFrame
        prepared_frames_dfs = [prepare_df(frames_df, numerical_cols, categorical_cols, positions=positions, downsampling_factor=1) for frames_df in frames_dfs]
        prepared_frames_df = pd.concat(prepared_frames_dfs, ignore_index=True)

    # Make predictions using the loaded tf model
    predictions = model.predict(X_test_input)

    # Extract the predicted values
    x_future_pred = predictions[:, 0]
    y_future_pred = predictions[:, 1]

    # Add the predicted values to 'prepared_frames_df'
    prepared_frames_df['x_future_pred'] = x_future_pred
    prepared_frames_df['y_future_pred'] = y_future_pred

    # Clip values to stay on the pitch
    prepared_frames_df['x_future_pred'] = prepared_frames_df['x_future_pred'].clip(lower=0, upper=pitch_length)
    prepared_frames_df['y_future_pred'] = prepared_frames_df['y_future_pred'].clip(lower=0, upper=pitch_width)

    # Smooth the predicted coordinates
    # smooth_predictions_xy(frames_df, alpha=0.98)

    return prepared_frames_df

In [4]:
# Create the vectors 'x_future_pred' and 'y_future_pred' on frames_df
def make_predictions(frames_df, model_name):
    # Prepare the DataFrame by adding all features
    frames_df = add_all_features(frames_df)

    # Run the model and add the vectors 'x_future_pred' and 'y_future_pred' to frames_df
    frames_df = run_model([frames_df], model_name)

    # Calculate the error
    error = total_error_loss(frames_df)
    print(f"Error: {error}")

    return frames_df

## Example usage

In [5]:
# Example match
DATA_FOLDER_UNPROCESSED = f"{DATA_LOCAL_FOLDER}/data/2023/Allsvenskan/unprocessed"
match_id = '8ef97096-db3b-4597-8dfb-3bca4e69586b'
file_path_match = f"{DATA_FOLDER_UNPROCESSED}/{match_id}.parquet"

# Convert parquet file to a DataFrame
frames_df = pd.read_parquet(file_path_match)

# Make the prediction
frames_df = make_predictions(frames_df, "NN_best_v1")



2024-04-18 14:37:45.427398: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31133 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:8a:00.0, compute capability: 7.0


Error: 2.843


In [6]:
visualize_prediction_animation(frames_df, 1100, 1200, "NN")

In [7]:
frames_df.columns

Index(['team', 'team_name', 'team_direction', 'jersey_number', 'player',
       'role', 'distance_ran', 'x', 'y', 'frame', 'minute', 'second', 'period',
       'events', 'objects_tracked', 'x_future', 'y_future', 'v_x', 'v_y',
       'a_x', 'a_y', 'v_x_avg', 'v_y_avg', 'orientation', 'ball_in_motion',
       'distance_to_ball', 'angle_to_ball', 'offside', 'nationality', 'height',
       'weight', 'acc', 'pac', 'sta', 'position', 'tiredness',
       'tiredness_short', 'x_future_pred', 'y_future_pred', 'pred_error'],
      dtype='object')

In [8]:
for i in [1, 0.99, 0.90, 0.80, 0.60]:
    frames_smooth_df = smooth_predictions_xy(frames_df.copy(), alpha=0.98)

    # Calculate the error
    error = total_error_loss(frames_smooth_df)
    print(f"Error: {error} with alpha={i}")

KeyError: 'match_id'