### Import packages

In [1]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from pathlib import Path
import pandas as pd
import numpy as np
import random
import glob
import os

from add_features import add_xy_future, add_velocity_xy, add_acceleration_xy, add_average_velocity, add_orientation, add_ball_in_motion, add_distance_to_ball, add_angle_to_ball, add_offside, add_distance_to_onside, load_FM_data, add_FM_data, add_tiredness, add_tiredness_short_term
from visualize_game import visualize_game_animation, visualize_prediction_animation
from utils import denominators, google_sheet_to_df, load_processed_frames, extract_variables, load_tf_model, prepare_EL_input_data, prepare_df, prepare_LSTM_df, prepare_LSTM_input_data, total_error_loss, smooth_predictions_xy, run_model, print_column_variance
from settings import *

2024-04-24 12:21:07.136772: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


### Necessary columns
| Column Name    | Description                                        |
|----------------|----------------------------------------------------|
| player         | The name of the player                             |
| jersey_number  | The jersey number of the player                    |
| team           | 'home_team', 'away_team', or 'ball'                |
| team_name      | The team of the player                             |
| period         | The period of the game (1 or 2)                    |
| minute         | The minute of the game                             |
| second         | The second within the current minute               |
| frame          | The frame of the game                              |
| distance_ran   | The cumulative distance covered by the player      |
| x              | The x-coordinate of the player                     |
| y              | The y-coordinate of the player                     |

## Define the prediction functions

In [2]:
# Add all features
def add_all_features(frames_df):
    """Run every feature helper on ``frames_df`` and return the result.

    The helpers imported from ``add_features`` are applied one after another,
    each receiving the output of the previous one. Afterwards a constant
    ``match_id`` column with the value 1 is attached.

    Args:
        frames_df: The frame to enrich; it is passed to the first helper as-is.

    Returns:
        Whatever the last helper returned, with the ``match_id`` column set.
    """
    # Each step is wrapped in a lambda so that its arguments (including the
    # load_FM_data() call) are only evaluated when that step actually runs.
    # add_offside is intentionally not part of the pipeline (it is disabled).
    feature_steps = (
        lambda df: add_xy_future(df, FPS * seconds_into_the_future),
        lambda df: add_velocity_xy(df, 1, smooth=True),
        lambda df: add_acceleration_xy(df, 1, smooth=True),
        lambda df: add_average_velocity(df),
        lambda df: add_orientation(df),
        lambda df: add_ball_in_motion(df),
        lambda df: add_distance_to_ball(df),
        lambda df: add_angle_to_ball(df),
        lambda df: add_distance_to_onside(df),
        lambda df: add_FM_data(df, load_FM_data()),
        lambda df: add_tiredness(df),
        lambda df: add_tiredness_short_term(df, window=FPS*20),
    )
    for apply_step in feature_steps:
        frames_df = apply_step(frames_df)

    # Add an imaginary 'match_id'
    frames_df['match_id'] = 1

    return frames_df

In [3]:
# Example usage: run_model(test_frames_dfs, "NN_model_v1")
def run_model(frames_dfs, model_name):
    """Predict future positions with a saved model and attach them to a frame.

    Note: this definition rebinds the name ``run_model`` that the import cell
    also imports from ``utils``.

    Args:
        frames_dfs: List of feature-enriched frames; it is forwarded to the
            ``prepare_*`` helpers.
        model_name: Name of the model. It selects ``models/{model_name}.h5``
            and, when it contains "LSTM", the LSTM preparation path.

    Returns:
        The prepared DataFrame with 'x_future_pred' and 'y_future_pred'
        columns, clipped to [0, pitch_length] and [0, pitch_width].

    Raises:
        AssertionError: On the LSTM path, if the number of predictions differs
            from the number of True values in 'can_be_sequentialized'.
    """
    # Load variables and the model itself
    numerical_cols, categorical_cols, positions, sequence_length = extract_variables(model_name)
    model = load_tf_model(f"models/{model_name}.h5", euclidean_distance_loss=True)

    uses_lstm = "LSTM" in model_name

    # Build the model input and the DataFrame that will receive the predictions
    if uses_lstm:
        X_test_input, _ = prepare_LSTM_input_data(
            frames_dfs, numerical_cols, categorical_cols, sequence_length, positions, downsampling_factor=1
        )
        prepared_frames_df = prepare_LSTM_df(
            frames_dfs, numerical_cols, categorical_cols, sequence_length, positions, downsampling_factor=1
        )
    else:
        X_test_input, _ = prepare_EL_input_data(
            frames_dfs, numerical_cols, categorical_cols, positions, downsampling_factor=1
        )
        prepared_frames_df = pd.concat(
            [
                prepare_df(frames_df, numerical_cols, categorical_cols, positions=positions, downsampling_factor=1)
                for frames_df in frames_dfs
            ],
            ignore_index=True,
        )

    # Make predictions using the loaded tf model; column 0 is x, column 1 is y
    predictions = model.predict(X_test_input)
    predicted = {
        'x_future_pred': predictions[:, 0],
        'y_future_pred': predictions[:, 1],
    }

    if uses_lstm:
        # Only rows flagged 'can_be_sequentialized' have a prediction
        sequentializable = prepared_frames_df['can_be_sequentialized']
        n_sequentializable = sequentializable.sum()
        for values in predicted.values():
            assert len(values) == n_sequentializable

        for column, values in predicted.items():
            prepared_frames_df.loc[sequentializable, column] = values

        if normalize:
            # Unnormalize the numerical columns that have a known denominator
            for col in numerical_cols:
                if col in denominators:
                    prepared_frames_df[col] = prepared_frames_df[col] * denominators[col]
    else:
        # One prediction per row; no un-normalisation happens on this path
        for column, values in predicted.items():
            prepared_frames_df[column] = values

    # Clip values to stay on the pitch
    for column, upper_bound in (('x_future_pred', pitch_length), ('y_future_pred', pitch_width)):
        prepared_frames_df[column] = prepared_frames_df[column].clip(lower=0, upper=upper_bound)

    # Smoothing of the predicted coordinates is currently disabled:
    # smooth_predictions_xy(frames_df, alpha=0.98)

    return prepared_frames_df

In [4]:
def _record_test_loss(file_path, error):
    """Append the test loss to ``file_path`` unless results are already there.

    The file is read and closed before it is reopened for appending, so the
    same file is never held open twice. A missing file is treated as "no
    testing results yet" and is created by the append.

    Args:
        file_path: Path of the model's text summary file.
        error: The loss value to record.
    """
    try:
        with open(file_path, 'r') as summary_file:
            existing_text = summary_file.read()
    except FileNotFoundError:
        existing_text = ""

    # Results are only written once per summary file
    if 'Testing results' in existing_text:
        return

    with open(file_path, 'a') as summary_file:  # 'a' mode to append data
        summary_file.write(f"\nTesting results:\ntest_loss: {error}\n")
    print("Testing results added to the file.")


# Create the vectors 'x_future_pred' and 'y_future_pred' on frames_df
def make_predictions(frames_df, model_name):
    """Add features, run the model and report the resulting error.

    Args:
        frames_df: Frame to predict on; it is passed through
            ``add_all_features`` and then ``run_model``.
        model_name: Name of the model, also used to locate
            ``models/{model_name}.txt`` where the test loss is recorded.

    Returns:
        The DataFrame returned by ``run_model``, which carries the
        'x_future_pred' and 'y_future_pred' columns.
    """
    # Prepare the DataFrame by adding all features
    frames_df = add_all_features(frames_df)

    # Run the model and add the vectors 'x_future_pred' and 'y_future_pred' to frames_df
    frames_df = run_model([frames_df], model_name)

    # Calculate the error
    error = total_error_loss(frames_df)
    print(f"Error: {error}")

    # Record the test loss once; a missing summary file must not discard the
    # predictions that were just computed
    _record_test_loss(f"models/{model_name}.txt", error)

    return frames_df

## Example usage

In [5]:
# Example match and the model to evaluate on it
match_id = '8ef97096-db3b-4597-8dfb-3bca4e69586b'
model_name = "LSTM_best_v1"

# Location of the unprocessed parquet file for that match
DATA_FOLDER_UNPROCESSED = f"{DATA_LOCAL_FOLDER}/data/2023/Allsvenskan/unprocessed"
file_path_match = f"{DATA_FOLDER_UNPROCESSED}/{match_id}.parquet"

# Convert parquet file to a DataFrame and keep rows 1000-9999 (by position)
frames_df = pd.read_parquet(file_path_match)
frames_df = frames_df.iloc[1000:10000]

# Make the prediction
# NOTE(review): the saved output of this cell ends in KeyError: 'x_future',
# so it did not run to completion — trace that before relying on frames_df.
frames_df = make_predictions(frames_df, model_name)

2024-04-24 12:21:14.186898: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31132 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:89:00.0, compute capability: 7.0


Started prepare_data
Ended prepare_data, Duration: 0.02 seconds
Started sequentialization
Ended sequentialization, Duration: 0.11 seconds
Started prepare_data
Ended prepare_data, Duration: 0.01 seconds
Started sequentialization
Ended sequentialization, Duration: 0.11 seconds
 44/195 [=====>........................] - ETA: 0s  

2024-04-24 12:21:16.287763: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401




KeyError: 'x_future'

In [None]:
# Animate the predictions stored on frames_df for the chosen model.
# NOTE(review): 300 and 380 presumably bound the part of the match that is
# animated — confirm their meaning and units against visualize_game.
visualize_prediction_animation(frames_df, 300, 380, model_name)