# Model Evaluation
This notebook reports the test results for each model, analyses how the prediction error varies with parameters such as 'position', and includes some animated visualisations.

### Import packages

In [1]:
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from tensorflow.keras.models import load_model as keras_load_model
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Model
from sklearn.pipeline import Pipeline
from keras.models import Sequential

import tensorflow as tf
import pandas as pd
import numpy as np
import random
import glob
import os

from utils import load_processed_frames, split_match_ids, total_error_loss, run_model, evaluate_model, test_model, print_column_variance, add_pred_error, smooth_predictions_xy, prepare_LSTM_df
from visualize_game import visualize_prediction_animation, visualize_game_animation
from settings import *

2024-06-02 11:32:24.052078: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


## Define NAIVE models

In [2]:
def predict_two_seconds_naive_static(frames_df, seconds=None):
    """NAIVE baseline: assume every player stands still.

    Copies the current 'x' and 'y' columns into 'x_future_pred' and
    'y_future_pred'. The DataFrame is modified in place; nothing is returned.

    Args:
        frames_df: DataFrame with 'x' and 'y' columns.
        seconds: Unused; accepted so every naive model can be called with the
            same (frames_df, seconds) arguments.
    """
    for axis in ('x', 'y'):
        frames_df[f'{axis}_future_pred'] = frames_df[axis]

def predict_two_seconds_naive_velocity(frames_df, seconds=seconds_into_the_future):
    """NAIVE baseline: assume every player keeps their current velocity.

    Writes 'x_future_pred' and 'y_future_pred' as position + velocity * seconds,
    clipped to the pitch ([0, pitch_length] for x, [0, pitch_width] for y).
    The DataFrame is modified in place; nothing is returned.

    Args:
        frames_df: DataFrame with 'x', 'y', 'v_x' and 'v_y' columns.
        seconds: How far into the future to extrapolate.
    """
    # (position column, velocity column, upper pitch bound) for each axis
    axes = (('x', 'v_x', pitch_length), ('y', 'v_y', pitch_width))

    for pos_col, vel_col, upper_bound in axes:
        projected = frames_df[pos_col] + frames_df[vel_col] * seconds
        # Clip values to stay on the pitch
        frames_df[f'{pos_col}_future_pred'] = projected.clip(lower=0, upper=upper_bound)

    # Smoothing of the predicted coordinates is currently disabled
    # smooth_predictions_xy(frames_df, alpha=0.95)

def predict_two_seconds_naive_acceleration(frames_df, seconds=seconds_into_the_future):
    """NAIVE baseline: assume every player keeps their current velocity and acceleration.

    Writes 'x_future_pred' and 'y_future_pred' using the kinematic equation
    position + v * t + 0.5 * a * t^2, clipped to the pitch
    ([0, pitch_length] for x, [0, pitch_width] for y).
    The DataFrame is modified in place; nothing is returned.

    Args:
        frames_df: DataFrame with 'x', 'y', 'v_x', 'v_y', 'a_x' and 'a_y' columns.
        seconds: How far into the future to extrapolate.
    """
    # (position, velocity, acceleration column, upper pitch bound) for each axis
    axes = (('x', 'v_x', 'a_x', pitch_length), ('y', 'v_y', 'a_y', pitch_width))

    for pos_col, vel_col, acc_col, upper_bound in axes:
        # Calculate the future position using the kinematic equation
        projected = frames_df[pos_col] + frames_df[vel_col] * seconds + 0.5 * frames_df[acc_col] * (seconds ** 2)
        # Clip values to stay on the pitch
        frames_df[f'{pos_col}_future_pred'] = projected.clip(lower=0, upper=upper_bound)

    # Smoothing of the predicted coordinates is currently disabled
    # smooth_predictions_xy(frames_df, alpha=0.95)

# Registry of the naive prediction functions (models) you want to test, keyed by display name.
# predict_and_evaluate_naive_model looks models up by these keys, and the evaluation
# cells use the same keys as column labels in their result tables.
prediction_functions = {
    "Naive Static": predict_two_seconds_naive_static,
    "Naive Velocity": predict_two_seconds_naive_velocity,
    "Naive Acceleration": predict_two_seconds_naive_acceleration
}

### Helper functions

In [3]:
def find_frame_with_average_error(frames_df, average_pred_error, error_margin):
    """Find the first frame whose mean 'pred_error' is approximately a target value.

    A frame matches when its mean 'pred_error' lies in the inclusive interval
    [average_pred_error - error_margin, average_pred_error + error_margin].
    Frames are considered in order of first appearance in frames_df.

    Args:
        frames_df: DataFrame with 'frame' and 'pred_error' columns.
        average_pred_error: Target mean prediction error.
        error_margin: Allowed absolute deviation from the target.

    Returns:
        The 'frame' value of the first matching frame, or None (after printing
        a notice) if no frame lies within the margin.
    """
    # Compute every per-frame mean in one grouped pass instead of re-filtering
    # the whole DataFrame once per frame. sort=False keeps the frames in order
    # of first appearance, matching iteration over frames_df['frame'].unique().
    mean_error_per_frame = frames_df.groupby('frame', sort=False)['pred_error'].mean()

    # between() is inclusive on both ends; NaN means (frames without any valid
    # pred_error) never match.
    within_margin = mean_error_per_frame.between(
        average_pred_error - error_margin,
        average_pred_error + error_margin,
    )

    if within_margin.any():
        # idxmax on a boolean Series yields the label of the first True entry
        return within_margin.idxmax()

    # If no frame was found
    print(f"No frame found within the error margin of {error_margin}")
    return None

def predict_and_evaluate_naive_model(naive_model_name, test_df, seconds=seconds_into_the_future):
    """Run one naive model on a set of frames and calculate its error.

    Args:
        naive_model_name: Key into prediction_functions (e.g. "Naive Static").
        test_df: DataFrame of frames; the naive model writes its predictions
            into this DataFrame in place.
        seconds: How far into the future the model should predict.

    Returns:
        A (test_df, error) tuple, where test_df is the same object that was
        passed in and error is the value returned by total_error_loss(test_df).
    """
    # Look up the prediction function by name and let it write its predictions
    prediction_functions[naive_model_name](test_df, seconds)

    # Score the freshly written predictions
    return test_df, total_error_loss(test_df)

def predict_and_visualize(match_id, model_name, start_frame, end_frame, image_frame=None):
    """Predict with one model on a slice of a match and animate the result.

    Args:
        match_id: Id of the match to load via load_processed_frames.
        model_name: Name of the model; any name containing 'Naive' is run
            through predict_and_evaluate_naive_model, everything else through
            run_model followed by add_pred_error.
        start_frame: First frame of the animation.
        end_frame: Last frame (inclusive) of the animation.
        image_frame: Forwarded unchanged to visualize_prediction_animation.
    """
    # Load the game and keep the requested window, extended backwards by
    # seconds_into_the_future*FPS frames before start_frame
    window_start_exclusive = start_frame - seconds_into_the_future*FPS
    all_frames = load_processed_frames(match_id=match_id)[0]
    in_window = (all_frames['frame'] > window_start_exclusive) & (all_frames['frame'] <= end_frame)
    frames_df = all_frames[in_window].copy()

    if 'Naive' not in model_name:
        # Run model for NN/ LSTM model
        frames_df = run_model([], model_name, downsampling_factor_testing=1, preloaded_frames_df=frames_df)
        frames_df = add_pred_error(frames_df)
    else:
        # Run naive model
        frames_df, _ = predict_and_evaluate_naive_model(model_name, frames_df)

    # Flip every 'y' coordinate (actual, future and predicted) across the pitch width
    for y_col in ('y', 'y_future', 'y_future_pred'):
        frames_df[y_col] = (pitch_width - frames_df[y_col]).round(2)

    # Visualize predictions with an animation
    visualize_prediction_animation(frames_df, start_frame, end_frame, model_name, image_frame)

## Evaluate NAIVE models

In [5]:
# Load test frames
# NOTE(review): the meaning of the argument 560 is defined by split_match_ids in utils — confirm there.
_, test_ids, _ = split_match_ids(560)

# Columns passed to prepare_LSTM_df as `unchanged_cols`
unchanged_cols = ['team_name', 'jersey_number', 'player', 'x', 'y', 'frame', 'minute', 'second', 'period',
    'v_x', 'v_y', 'a_x', 'a_y', 'ball_in_motion', 'distance_to_ball', 'angle_to_ball', 'offside',
    'distance_to_onside', 'nationality', 'height', 'weight', 'acc', 'pac', 'sta',
    'position', 'specific_position', 'tiredness', 'tiredness_short',
    'x_future_25', 'y_future_25', 'x_future_50', 'y_future_50', 'x_future_75', 'y_future_75',
    'x_future', 'y_future', 'match_id', 'v_x_avg', 'v_y_avg', 'age']

# Load DataFrame with the given parameters
sequence_length = 10
downsampling_factor_testing = 5
positions = ['Attacking Midfielder', 'Central Midfielder', 'Centre-Back', 'Defensive Midfielder', 'Forward', 'Full-Back', 'Goalkeeper', 'Wide Midfielder', 'Winger']
test_df = prepare_LSTM_df(test_ids, pd.DataFrame(), [], [], unchanged_cols, sequence_length, positions, downsampling_factor_testing)

# Clean up temporary columns
test_df = test_df.drop(columns=['y_values', 'sequential_numerical_data'])

# Only keep rows that can be sequentialized.
# Take an explicit copy: the .loc assignments below (and the naive models, which add
# columns in place) would otherwise write into a filtered slice and raise
# pandas' SettingWithCopyWarning.
test_df = test_df[test_df['can_be_sequentialized']].copy()

# Set pred_error to None for rows where 'team_name' is 'ball'
test_df.loc[test_df['team_name'] == 'ball', 'pred_error'] = None

# Set pred_error to None for frames where the ball is not in motion
test_df.loc[test_df['ball_in_motion'] != True, 'pred_error'] = None

### Evaluate the NAIVE models with different parameters

In [7]:
# Define a list with how many seconds into the future we want to predict
list_of_seconds = [1, 2, 3]

# Initialize an empty list to store the results (one dict per prediction horizon)
results = []

try:
    # Loop through each prediction horizon
    for seconds in list_of_seconds:
        # Point 'x_future' and 'y_future' at the ground truth for this horizon
        test_df['x_future'] = test_df[f'x_future_{seconds*FPS}']
        test_df['y_future'] = test_df[f'y_future_{seconds*FPS}']

        # Start the result row with the horizon
        result = {"Seconds": seconds}

        # Loop through each prediction function (model)
        for model_name in prediction_functions:
            # Calculate error for the current prediction function (model)
            _, error = predict_and_evaluate_naive_model(model_name, test_df, seconds)
            result[model_name] = round(error, 2)

        # Append the results to the list
        results.append(result)
finally:
    # Go back to previous values, even if the loop was interrupted or failed, so that
    # later cells never see 'x_future'/'y_future' left at the wrong horizon
    test_df['x_future'] = test_df[f'x_future_{seconds_into_the_future*FPS}']
    test_df['y_future'] = test_df[f'y_future_{seconds_into_the_future*FPS}']

# Create a DataFrame from the list of results
results_df = pd.DataFrame(results)

# Display the resulting DataFrame
results_df

Unnamed: 0,Seconds,Naive Static,Naive Velocity,Naive Acceleration
0,1,2.16,0.7,0.69
1,2,4.15,2.12,2.09
2,3,5.94,3.94,3.89


### Naive models position analysis

In [9]:
# Column to analyze
column_to_analyze = 'position'

# Per-model Series of mean prediction error, indexed by the analyzed column
errors_per_group = {}

# Total prediction error per model, in the same order as the columns
total_error = []

# Loop through each prediction function (model)
for model_name in prediction_functions:
    # Calculate error for the current prediction function (model).
    # Use the configured horizon rather than a hard-coded 2 so the predictions match
    # 'x_future'/'y_future', which hold the targets for seconds_into_the_future.
    test_df, error = predict_and_evaluate_naive_model(model_name, test_df, seconds_into_the_future)

    # Group by 'column_to_analyze' and calculate the average 'pred_error', rounded to
    # 2 decimal places. groupby sorts the group keys in ascending order by default,
    # so no explicit sort is needed.
    errors_per_group[model_name] = test_df.groupby(column_to_analyze)['pred_error'].mean().round(2)

    # Append error for the current model to total_error list
    total_error.append(round(error, 2))

# One column per model, one row per value of 'column_to_analyze'
results_df = pd.DataFrame(errors_per_group)

# Add the 'Total' row to the results_df
results_df.loc['Total'] = total_error

# Display the resulting DataFrame
results_df

Unnamed: 0_level_0,Naive Static,Naive Velocity,Naive Acceleration
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Attacking Midfielder,4.59,2.29,2.26
Central Midfielder,4.72,2.35,2.31
Centre-Back,4.02,2.09,2.06
Defensive Midfielder,4.62,2.33,2.29
Forward,4.32,2.2,2.17
Full-Back,4.29,2.17,2.14
Goalkeeper,1.81,1.14,1.14
Wide Midfielder,4.55,2.31,2.28
Winger,4.49,2.24,2.21
Total,4.15,2.12,2.09


## Evaluate NN models

In [4]:
# Evaluate each trained LSTM model version (v3 through v7) on the test set
for version in range(3, 8):
    test_model(f"LSTM_model_v{version}", downsampling_factor_testing=5)

2024-06-01 12:53:21.719865: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31133 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:8a:00.0, compute capability: 7.0




2024-06-01 13:04:29.659471: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401


Error: 1.634 m
Testing results added to the file.
Error: 1.646 m
Testing results added to the file.
Error: 1.628 m
Testing results added to the file.
Error: 1.753 m
Testing results added to the file.
Error: 1.77 m
Testing results added to the file.


### Print Column Variance

In [None]:
# Load test frames
# `test_ids` is not used in this cell (the call below passes an empty id list together
# with the preloaded `test_df`); it is used by the following LSTM cell.
_, test_ids, _ = split_match_ids(560)

# Print column variance for 'position' with preloaded frames
# (relies on `test_df` having been built by an earlier cell)
print_column_variance([], "NN_model_v1", 'position', preloaded_frames_df=test_df)

True
True
True
Average error: 1.763
Average pred error per position:
   position  pred_error
0         0        1.89
1         1        1.92
2         2        1.75
3         3        1.90
4         4        1.82
5         5        1.83
6         6        0.96
7         7        1.92
8         8        1.86


In [None]:
# Print column variance for 'position' with test_ids
# (no preloaded frames are passed here, unlike the NN cell above;
# relies on `test_ids` from an earlier split_match_ids call)
print_column_variance(test_ids, "LSTM_model_v1", 'position')

## Visualize All Models

In [4]:
# Create an animation for the sequence (without predictions)
test_id = 'a641b1a0-0603-4a57-81e4-2cbc188ab05c'
start_frame = 94450
end_frame = 94720
image_frame = 94537

# Number of frames between an observation and its prediction target.
# Derived from the settings (as predict_and_visualize does) instead of the hard-coded 2*25,
# so the plain animation stays aligned with the prediction animations.
# NOTE(review): assumes seconds_into_the_future == 2 and FPS == 25 in settings, which
# reproduces the previous value of 50 — confirm. Variable name kept for compatibility.
offset_2_sec = seconds_into_the_future * FPS

# Visualize the sequence, shifted forward by the prediction offset
frames_df = load_processed_frames(match_id=test_id)[0]
# Flip the 'y' coordinate, as predict_and_visualize does for its own frames
frames_df['y'] = round(pitch_width - frames_df['y'], 2)
visualize_game_animation(frames_df, start_frame + offset_2_sec, end_frame + offset_2_sec, image_frame + offset_2_sec)

# Visualize the prediction error of every model (naive baselines, NN and LSTM)
models_to_visualize = ('Naive Static', 'Naive Velocity', 'Naive Acceleration', 'NN_Model', 'LSTM_Model')
for model_name in models_to_visualize:
    predict_and_visualize(test_id, model_name, start_frame, end_frame, image_frame)



2024-06-02 11:35:07.256411: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31133 MB memory:  -> device: 0, name: Tesla V100-SXM2-32GB, pci bus id: 0000:8a:00.0, compute capability: 7.0




2024-06-02 11:35:52.936290: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401


