# Notebook: Offline program
This notebook contains the offline program of the master's thesis of Theo Vandeportaele. It iterates over the games in a given folder and plots the distance and velocity graphs of every player who played in each game.

In [None]:
import floodlight.io.statsperform
import matplotlib.pyplot as plt
import numpy as np
import os
import re
import json

from tqdm import tqdm
from floodlight.core.xy import XY
from floodlight.models.kinematics import DistanceModel
from floodlight.models.kinematics import VelocityModel

In [None]:
# Folder that holds the offline tracking data; every entry in it is read as
# the tracking file of one game by the processing loop further down.
# Left empty here: set it before running, and end it with a path separator
# (e.g. 'tracking_data/').
path = ''

In [None]:
# Numerical way to sort over files
def numerical_sort(value):
    """Build a natural-order sort key for a string such as a file name.

    The string is cut at every run of digits. Digit runs become integers and
    the text in between stays a string, so 'game2' sorts before 'game10'.

    Returns:
        list: alternating text pieces (even positions) and integers (odd
        positions), in the order they appear in ``value``.
    """
    pieces = re.split(r'(\d+)', value)
    # With a capturing group, re.split puts the digit runs at odd positions.
    return [int(piece) if position % 2 else piece
            for position, piece in enumerate(pieces)]

In [None]:
# Load the average distance dictionary from the json file.
# The processing loop looks values up with avg_dist_dict[str(player_id)] and
# draws them as the horizontal "Average" line of the distance plot.
# Left empty here: set file_path_dist before running.
file_path_dist = ''

with open(file_path_dist, 'r') as file_dist:
    avg_dist_dict = json.load(file_dist)

In [None]:
# Load the average velocity dictionary from the json file.
# The processing loop looks values up with avg_vel_dict[str(player_id)] and
# draws them as the horizontal "Average" line of the velocity plot.
# Left empty here: set file_path_vel before running.
file_path_vel = ''

with open(file_path_vel, 'r') as file_vel:
    avg_vel_dict = json.load(file_vel)

In [None]:
# Load a JSON file and keep the 'smoothed_array' entry stored under key '44'.
# NOTE(review): presumably this is output of the online counterpart of this
# program and '44' is a player id - confirm.
# NOTE(review): online_dist is not used by any other cell visible here.
# Left empty here: set file_path_online before running.
file_path_online = ''

with open(file_path_online, 'r') as file_online:
    online_json = json.load(file_online)

online_dist = online_json['44']['smoothed_array']

In [None]:
# Process every tracking file found in `path`, in natural file-name order.
# Each file holds the data of a different game played this season.
#
# For every player on the Club Brugge team sheet of a game this cell
#   1. builds a smoothed "distance per minute" series and saves a plot of it
#      together with the player's stored average (avg_dist_dict),
#   2. builds a smoothed per-minute count of frames in which the player is
#      faster than twice their own mean velocity and saves a plot of it
#      together with the stored average (avg_vel_dict),
#   3. stores both series and the raw cumulative distance.
#
# Result: all_games_data, a list with one dict per game that maps the
# player's jID to {'distance', 'velocity', 'cum_distance'} (plain lists).

# Tracking frames per minute: 25 frames per second (one frame per 0.04 s).
FRAMES_PER_MINUTE = 25 * 60
# Window lengths, in minutes, of the two moving averages.
DISTANCE_KERNEL_SIZE = 20
VELOCITY_KERNEL_SIZE = 15


def _trailing_mean(values, kernel_size):
    """Moving average in which each output uses the current and previous samples.

    The series is left-padded with its first value, so there is exactly one
    output per input sample and no sample is averaged against implicit zeros.
    `values` must not be empty (np.pad cannot edge-pad an empty array).
    """
    kernel = np.ones(kernel_size) / kernel_size
    padded = np.pad(values, (kernel_size - 1, 0), mode='edge')
    return np.convolve(padded, kernel, mode='valid')


def _centered_mean(values, kernel_size):
    """Moving average over a window of `kernel_size` samples around each sample.

    Both ends are padded by mirroring the series. For an odd `kernel_size`
    this gives one output per input sample. `values` must not be empty.
    """
    kernel = np.ones(kernel_size) / kernel_size
    half = kernel_size // 2
    padded = np.pad(values, (half, half), mode='symmetric')
    return np.convolve(padded, kernel, mode='valid')


def _frames_above_threshold_per_minute(velocity):
    """Count, per full minute, the frames faster than twice the mean velocity.

    Minutes with a count of 0 are dropped, and a trailing partial minute is
    ignored.
    """
    threshold = np.nanmean(velocity) * 2
    above_threshold_mask = velocity > threshold
    num_minutes = len(above_threshold_mask) // FRAMES_PER_MINUTE
    per_minute = above_threshold_mask[:num_minutes * FRAMES_PER_MINUTE].reshape(num_minutes, FRAMES_PER_MINUTE)
    counts = np.sum(per_minute, axis=1)
    return counts[counts != 0]


plt.rc('font', size=12)  # Default font size for every figure saved below

all_games_data = []

for file in tqdm(sorted(os.listdir(path), key=numerical_sort)):
    # Create tracking file full path (os.path.join also works when `path`
    # has no trailing separator)
    filename = os.fsdecode(file)
    filename_tracking_data = os.path.join(path, filename)
    print(filename_tracking_data)

    # Get teamsheets of tracking file
    teamsheets = floodlight.io.statsperform.read_teamsheets_from_position_data_txt(filename_tracking_data)

    # Club Brugge is the home team when the part of the file name after the
    # first '-' starts with "Club Brugge", otherwise it is the away team
    truncated_filename = filename.split("-", 1)[-1]
    print(truncated_filename)
    team = 'Home' if truncated_filename.startswith("Club Brugge") else 'Away'
    print(team)

    # Get tracking data
    data = floodlight.io.statsperform.read_position_data_txt(filename_tracking_data)

    # Positions of the selected team; keys 1 and 2 are the first and second
    # half according to the original author's comments
    xy_values = data[0][1][team]
    xy_values_second = data[0][2][team]
    print(xy_values)
    print(xy_values_second)

    # Cumulative distance of all players, first and second half
    dm = DistanceModel()
    dm.fit(xy_values)
    cumulative_distance_covered = dm.cumulative_distance_covered()

    dm_2 = DistanceModel()
    dm_2.fit(xy_values_second)
    cumulative_distance_covered_2 = dm_2.cumulative_distance_covered()

    # Velocity of all players, first and second half (computed once per game
    # instead of once per player)
    vm = VelocityModel()
    vm.fit(xy_values)
    velocity_first_half = vm.velocity()

    vm_2 = VelocityModel()
    vm_2.fit(xy_values_second)
    velocity_second_half = vm_2.velocity()

    # Table that maps the jID from the team sheet to the xID of the tracking data
    id_mapping = data[1][team]

    all_players_data = {}

    for player_id in teamsheets[team]['jID']:
        # Column of this player in the per-player arrays.
        # NOTE(review): assumes xID is 1-based, so xID - 1 is the column
        # index - confirm against the reader that produces id_mapping.
        mapped_index = id_mapping[id_mapping['jID'] == player_id]['xID'].values.tolist()[0] - 1

        #
        # DISTANCE
        #

        cumulative_distance_data = cumulative_distance_covered.property[:, mapped_index]
        cumulative_distance_data_2 = cumulative_distance_covered_2.property[:, mapped_index]

        # The second half starts counting from 0 again, so continue from the
        # last value of the first half before joining both halves
        total_cum_data = np.concatenate(
            (cumulative_distance_data, cumulative_distance_data_2 + cumulative_distance_data[-1]),
            axis=0,
        )

        # Keep one cumulative value per minute (original data is per 0.04
        # seconds); its gradient is the distance covered per minute
        cumulative_distance_per_minute = np.gradient(total_cum_data[::FRAMES_PER_MINUTE])

        # A value of 0 only occurs after a player is substituted out or
        # before a player is substituted in, so drop those minutes
        cumulative_distance_per_minute = cumulative_distance_per_minute[cumulative_distance_per_minute != 0]

        #
        # VELOCITY
        #

        total_velocity = np.concatenate(
            (velocity_first_half[:, mapped_index], velocity_second_half[:, mapped_index]),
            axis=0,
        )
        counts_array = _frames_above_threshold_per_minute(total_velocity)

        # Players without any usable minute cannot be smoothed or plotted
        # (padding an empty series raises a ValueError), so skip them
        if cumulative_distance_per_minute.size == 0 or counts_array.size == 0:
            print(f'Skipping player {player_id}: no per-minute data in {filename}')
            continue

        smoothed_array = _trailing_mean(cumulative_distance_per_minute, DISTANCE_KERNEL_SIZE)
        smoothed_count_array = _centered_mean(counts_array, VELOCITY_KERNEL_SIZE)

        # Stored season averages; a player missing from a dict raises KeyError
        average_distance = avg_dist_dict[str(player_id)]
        average_velocity = avg_vel_dict[str(player_id)]

        # Distance plot with the stored average as a horizontal line.
        # NOTE(review): the file name only contains the player id, so the
        # plots of a later game overwrite those of an earlier game.
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(smoothed_array, label=f'Player {player_id}')
        ax.axhline(y=average_distance, color='green', linestyle='--', label='Average')
        ax.set_title(f'Richtingscoëfficiënt van cumulatieve afstand van speler {player_id} (offline programma)')
        ax.set_xlabel('Tijd (minuten)')
        ax.set_ylabel('Richtingscoëfficiënt per minuut')
        ax.legend()  # The labels above were set but never shown
        ax.grid(True)
        fig.savefig(f'Player_{player_id}.png')
        plt.close(fig)

        # Velocity plot with the stored average as a horizontal line
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(smoothed_count_array, label='Smoothed Data', color='red')
        ax.axhline(y=average_velocity, color='green', linestyle='--', label='Average')
        ax.set_xlabel('Minute')
        ax.set_ylabel('Number of frames above 2 times average velocity')
        ax.set_title(f'Number of frames above certain velocity by player {player_id}')
        ax.legend()
        ax.grid(True)
        fig.savefig(f'Player_{player_id}_velocity.png')
        plt.close(fig)

        # Store the player's data
        all_players_data[player_id] = {
            'distance': smoothed_array.tolist(),
            'velocity': smoothed_count_array.tolist(),
            'cum_distance': total_cum_data.tolist(),
        }

    all_games_data.append(all_players_data)
