# Notebook: Live application - simulation

This notebook contains the first version of the live program for my master's thesis. It does not yet run on live games; instead, it works with a simulation of the FTP server (where the files are normally stored during a live game). While the FTP server is being simulated, it saves all the necessary graphs and data for the players of both teams.

In [None]:
# Import libraries 
import floodlight.io.statsperform
import matplotlib.pyplot as plt
import numpy as np
import os
import json
import time
import pandas as pd
import json
import keyboard
import math
import requests
import xml.etree.ElementTree as ET
from hashlib import sha512

from floodlight.core.xy import XY
from floodlight.models.kinematics import DistanceModel
from floodlight.models.kinematics import VelocityModel
from datetime import datetime


# To make sure that the plots are in an external window
# %matplotlib qt
%matplotlib inline

# Fixture identifier of the match to process; it is passed to retrieve_MA2,
# which sends it as the `fx` query parameter of the match-stats request.
fixture_uuid = '9eldyiefgebuageoqyepzu0pg'

In [None]:
# StatsPerform outlet credentials. They are read from the environment so the
# secrets do not have to be typed into (and saved with) the notebook. When the
# variables are not set, both fall back to the empty string.
outlet_key = os.environ.get('STATSPERFORM_OUTLET_KEY', '')
outlet_secret = os.environ.get('STATSPERFORM_OUTLET_SECRET', '')

# Retrieve access token to StatsPerform API
def retrieve_access_token(timeout=30):
    """Request an OAuth access token for the outlet in `outlet_key`.

    The request is authorised with a SHA-512 digest of
    ``outlet_key + timestamp + outlet_secret``; the same millisecond timestamp
    is sent in the ``Timestamp`` header.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    The value of the ``access_token`` field of the JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    KeyError
        If the response contains no ``access_token`` field.
    """
    url = f'https://oauth.performgroup.com/oauth/token/{outlet_key}'
    params = {
        '_fmt': 'json',
        '_rt': 'b',
    }
    data = {
        'grant_type': 'client_credentials',
        'scope': 'b2b-feeds-auth',
    }

    # The digest and the Timestamp header must be built from the same value
    timestamp = int(time.time() * 1000)
    base_token = f'{outlet_key}{timestamp}{outlet_secret}'
    token = sha512(base_token.encode()).hexdigest()

    headers = {
        'Timestamp': f'{timestamp}',
        'Authorization': f'Basic {token}',
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    # A timeout keeps the notebook from hanging forever on a stalled connection
    response = requests.post(url, headers=headers, data=data, params=params, timeout=timeout)
    # Report rejected credentials as an HTTP error instead of a missing-key error below
    response.raise_for_status()
    response_json = response.json()

    if 'access_token' not in response_json:
        raise KeyError(f"'access_token' missing from token response (fields: {sorted(response_json)})")

    return response_json['access_token']

# Contestant ids of the home and away team; filled in by retrieve_MA2
contestant_home = ''
contestant_away = ''


def _lineup_to_dict(live_data, contestant_id):
    """Return {shirt number: short last name} for the lineUp element of one contestant."""
    lineup = live_data.find(f'.//lineUp[@contestantId="{contestant_id}"]')
    if lineup is None:
        raise ValueError(f'No lineUp element found for contestant {contestant_id!r}')

    players = {}
    for player in lineup:
        short_last_name = player.get('shortLastName')
        shirt_number = player.get('shirtNumber')

        # Children without both attributes are skipped
        if short_last_name and shirt_number:
            players[shirt_number] = short_last_name
    return players


# Lists with players of both team
def retrieve_MA2(fixture_uuid, timeout=30):
    """Download the match-stats XML of a fixture and extract both line-ups.

    Side effect: stores the ids of the first and second child of the
    ``contestants`` element in the module-level `contestant_home` and
    `contestant_away`.

    Parameters
    ----------
    fixture_uuid : str
        Fixture identifier, sent as the ``fx`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    (home_dict, away_dict)
        Two dictionaries mapping shirt number to short last name.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the XML lacks the contestants, liveData or lineUp elements.
    """
    global contestant_home
    global contestant_away

    url = f'https://api.performfeeds.com/soccerdata/matchstats/{outlet_key}'
    params = {
        '_rt': 'b',
        '_fmt': 'xml',
        'fx': fixture_uuid
    }

    headers = {
        'Authorization': f'Bearer {access_token}'
    }

    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()
    MA2_data = response.text

    root = ET.fromstring(MA2_data)

    # Look the element up once and validate it, rather than indexing a possible None
    contestants = root.find('.//matchInfo//contestants')
    if contestants is None or len(contestants) < 2:
        raise ValueError('Match stats response does not list two contestants')
    contestant_home = contestants[0].get('id')
    contestant_away = contestants[1].get('id')

    liveData = root.find('.//liveData')
    if liveData is None:
        raise ValueError('Match stats response contains no liveData element')

    home_dict = _lineup_to_dict(liveData, contestant_home)
    away_dict = _lineup_to_dict(liveData, contestant_away)

    return home_dict, away_dict


# Authenticate first: retrieve_MA2 reads the module-level `access_token` for its Bearer header
access_token = retrieve_access_token()
# Shirt number -> short last name for the home and the away line-up
home_dict, away_dict = retrieve_MA2(fixture_uuid)

In [None]:
# Folder in which the new tracking files appear
path = ''

# Shirt numbers of both teams, in the order of the line-up dictionaries
shirt_numbers_home = list(home_dict)
shirt_numbers_away = list(away_dict)

# Last known (x, y) of every player; everybody starts at (0, 0)
last_values_home = dict.fromkeys(shirt_numbers_home, (0, 0))
last_values_away = dict.fromkeys(shirt_numbers_away, (0, 0))


# Empty frame buffer: zero rows, one x and one y column per player
frame_size = len(shirt_numbers_home) + len(shirt_numbers_away)
current_frames = np.empty((0, 2 * frame_size))

In [None]:
# Load the average distance dictionary from the json file - Get average line
# NOTE(review): avg_dist_dict is not referenced by any other code visible in
# this notebook - confirm whether it is still needed.
file_path_dist = 'average_distance_05.json'

with open(file_path_dist, 'r') as file_dist:
    avg_dist_dict = json.load(file_dist)

In [None]:
# Load the average velocity dictionary from the json file - Get average line
# NOTE(review): vel_dict is not referenced by any other code visible in this
# notebook - confirm whether it is still needed.
file_path_vel = 'average_velocity_frames_05.json'

with open(file_path_vel, 'r') as file_vel:
    vel_dict = json.load(file_vel)

In [None]:
# Load the average velocity dictionary from the json file - Get average to calculate the frames
# `file_path_vel` and `file_vel` are rebound here, replacing the values that
# were used to load 'average_velocity_frames_05.json'.
# NOTE(review): avg_vel_dict is not referenced by any other code visible in
# this notebook - confirm whether it is still needed.
file_path_vel = 'average_velocity_05.json'

with open(file_path_vel, 'r') as file_vel:
    avg_vel_dict = json.load(file_vel)

In [None]:
# Names of the files that have already been handed to the handler
processed_files = []


def new_file_in_folder(path, on_new_file=None):
    """Process the first file in `path` that has not been seen before.

    At most one file is handled per call. The file name is recorded in
    `processed_files` before the handler runs, so a file whose processing
    fails is not retried.

    Parameters
    ----------
    path : str
        Folder to scan.
    on_new_file : callable, optional
        Called with the full path of the new file. Defaults to `read_data`.

    Returns
    -------
    bool
        True if a new file was found and processed, False otherwise.
    """
    # os.listdir returns entries in arbitrary order; sort for a stable order.
    # NOTE(review): this assumes the file names sort chronologically - confirm.
    files = sorted(os.listdir(path))

    # Check if there are any files
    if not files:
        print("No files found in the folder.")
        return False

    if on_new_file is None:
        # Resolved at call time because read_data is defined in a later cell
        on_new_file = read_data

    # Check for new files
    for file_name in files:
        if file_name in processed_files:
            continue
        print("A new file has been found:", file_name)
        processed_files.append(file_name)
        on_new_file(os.path.join(path, file_name))   # Read data from the new file
        # Return here so the "no new files" message is not printed after a file was processed
        return True

    print("No new files found in the folder.")
    return False


In [None]:
# Function to calculate the Cartesian distance between two points
def cartesian_distance(x1, y1, x2, y2):
    """Return the straight-line distance between (x1, y1) and (x2, y2).

    Every coordinate is passed through float() first, so numeric strings are
    accepted as well as numbers.
    """
    delta_x = float(x2) - float(x1)
    delta_y = float(y2) - float(y1)
    return math.sqrt(delta_x ** 2 + delta_y ** 2)

# Dictionaries to store ID and latest positions for home and away teams
# latest_dict_*: shirt number -> (x, y) as last written by read_data; read_data
# searches them to assign a shirt number to records whose kit number is '-1'.
# id_dict_*: never populated or read by the code visible in this notebook.
# NOTE(review): confirm whether id_dict_* can be removed.
id_dict_home = {}
latest_dict_home = {}

id_dict_away = {}
latest_dict_away = {}

# Values of the first field of a player record that are treated as home / away
_HOME_TEAM_CODES = ('0', '3')
_AWAY_TEAM_CODES = ('1', '4')


def _closest_shirt_number(x, y, latest_positions, max_distance=100000):
    """Return the shirt number in `latest_positions` stored nearest to (x, y).

    Returns '-1' when `latest_positions` is empty or when no stored position
    is closer than `max_distance`.
    """
    best_number = '-1'
    best_distance = max_distance
    for number, position in latest_positions.items():
        distance = cartesian_distance(x, y, position[0], position[1])
        if distance < best_distance:
            best_distance = distance
            best_number = number
    return best_number


# Function that reads the data and parses it to the format needed for floodlight
def read_data(file, min_players=12, min_frames=1000):
    """Parse one tracking file and append its frames to the global buffer.

    Each line is split on ':' and only the second part is used. That part
    holds player records separated by ';', and every record is a
    comma-separated list:

        0: team code ('0'/'3' are treated as home, '1'/'4' as away)
        1: ID (not used, as it changes constantly)
        2: kit number ('-1' if not defined yet)
        3 & 4: x and y coordinate

    A record with kit number '-1' is given the shirt number of the same-team
    player whose last stored position is closest. Lines without a ':' and
    records with fewer than five fields are skipped.

    Side effects: updates `latest_dict_home/away` and `last_values_home/away`
    in place, calls `add_frames` when the file yields enough frames, and calls
    `make_graphs` when the global buffer holds more than one frame.

    Parameters
    ----------
    file : str
        Path of the tracking file.
    min_players : int, optional
        Minimum number of home/away records with a defined kit number a line
        needs to be kept as a frame (default 12).
    min_frames : int, optional
        Minimum number of kept frames a file needs to be added to the buffer
        (default 1000).
    """
    # Read the data in the file (handle gets its own name so `file` is not shadowed)
    with open(file, 'r') as tracking_file:
        data = tracking_file.readlines()

    # List to store parsed coordinates
    xy = []

    # Iterate over each line in the data file
    for line in data:
        # Keep only the part after the first ':'; skip blank or malformed lines
        segments = line.strip().split(':')
        if len(segments) < 2:
            continue
        player_parts = segments[1].strip().split(';')

        # Number of home/away records in this line that carry a defined kit number
        shirt_number_counter = 0

        # Iterate over each player in the current line
        for player in player_parts:
            if not player:
                continue

            player_chunck = player.strip().split(',')
            # A record needs team code, ID, kit number, x and y
            if len(player_chunck) < 5:
                continue

            team_code = player_chunck[0]
            shirt_number = player_chunck[2]
            position = (player_chunck[3], player_chunck[4])

            is_home = team_code in _HOME_TEAM_CODES
            is_away = team_code in _AWAY_TEAM_CODES

            if shirt_number == '-1':
                # No kit number: take the closest known player of the same team
                if is_home:
                    shirt_number = _closest_shirt_number(position[0], position[1], latest_dict_home)
                elif is_away:
                    shirt_number = _closest_shirt_number(position[0], position[1], latest_dict_away)
            elif is_home or is_away:
                # Increment counter for valid players
                shirt_number_counter += 1

            # Still unidentified: nothing to store
            if shirt_number == '-1':
                continue

            # The dictionaries are updated in place, so a position stored here
            # is already visible to later '-1' records of the same line
            if is_home:
                latest_dict_home[shirt_number] = position
                last_values_home[shirt_number] = position
            elif is_away:
                latest_dict_away[shirt_number] = position
                last_values_away[shirt_number] = position

        # Keep the line as a frame only if enough kit numbers were present
        if shirt_number_counter >= min_players:
            # Frame layout: x, y of every home player, then x, y of every away player
            parsed_arrays = []
            for number in shirt_numbers_home:
                parsed_arrays.extend(last_values_home[number])
            for number in shirt_numbers_away:
                parsed_arrays.extend(last_values_away[number])
            xy.append(np.array(parsed_arrays))

    # Check if the array has enough data
    if len(xy) >= min_frames:
        add_frames(np.asarray(xy, dtype="object"))
    else:
        print(f"File too short: {len(xy)}")
    print(f'Size: {current_frames.shape}')

    # Make graph if the file isn't empty
    if current_frames.shape[0] > 1:
        make_graphs(current_frames)

In [None]:
# Add frames of new file to global frames array
def add_frames(xy):
    """Append the rows of `xy` to the global `current_frames` buffer.

    The shapes of `xy` and of the buffer are printed first. Only a
    two-dimensional `xy` is appended; any other shape leaves the buffer as is.
    """
    global current_frames

    for shape in (xy.shape, current_frames.shape):
        print(shape)

    if xy.ndim != 2:
        return

    current_frames = np.concatenate([current_frames, xy], axis=0)

In [None]:
# Function to smooth data using previous values within a kernel size
def smooth_previous_only(data, kernel_size=20):
    """Smooth `data` with a trailing moving average.

    Element i of the result is the mean of the last `kernel_size` values up to
    and including element i (fewer at the start of the series), truncated to
    an int. The result is returned as a numpy array.
    """
    trailing_means = []
    for end in range(1, len(data) + 1):
        # The window covers at most kernel_size values and never starts before index 0
        start = max(0, end - kernel_size)
        trailing_means.append(int(np.mean(data[start:end])))
    return np.array(trailing_means)

# Frame rate passed to floodlight and the resulting number of frames per minute
_FRAMERATE = 25
_FRAMES_PER_MINUTE = _FRAMERATE * 60

# A frame counts as "above threshold" when its velocity exceeds
# _VELOCITY_FACTOR * _AVERAGE_VELOCITY
_AVERAGE_VELOCITY = 2
_VELOCITY_FACTOR = 2

# If the home contestant has this id the home data is exported to JSON,
# otherwise the away data is exported
_EXPORTED_CONTESTANT_ID = '1oyb7oym5nwzny8vxf03szd2h'

# Per-player errors containing this text are skipped without a message
_IGNORED_ERROR_TEXT = "can't extend empty axis 0 using modes other than 'constant' or 'empty'"


def _save_line_plot(values, average, title, ylabel, file_path, **plot_kwargs):
    """Plot `values` with a dashed green line at `average` and save it to `file_path`."""
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        ax.set_title(title, fontsize=20)
        ax.set_xlabel('Tijd (minuten)', fontsize=20)
        ax.set_ylabel(ylabel, fontsize=20)
        ax.grid(True)
        ax.plot(values, **plot_kwargs)
        ax.axhline(y=average, color='green', linestyle='--', label='Average')
        fig.savefig(file_path)
    finally:
        # Close the figure even when saving fails, so figures do not pile up
        plt.close(fig)


def _export_team_graphs(shirt_numbers, names, column_offset, cumulative_distance,
                        velocities, folder_path, loop_index):
    """Save the distance and velocity graph of every player of one team.

    `column_offset` is the model column of the team's first player. Returns a
    dictionary {shirt number: data of that player} containing only the players
    that were processed without error.
    """
    team_data = {}
    velocity_threshold = _AVERAGE_VELOCITY * _VELOCITY_FACTOR

    for position, shirt_number in enumerate(shirt_numbers):
        # Bound before the try block so the error message below can always name the player
        player_name = names.get(shirt_number, shirt_number)
        try:
            column = column_offset + position
            cumulative_distance_data = cumulative_distance.property[:, column]

            # Take one sample per minute of the cumulative distance and use
            # its gradient as the per-minute value
            downsampled_data = cumulative_distance_data[::_FRAMES_PER_MINUTE]
            distance_per_minute = np.gradient(downsampled_data)

            # Remove 0 values, which indicate player substitutions
            distance_per_minute = distance_per_minute[distance_per_minute != 0]

            smoothed_array = smooth_previous_only(distance_per_minute)
            average_distance = np.mean(smoothed_array)

            player_folder_path = os.path.join(folder_path, f'{player_name}')
            os.makedirs(player_folder_path, exist_ok=True)

            _save_line_plot(
                smoothed_array,
                average_distance,
                f'Richtingscoëfficiënt van cumulatieve afstand van {player_name}',
                'Richtingscoëfficiënt per minuut',
                os.path.join(player_folder_path, f'{shirt_number}_distance_{loop_index}.png'),
            )

            # Count, per whole minute, the frames above the velocity threshold
            above_threshold_mask = velocities[:, column] > velocity_threshold
            num_minutes = len(above_threshold_mask) // _FRAMES_PER_MINUTE
            per_minute_mask = above_threshold_mask[:num_minutes * _FRAMES_PER_MINUTE].reshape(
                num_minutes, _FRAMES_PER_MINUTE
            )
            counts_array = np.sum(per_minute_mask, axis=1)
            # Drop the trailing minutes that have no frame above the threshold
            counts_array = np.trim_zeros(counts_array, 'b')

            smoothed_count_array = smooth_previous_only(counts_array)
            average_vel = np.mean(smoothed_count_array)

            _save_line_plot(
                smoothed_count_array,
                average_vel,
                f'Aantal frames boven 2 keer de gemiddelde snelheid van {player_name}',
                'Aantal frames per minuut',
                os.path.join(player_folder_path, f'{shirt_number}_velocity_{loop_index}.png'),
                color='red',
            )

            # Plain floats and lists so the dictionary can be written as JSON
            team_data[shirt_number] = {
                "smoothed_array": smoothed_array.tolist(),
                "average_distance": float(average_distance),
                "smoothed_count_array": smoothed_count_array.tolist(),
                "average_vel": float(average_vel)
            }

        except Exception as e:
            # One failing player must not stop the export of the others
            if _IGNORED_ERROR_TEXT not in str(e):
                print(f"An error occurred by {player_name}: {e}")

    return team_data


def _save_physical_data(team_data, folder_path, loop_index):
    """Write `team_data` to <folder_path>/physical_data/physical_data_<loop_index>.json."""
    json_folder_path = os.path.join(folder_path, "physical_data")
    os.makedirs(json_folder_path, exist_ok=True)
    json_file_path = os.path.join(json_folder_path, f'physical_data_{loop_index}.json')
    with open(json_file_path, 'w') as json_file:
        json.dump(team_data, json_file)


# Function to create and save graphs based on the processed data
def make_graphs(xy):
    """Create and save the distance and velocity graphs of all players.

    Increments the global `times_loop`, fits a floodlight DistanceModel and
    VelocityModel on `xy`, and saves two PNG files per player under
    'now_live/<player name>/'. The data of one team is also written to
    'now_live/physical_data/physical_data_<times_loop>.json': the home team
    when `contestant_home` equals the exported contestant id, otherwise the
    away team.

    Parameters
    ----------
    xy : array-like
        One row per frame; the columns hold x and y of every home player
        followed by x and y of every away player.
    """
    global times_loop

    folder_path = 'now_live'
    os.makedirs(folder_path, exist_ok=True)

    times_loop += 1

    # Convert every frame to float and build the array handed to floodlight
    array_data = np.array(xy)
    float_arrays = [arr.astype(float) for arr in array_data]
    xy_values_array = np.asarray(float_arrays, dtype=object)

    # Create an XY object for floodlight
    xy_values = XY(xy=xy_values_array, framerate=_FRAMERATE, direction=None)

    # Create distance and velocity models
    dm = DistanceModel()
    dm.fit(xy_values)
    cumulative_distance_covered = dm.cumulative_distance_covered()

    vm = VelocityModel()
    vm.fit(xy_values)
    # Computed once here and reused for every player
    velocities = vm.velocity()

    # Prevent graphs from being displayed
    plt.ioff()

    # Close all the plots that were open before this step
    plt.close('all')

    export_home = contestant_home == _EXPORTED_CONTESTANT_ID

    # Home players occupy the first columns of the model output
    home_players_data = _export_team_graphs(
        shirt_numbers_home, home_dict, 0,
        cumulative_distance_covered, velocities, folder_path, times_loop,
    )
    if export_home:
        _save_physical_data(home_players_data, folder_path, times_loop)

    # Away players follow directly after the home players
    away_players_data = _export_team_graphs(
        shirt_numbers_away, away_dict, len(shirt_numbers_home),
        cumulative_distance_covered, velocities, folder_path, times_loop,
    )
    if not export_home:
        _save_physical_data(away_players_data, folder_path, times_loop)

    # Close any remaining plots
    plt.close('all')

In [None]:
# Number of times make_graphs has run; used in the names of the saved files
times_loop = 0

# Seconds between two folder checks, and how often the escape key is polled while waiting
POLL_INTERVAL_SECONDS = 30
KEY_POLL_SECONDS = 0.1

# Loop that keeps running, checks for new files and executes the necessary functions.
# The wait is split into short slices so that a normal press of the escape key
# is noticed; a single check after a 30 second sleep would almost always miss it.
stop_requested = False
while not stop_requested:
    new_file_in_folder(path)

    wait_until = time.monotonic() + POLL_INTERVAL_SECONDS
    while time.monotonic() < wait_until:
        if keyboard.is_pressed('esc'):
            stop_requested = True
            break
        time.sleep(KEY_POLL_SECONDS)