# Notebook: Live application 

This notebook contains the live version of the program written for the master's thesis of Theo Vandeportaele. It saves all the necessary graphs and data for the players of both teams and works on data from the StatsPerform FTP server. 

In [None]:
# Import libraries 
import io
import json
import math
import os
import tempfile
import time
from datetime import datetime
from ftplib import FTP

import floodlight.io.statsperform
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from floodlight.core.xy import XY
from floodlight.models.kinematics import DistanceModel
from floodlight.models.kinematics import VelocityModel


In [None]:
# Connection settings for the FTP server (credentials are left blank in the notebook)
ftp_host = 'soccer-ftp.stats.com'
ftp_username = ''
ftp_password = ''

# Connect to the host first, then authenticate in a separate, explicit step
ftp = FTP(host=ftp_host)
ftp.login(user=ftp_username, passwd=ftp_password)

In [None]:
# Components that identify the match folder on the FTP server
ftp_id = 20240421
ftp_comp = 'belgium'
ftp_home = 'unionsaintgilloise'
ftp_away = 'clubbrugge'

# Folder where the new files are added: /XYZ/<id>_<competition>_<home>_vs_<away>
path = "/XYZ/" + "_".join([str(ftp_id), ftp_comp, ftp_home, "vs", ftp_away])

In [None]:
# 'Home' team or 'Away' team, depending on which side Club Brugge plays
team = 'Home' if ftp_home == 'clubbrugge' else 'Away'

# Shirt numbers of all players of Club Brugge, numbers of players not playing will be skipped
shirt_numbers = ['22', '29', '4', '6', '14', '44', '55', '58', '64', '10', '15', '20', '27', '39', '62', '77', '7', '8', '9', '11', '32', '68', '99', '17', '26', '70', '28', '66', '76']

# Last known (x, y) position per shirt number; (0, 0) until a player has been seen
last_values = {number: (0, 0) for number in shirt_numbers}

# Create empty numpy array for the frames.
# Every parsed frame holds one x and one y value per shirt number, so the
# column count must be 2 * len(shirt_numbers); otherwise the rows cannot be
# concatenated onto this array.
current_frames = np.empty((0, 2 * len(shirt_numbers)))

In [None]:
# Average distance per player, read from JSON; drawn as the reference line
# in the distance graphs
file_path_dist = 'average_distance.json'

with open(file_path_dist) as file_dist:
    avg_dist_dict = json.loads(file_dist.read())

In [None]:
# Per-player average read from JSON; drawn as the reference line in the
# velocity graphs (number of frames above the velocity threshold)
file_path_vel = 'average_velocity_frames.json'

with open(file_path_vel) as file_vel:
    vel_dict = json.loads(file_vel.read())

In [None]:
# Average velocity per player, read from JSON; used to derive the velocity
# threshold above which frames are counted
file_path_vel = 'average_velocity.json'

with open(file_path_vel) as file_vel:
    avg_vel_dict = json.loads(file_vel.read())

In [None]:
# Modification time (raw MDTM reply text) of the newest file handled so far;
# None until the first file has been seen
last_modification_time = None

# Function that checks if there are new files in the folder
def new_file_in_folder(path):
    """Check the FTP folder for a new file and process it when one is found.

    The newest file is the one with the largest MDTM reply. It counts as new
    when its modification time differs from the one remembered from the
    previous call. A new file is downloaded to a temporary directory and its
    local path is handed to ``read_data``, which opens a local file.

    Parameters:
        path: Folder on the FTP server to inspect.

    Returns:
        True if a new file was found and passed to ``read_data``,
        False if the folder is empty or nothing changed.
    """
    global last_modification_time

    ftp.cwd(path)
    files = ftp.nlst()

    if not files:
        print("No files found in the folder.")
        return False

    # Ask the server for each modification time once and reuse the answers
    modification_times = {name: ftp.sendcmd('MDTM ' + name)[4:] for name in files}

    # Find the newest file and its modification time
    newest_file = max(modification_times, key=modification_times.get)
    new_modification_time = modification_times[newest_file]

    if new_modification_time == last_modification_time:
        print("No new files found in the folder.")
        return False

    print("A new file has been found:", newest_file)
    last_modification_time = new_modification_time

    # read_data opens a local path, so fetch the remote file first. The
    # temporary directory is removed again once the file has been parsed.
    with tempfile.TemporaryDirectory() as download_dir:
        local_path = os.path.join(download_dir, os.path.basename(newest_file))
        with open(local_path, 'wb') as local_file:
            ftp.retrbinary('RETR ' + newest_file, local_file.write)
        read_data(local_path)
    return True

In [None]:
# Function to calculate the Cartesian distance between two points
def cartesian_distance(x1, y1, x2, y2):
    """Return the Euclidean distance between (x1, y1) and (x2, y2).

    The coordinates may be numbers or numeric strings; each one is converted
    with ``float`` before use, so a non-numeric value raises ``ValueError``.
    """
    return math.hypot(float(x2) - float(x1), float(y2) - float(y1))

# Dictionaries to store ID and latest positions
id_dict = {}
latest_dict = {}

# Global variable for some test value
test_getal = 0

# Function that reads the data and parses it to the format needed for floodlight
def read_data(file):
    """Parse one tracking file and add its frames to the global frame array.

    Each line is split on ':' and the second section is read as a
    ';'-separated list of players, each a ','-separated record of
    team code, ID, shirt number, x and y. Players without a shirt number
    ('-1') are given the number of the closest known position. For every
    accepted line, one row with the last known x/y of every entry in
    ``shirt_numbers`` is produced.

    Lines without a player section and player records with fewer than five
    fields are skipped instead of aborting the whole file.

    Parameters:
        file: Path of the local tracking file to read.

    Side effects:
        Updates ``id_dict``, ``latest_dict``, ``last_values`` and
        ``test_getal``. Calls ``add_frames`` when at least 1000 rows were
        parsed, and ``make_graphs`` when ``current_frames`` holds more than
        one row.
    """
    global test_getal

    # Team codes whose players are tracked.
    # NOTE(review): these codes are hard-coded and the notebook's `team`
    # variable is not consulted here - confirm they select the intended team.
    tracked_team_codes = ('0', '3')

    # A player without a shirt number is only matched to a known position
    # that is closer than this distance
    max_match_distance = 100000

    # Read the data in the file
    with open(file, 'r') as tracking_file:
        data = tracking_file.readlines()

    # List to store parsed coordinates
    xy = []

    # Iterate over each line in the data file
    for line in data:
        sections = line.strip().split(':')
        if len(sections) < 2:
            # Blank or malformed line: there is no player section to parse
            continue
        player_parts = sections[1].strip().split(';')

        # Number of tracked-team players in this line that carry a shirt number
        shirt_number_counter = 0

        # Iterate over each player in the current line
        for player in player_parts:
            # Split player info into chunks:
            # 0: Home or away team (or keeper - not relevant)
            # 1: ID (not useful as it changes constantly)
            # 2: Kit number (sometimes -1 if not defined yet)
            # 3 & 4: x-coord and y-coord data
            player_chunck = player.strip().split(',')
            if len(player_chunck) < 5:
                # Empty or incomplete record
                continue

            team_code, player_id, shirt_number, x_coord, y_coord = player_chunck[:5]

            # Records of other teams never update any state, so skip them early
            if team_code not in tracked_team_codes:
                continue

            if shirt_number == '-1':
                # No shirt number yet: take the number of the closest known position.
                # Positions stored earlier in this same line are included in the search.
                best_distance = max_match_distance
                for known_number, (known_x, known_y) in latest_dict.items():
                    distance = cartesian_distance(x_coord, y_coord, known_x, known_y)
                    if distance < best_distance:
                        best_distance = distance
                        shirt_number = known_number
                if shirt_number == '-1':
                    # Nothing close enough is known yet; the record cannot be used
                    continue
            else:
                shirt_number_counter += 1

            # Update dictionaries for valid players
            id_dict[player_id] = shirt_number
            latest_dict[shirt_number] = (x_coord, y_coord)
            last_values[shirt_number] = (x_coord, y_coord)

        # If the number of shirt numbers is within the expected range, store one
        # row with the last known x/y of every shirt number
        if 6 <= shirt_number_counter <= 13:
            parsed_arrays = []
            for number in shirt_numbers:
                parsed_arrays.extend(last_values[number])
            xy.append(np.array(parsed_arrays))

    # Check if the array has enough data
    if len(xy) >= 1000:
        if len(xy) == 1500:
            test_getal += 1
        # Add the current array of the current file to the global array with the data
        add_frames(np.asarray(xy, dtype="object"))
    else:
        print(f"File too short: {len(xy)}")
    print(f'Size: {current_frames.shape}')

    # Make graph if the file isn't empty
    if current_frames.shape[0] > 1:
        make_graphs(current_frames)

In [None]:
# Add frames of new file to global frames array
def add_frames(xy):
    """Append the rows of ``xy`` to the global ``current_frames`` array.

    Only two-dimensional input is appended; any other input is ignored and
    ``current_frames`` is left unchanged.
    """
    global current_frames

    if xy.ndim != 2:
        return
    current_frames = np.concatenate([current_frames, xy], axis=0)

In [None]:
# Function to smooth data using previous values within a kernel size
def smooth_previous_only(data, kernel_size=20):
    """Smooth ``data`` with a trailing moving average.

    Each output value is the mean of the current value and the values before
    it, using at most ``kernel_size`` values in total, truncated to an int.

    Parameters:
        data: Sequence of numbers to smooth.
        kernel_size: Maximum number of values per averaging window.

    Returns:
        A numpy array with one smoothed value per input value.
    """
    # The window ends at the current element and never starts before index 0
    trailing_means = [
        int(np.mean(data[max(0, end - kernel_size):end]))
        for end in range(1, len(data) + 1)
    ]
    return np.array(trailing_means)

# Number of times make_graphs has run; used as a suffix so every run writes new image files
times_loop = 0

# Function to create and save graphs based on the processed data
def make_graphs(xy):
    """Create and save a distance graph and a velocity graph for every player.

    The frames are converted to a float array, wrapped in a floodlight ``XY``
    object (framerate 25) and fed to a ``DistanceModel`` and a
    ``VelocityModel``. For each entry in ``shirt_numbers`` two PNG files are
    written to ``now_live/player_<number>/``:

    * ``<number>_distance_<run>.png``: smoothed per-minute slope of the
      cumulative distance, with the value from ``avg_dist_dict`` as a line.
    * ``<number>_velocity_<run>.png``: smoothed per-minute count of frames
      whose velocity exceeds twice the value in ``avg_vel_dict``, with the
      value from ``vel_dict`` as a line.

    A failure for one player is printed and does not stop the other players.

    Parameters:
        xy: Array of frames; the columns alternate x and y per shirt number,
            in the order of ``shirt_numbers``.
    """
    global times_loop

    folder_path = 'now_live'
    os.makedirs(folder_path, exist_ok=True)

    times_loop += 1

    # The frames arrive as strings/objects; convert them to a plain float
    # array so the floodlight models can do numeric array math on it
    xy_values_array = np.asarray(xy).astype(float)

    # Create an XY object for floodlight
    xy_values = XY(xy=xy_values_array, framerate=25, direction=None)

    # Create distance and velocity models
    dm = DistanceModel()
    dm.fit(xy_values)
    cumulative_distance_covered = dm.cumulative_distance_covered()

    vm = VelocityModel()
    vm.fit(xy_values)
    # Fetch the velocities once instead of once per player
    velocity_all = vm.velocity()

    # Prevent graphs from being displayed
    plt.ioff()

    # Close all the plots that were open before this step
    plt.close('all')

    # Number of frames in one minute at 25 frames per second
    frames_per_minute = 25 * 60

    # The position in shirt_numbers is also the player's column index in the model output
    for mapped_index, i in enumerate(shirt_numbers):
        try:
            cumulative_distance_data = cumulative_distance_covered.property[:, mapped_index]

            # Take one sample per minute and use its gradient as the distance covered per minute
            downsampled_data = cumulative_distance_data[::frames_per_minute]
            cumulative_distance_per_minute = np.gradient(downsampled_data)

            # Remove 0 values, which indicate player substitutions
            cumulative_distance_per_minute = cumulative_distance_per_minute[cumulative_distance_per_minute != 0]

            # Use the custom smoothing function
            smoothed_array = smooth_previous_only(cumulative_distance_per_minute)

            # Reference value for the average line
            average_distance = avg_dist_dict[str(i)]

            # Plot the distance data of the player
            fig_dist, axs_dist = plt.subplots(figsize=(10, 8))
            axs_dist.set_title(f'Slope of Cumulative Distance of Player {i}', fontsize=16)
            axs_dist.set_xlabel('Time (minutes)', fontsize=16)
            axs_dist.set_ylabel('Slope per Minute', fontsize=16)
            axs_dist.grid(True)
            axs_dist.plot(smoothed_array, label=f'Player {i}')
            axs_dist.axhline(y=average_distance, color='green', linestyle='--', label='Average')

            # Save the distance plot
            player_folder_path = os.path.join(folder_path, f'player_{i}')
            os.makedirs(player_folder_path, exist_ok=True)
            fig_dist_path = os.path.join(player_folder_path, f'{i}_distance_{times_loop}.png')
            fig_dist.savefig(fig_dist_path)
            plt.close(fig_dist)

            # Get velocity model data for the specific player
            total_velocity = velocity_all[:, mapped_index]

            # The velocity threshold is twice the player's average velocity
            velocity = avg_vel_dict[i] * 2

            # Mark the frames that are above the velocity threshold
            above_threshold_mask = total_velocity > velocity

            # Count the marked frames per complete minute and drop trailing zero counts
            num_frames = len(above_threshold_mask) // frames_per_minute
            above_threshold_mask_reshaped = above_threshold_mask[:num_frames * frames_per_minute].reshape(num_frames, frames_per_minute)
            counts_array = np.sum(above_threshold_mask_reshaped, axis=1)
            counts_array = np.trim_zeros(counts_array, 'b')

            # Use the custom smoothing function
            smoothed_count_array = smooth_previous_only(counts_array)

            # Get value for average velocity line
            average_vel = vel_dict[str(i)]

            # Plot the velocity data of the player
            fig_vel, axs_vel = plt.subplots(figsize=(10, 8))
            axs_vel.set_title(f'Amount of frames above certain velocity by player {i}', fontsize=16)
            axs_vel.set_xlabel('Time (minutes)', fontsize=16)
            axs_vel.set_ylabel('Number of frames above 2 times average velocity', fontsize=16)
            axs_vel.grid(True)
            axs_vel.plot(smoothed_count_array, label=f'Player {i}', color='red')
            axs_vel.axhline(y=average_vel, color='green', linestyle='--', label='Average')

            # Save the velocity plot
            fig_vel_path = os.path.join(player_folder_path, f'{i}_velocity_{times_loop}.png')
            fig_vel.savefig(fig_vel_path)
            plt.close(fig_vel)

        except Exception as e:
            # Best effort: report the problem and continue with the next player
            print(f"An error occurred by player {i}: {e}")

    # Close any remaining plots (for example figures left open by a failed player)
    plt.close('all')

In [None]:
# Poll the FTP folder forever: handle a new file if there is one, then wait
# 20 seconds before checking again
while True:
    new_file_in_folder(path)
    time.sleep(20)