### Import packages

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import glob
import os

from add_features import add_xy_future, add_velocity_xy, add_acceleration_xy, add_average_velocity, add_orientation, add_ball_in_motion, add_distance_to_ball, add_angle_to_ball, add_offside, add_distance_to_onside, load_FM_data, add_FM_data, add_tiredness, add_tiredness_short_term
from visualize_game import visualize_game_animation, visualize_prediction_animation
from utils import google_sheet_to_df, load_processed_frames
from settings import *

2024-04-18 16:36:55.576550: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


## Features explanation

### Tiredness
The tiredness of a player can be calculated using the formula:
$$\text{Tiredness} = \left( \frac{\text{distance\_ran}}{1000} + \frac{\text{minute}}{20} + \text{period} - 1 \right) \times \left( 1 - \frac{\text{sta}}{20} \right)$$
where $\text{distance\_ran}$ is the distance run in meters, $\text{minute}$ is the minute of the game, $\text{period}$ is the period of the game, and $\text{sta}$ is the player's stamina rated from 1 to 20.

Explanation of formula:
The tiredness increases by 1 for every kilometer the player runs and for every 20 minutes that pass, and the $\text{period} - 1$ term adds 1 to every frame in the second half. The total is then scaled by the player's stamina: the higher the stamina, the smaller the resulting tiredness.

## Functions for processing frames

In [2]:
def process_frames():
    """Build the feature-enriched ("processed") frames for every raw match.

    For each season in ``seasons`` and each competition in ``competitions``,
    every ``*.parquet`` file found in the ``unprocessed`` folder is read, run
    through the feature pipeline below, tagged with a ``match_id`` column
    (the file name without its extension) and written under the same file
    name to the sibling ``processed`` folder, which is created if missing.

    A match whose output file already exists is skipped unless
    ``reload_data`` is truthy.

    The names ``seasons``, ``competitions``, ``DATA_LOCAL_FOLDER``,
    ``reload_data``, ``FPS`` and ``seconds_into_the_future`` are read from
    module scope (presumably provided by ``from settings import *``).
    """
    # load_FM_data() takes no arguments, so its result cannot depend on the
    # season or competition: load it once instead of once per competition.
    fm_players_df = load_FM_data()

    for selected_season in seasons:
        for selected_competition in competitions:
            # Input and output folders for this season/competition
            folder_in = f"{DATA_LOCAL_FOLDER}/data/{selected_season}/{selected_competition}/unprocessed"
            folder_out = f"{DATA_LOCAL_FOLDER}/data/{selected_season}/{selected_competition}/processed"

            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test
            os.makedirs(folder_out, exist_ok=True)

            # One parquet file per match in the unprocessed folder
            for match_path in glob.glob(os.path.join(folder_in, "*.parquet")):
                # The match id is the file name without the ".parquet" extension
                match_id = os.path.splitext(os.path.basename(match_path))[0]
                out_path = f"{folder_out}/{match_id}.parquet"

                # Skip matches that were already processed, unless a reload is requested
                if not reload_data and Path(out_path).exists():
                    print(f"Match {match_id} already processed. Skipping...")
                    continue

                frames_df = pd.read_parquet(match_path)

                # Feature pipeline; each step returns the enriched DataFrame
                frames_df = add_xy_future(frames_df, FPS * seconds_into_the_future)
                frames_df = add_velocity_xy(frames_df, 1, smooth=True)
                frames_df = add_acceleration_xy(frames_df, 1, smooth=True)
                frames_df = add_average_velocity(frames_df)
                frames_df = add_orientation(frames_df)
                frames_df = add_ball_in_motion(frames_df)
                frames_df = add_distance_to_ball(frames_df)
                frames_df = add_angle_to_ball(frames_df)
                # NOTE: add_offside and add_distance_to_onside are intentionally
                # not applied here; add_distance_to_onside is applied afterwards
                # by add_data_to_processed_frames().
                frames_df = add_FM_data(frames_df, fm_players_df)
                frames_df = add_tiredness(frames_df)
                frames_df = add_tiredness_short_term(frames_df, window=FPS*20)

                # Tag every row with the match it belongs to
                frames_df["match_id"] = match_id

                frames_df.to_parquet(out_path)
                print(f"Match {match_id} is processed")

def add_data_to_processed_frames():
    """Add further features to the already-processed frames, in place.

    For each season in ``seasons`` and each competition in ``competitions``,
    every ``*.parquet`` file in the ``processed`` folder is loaded, passed
    through ``add_distance_to_onside`` and written back to the same path,
    overwriting the previous file.

    To add other features to existing processed files, add or swap the
    feature call inside the loop.
    """
    for selected_season in seasons:
        for selected_competition in competitions:
            folder_out = f"{DATA_LOCAL_FOLDER}/data/{selected_season}/{selected_competition}/processed"

            # One parquet file per processed match
            for processed_path in glob.glob(os.path.join(folder_out, "*.parquet")):
                frames_df = pd.read_parquet(processed_path)

                # Feature(s) to append to the processed frames
                frames_df = add_distance_to_onside(frames_df)

                # Overwrite the processed file with the enriched frames
                frames_df.to_parquet(processed_path)

                # Derive the id from the file name. str.rstrip('.parquet')
                # must not be used here: it strips any trailing characters
                # from the set {., p, a, r, q, u, e, t}, which truncates ids
                # ending in e.g. 'a' or 'e'.
                match_id = os.path.splitext(os.path.basename(processed_path))[0]
                print(f"Match {match_id} is processed")

# Process and load frames
# Driver for this cell: the commented-out calls are alternative steps that can
# be re-enabled as needed.
#   process_frames()                      -> builds processed/ from unprocessed/
#   load_processed_frames(n_matches=1)    -> loads processed frames (imported from utils)
# process_frames()
# frames_dfs = load_processed_frames(n_matches=1)
# Currently enabled: append extra features to the existing processed files.
# NOTE: this overwrites every processed parquet file in place.
add_data_to_processed_frames()

Match d4d052c4-2367-499d-a625-b65c52b2535c is processed
Match 3bcee2a3-dce2-4ec9-ab96-6c0f2a742363 is processed
Match a903fa6a-431f-4a4c-a415-1ad9ff74b092 is processed
Match 4eefde1c-1962-4ee9-8c7c-60ce73d960fd is processed
Match a702213b-a0ce-4bb1-8d5a-c0bbf27adb85 is processed
Match 13a7806b-b717-4052-b8f9-d5ce515b331f is processed
Match b73d1042-5658-4be2-93d2-a639396cd248 is processed
Match cba3f068-be39-4588-8139-a44cb7c8c3af is processed
Match e0b985f0-edaf-4e9c-a314-b16f3eb7453d is processed
Match 36761aec-d5e6-4c0e-95ff-c280f04ba5d7 is processed
Match 442302f6-33d6-4e9e-bf33-406d0a5ad246 is processed
Match dd79b17a-bd93-48e7-b925-2a52fad7df6 is processed
Match 590ae7ee-f541-433a-8515-9a85035f7e8d is processed
Match fd925348-1732-4eaa-9685-fafb48709f0d is processed
Match 766d9ff3-4562-47ad-a4bb-5ce897434a81 is processed
Match eff033b8-5cd8-4241-bfc6-42fa0bf978d3 is processed
Match 2fcbfe27-7f18-437b-8284-efac4d2265df is processed
Match e6cff0eb-f0a1-437e-85a7-b42114bf851c is pro