### Import packages

In [2]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from scipy.signal import savgol_filter
from sklearn.pipeline import Pipeline

from pathlib import Path
import pandas as pd
import numpy as np
import random
import glob
import os

from add_features import add_xy_future, add_velocity_xy, add_acceleration_xy, add_orientation, add_ball_in_motion, add_distance_to_ball, add_angle_to_ball, add_FM_data, add_tiredness, add_offside 
from visualize_game import visualize_game_animation, visualize_prediction_animation
from utils import google_sheet_to_df, load_processed_frames
from settings import *

2024-04-03 18:00:37.967560: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


### Necessary columns
| Column Name    | Description                                        |
|----------------|----------------------------------------------------|
| player         | The name of the player                             |
| jersey_number  | The jersey number of the player                    |
| team           | 'home_team', 'away_team', or 'ball'                |
| team_name      | The team of the player                             |
| period         | The period of the game (1 or 2)                    |
| minute         | The minute of the game                             |
| second         | The second within the current minute               |
| frame          | The frame of the game                              |
| distance_ran   | The cumulative distance covered by the player      |
| x              | The x-coordinate of the player                     |
| y              | The y-coordinate of the player                     |

## Define `make_predictions`

In [None]:
# Add all features
def add_all_features(frames_df, fm_players_df=None):
    """Run every feature-engineering helper on ``frames_df``.

    The ``add_*`` helpers are called one after another on the same
    DataFrame, in the same order ``process_frames`` uses. Their return
    values are not used; this function itself returns ``None``.

    Args:
        frames_df: The frames DataFrame to enrich.
        fm_players_df: Football Manager player data handed to
            ``add_FM_data``. When omitted, a module-level variable named
            ``fm_players_df`` is used if one exists.

    Raises:
        NameError: If ``fm_players_df`` is neither passed in nor defined
            at module level.
    """
    # Resolve the Football Manager data up front so a missing table fails
    # before any feature has been added to frames_df.
    if fm_players_df is None:
        try:
            fm_players_df = globals()["fm_players_df"]
        except KeyError:
            raise NameError(
                "name 'fm_players_df' is not defined; pass it to "
                "add_all_features() or define it at module level"
            ) from None

    # Add the following features
    add_xy_future(frames_df, FPS * seconds_into_the_future)
    add_velocity_xy(frames_df, 1, smooth=True)
    add_acceleration_xy(frames_df, 1, smooth=True)
    add_orientation(frames_df)
    add_ball_in_motion(frames_df)
    add_distance_to_ball(frames_df)
    add_angle_to_ball(frames_df)
    add_FM_data(frames_df, fm_players_df)
    add_tiredness(frames_df)

In [1]:

def make_predictions(frames_df, model_name):
    """Add all features to ``frames_df``, run the model and return its result.

    Args:
        frames_df: The frames DataFrame; it is passed to
            ``add_all_features`` before the model is run.
        model_name: Name of the model to use.
            NOTE(review): this value is currently not forwarded to
            ``run_model`` - confirm the intended signature.

    Returns:
        Whatever ``run_model()`` returns - presumably the frames DataFrame
        with the 'x_future_pred' and 'y_future_pred' vectors added; verify
        against ``run_model``.
    """
    # Prepare the DataFrame by adding all features
    add_all_features(frames_df)

    # Run the model to obtain the vectors 'x_future_pred' and 'y_future_pred'.
    # NOTE(review): run_model is neither defined nor imported in the visible
    # notebook and is called without arguments - confirm where it comes from
    # and whether it should receive frames_df and model_name.
    predicted_df = run_model()

    # Hand the model output back to the caller instead of dropping it.
    return predicted_df

## Example usage

In [None]:
# Example match: a single unprocessed Allsvenskan 2023 game, chosen by its ID
match_id = '49e6bfdf-abf3-499d-b60e-cf727c6523c1'
DATA_FOLDER_UNPROCESSED = f"{DATA_LOCAL_FOLDER}/data/2023/Allsvenskan/unprocessed"
file_path_match = f"{DATA_FOLDER_UNPROCESSED}/{match_id}.parquet"

# Read the match's parquet file into a DataFrame
frames_df = pd.read_parquet(file_path_match)

# Add the features and run the "NN_best_v1" model on this match
make_predictions(frames_df, model_name="NN_best_v1")

In [2]:
# Process the unprocessed/ frames, and store the results to the processed/ folder
def process_frames():
    """Build a feature-enriched parquet file for every unprocessed match.

    For each combination of ``seasons`` and ``competitions``, every
    ``*.parquet`` file in the ``unprocessed`` folder is read, run through
    the ``add_*`` feature helpers, given a ``match_id`` column and written
    under the same file name to the sibling ``processed`` folder. When
    ``reload_data`` is falsy, a match whose output file already exists is
    skipped.

    Relies on names defined outside this function: ``seasons``,
    ``competitions``, ``DATA_LOCAL_FOLDER``, ``reload_data``, ``FPS``,
    ``seconds_into_the_future`` and ``load_FM_data`` (presumably provided by
    ``from settings import *`` - TODO confirm).
    """
    # Load Football Manager data once: load_FM_data() takes no arguments, so
    # it cannot depend on the season/competition being iterated.
    # NOTE(review): assumes load_FM_data() returns the same data on each call.
    fm_players_df = load_FM_data()

    for selected_season in seasons:
        for selected_competition in competitions:
            # Define paths
            folder_unprocessed = f"{DATA_LOCAL_FOLDER}/data/{selected_season}/{selected_competition}/unprocessed"
            folder_out = f"{DATA_LOCAL_FOLDER}/data/{selected_season}/{selected_competition}/processed"

            # Create the output folder; exist_ok avoids the check-then-create race
            os.makedirs(folder_out, exist_ok=True)

            # Find all frames parquet files
            match_paths = glob.glob(os.path.join(folder_unprocessed, "*.parquet"))

            # For all matches
            for match_path in match_paths:
                # The match ID is the file name without the ".parquet" extension
                match_id = Path(match_path).stem
                out_path = f"{folder_out}/{match_id}.parquet"

                # Skip if processed game already exists, if specified
                if not reload_data and Path(out_path).exists():
                    print(f"Match {match_id} already processed. Skipping...")
                    continue

                # Convert parquet file to a DataFrame
                frames_df = pd.read_parquet(match_path)

                # Process frames_df (the offside feature is currently disabled)
                add_xy_future(frames_df, FPS * seconds_into_the_future)
                add_velocity_xy(frames_df, 1, smooth=True)
                add_acceleration_xy(frames_df, 1, smooth=True)
                add_orientation(frames_df)
                add_ball_in_motion(frames_df)
                add_distance_to_ball(frames_df)
                add_angle_to_ball(frames_df)
                add_FM_data(frames_df, fm_players_df)
                add_tiredness(frames_df)
                # add_offside(frames_df)

                # Add match_id
                frames_df["match_id"] = match_id

                # Convert DataFrame to a parquet file
                frames_df.to_parquet(out_path)

                # Print that the match is processed
                print(f"Match {match_id} is processed")

# Takes the processed frames and adds more features
def add_data_to_processed_frames():
    """Add extra features to every already-processed match file.

    For each combination of ``seasons`` and ``competitions``, every
    ``*.parquet`` file in the ``processed`` folder is loaded, passed to
    ``add_tiredness`` and written back to the same path, overwriting the
    previous file.

    Relies on ``seasons``, ``competitions`` and ``DATA_LOCAL_FOLDER`` being
    defined outside this function.
    """
    for selected_season in seasons:
        for selected_competition in competitions:
            # Define the paths
            FOLDER_OUT = f"{DATA_LOCAL_FOLDER}/data/{selected_season}/{selected_competition}/processed"

            # Find all processed frames parquet files
            processed_paths = glob.glob(os.path.join(FOLDER_OUT, "*.parquet"))

            # If a feature added below needs Football Manager data, load it here:
            # fm_players_df = load_FM_data()

            # For each processed frame
            for processed_path in processed_paths:
                # Load the processed DataFrame
                frames_df = pd.read_parquet(processed_path)

                # Perform the operation to add data to the processed frames
                add_tiredness(frames_df)

                # Save the updated DataFrame back to the same file
                frames_df.to_parquet(processed_path)

                # Path.stem removes only the final ".parquet" suffix.
                # str.rstrip('.parquet') would treat its argument as a set of
                # characters and also eat trailing ID characters such as
                # 'a' or 'e'.
                match_id = Path(processed_path).stem

                # Print that the match is processed
                print(f"Match {match_id} is processed")

# Process and load frames
# Uncomment to (re)build the processed/ parquet files from the unprocessed/ ones:
# process_frames()
# Load processed frames with n_matches=10 (presumably one DataFrame per match - verify in utils)
frames_dfs = load_processed_frames(n_matches=10)
# Uncomment to add further features to the already-processed files:
# add_data_to_processed_frames()