In [1]:
import pandas as pd
import numpy as np
import os
import sys
import math
import json
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
from mplsoccer.pitch import Pitch
import matplotlib.patches as patches
from IPython.display import Markdown

In [2]:
import warnings
from sklearn.exceptions import UndefinedMetricWarning
from pandas.errors import SettingWithCopyWarning
warnings.filterwarnings(action='ignore', category=UndefinedMetricWarning)
warnings.simplefilter(action='ignore', category=(SettingWithCopyWarning))
warnings.simplefilter(action='ignore', category=(FutureWarning))
warnings.simplefilter(action='ignore', category=RuntimeWarning)

In [3]:
parent_dir = os.path.abspath(os.path.join(os.path.dirname("./"), '..'))
sys.path.append(parent_dir)
from tools.lineup_tpr import LineupTPR
from tools.pickler import Save_Model, Load_Model
from tools.draw_pitch_plot import Draw_Pitch_Actions
from tools.fm_attributes import Attributes
fm_attributes = Attributes.attribute_list
gk_attributes = Attributes.gk_attributes

In [4]:
players_df = pd.read_csv("../players_db/fm23/fm23db_processed.csv")
team_df = pd.read_csv("../players_db/fm23/team_ratings.csv")

<br><br><br><br><br><br><br><br>
<h2 style="color:beige;background:blue;">  Data Prep --- for Action Player Volume Regression</h2>

<br><br>
<h3 style="color:yellow;background:pink;">  Passing</h3>

In [5]:
# url = "https://fbref.com/en/comps/Big5/2022-2023/passing/players/2022-2023-Big-5-European-Leagues-Stats"
# dataPASSING = pd.read_html(url)
# dataPASSING[0].to_csv("data/Big5@Players@Passing@2022_2023.csv",index=False)
# # Att, Att, Att -> same column names occurs
# fb = dataPASSING[0]
# #fb.columns
# fb['90s'] = pd.to_numeric(fb[('Unnamed: 8_level_0','90s')], errors='coerce')
# fb['Att'] = pd.to_numeric(fb[('Total','Att')], errors='coerce')
# fb['Pos'] = fb[('Unnamed: 3_level_0','Pos')]
# fb['Squad'] = fb[('Unnamed: 4_level_0','Squad')]
# fb['Player'] = fb[('Unnamed: 1_level_0','Player')]
# fb['Pass/90'] = fb['Att'] / fb['90s']
# fb2 = fb[fb["90s"] > 15]
# fb2.columns = fb2.columns.get_level_values(0)
# fb2.sort_values(by="Pass/90", ascending=False).head(3)[["Pos","Squad","Player","Att","Pass/90"]]

In [6]:
fb2_raw = pd.read_csv("data/Big5@Players@Passing@2022_2023.csv")
fb2 = fb2_raw[fb2_raw["90s"] > 15]
fb_passing = fb2.sort_values(by="Pass/90", ascending=False)[["Pos","Squad","Player","Att","Pass/90"]]
#fb_passing["Pos"] = fb_passing["Pos"].apply(lambda row: row.split(",")[0])
pass_merged_df = pd.merge(players_df, fb_passing, how="inner", left_on="Name", right_on="Player", suffixes=("","_fb"))

In [7]:
passing_positions = pass_merged_df.sort_values(by="Pass/90", ascending=False).head(50)["Pos_Rank_Average"]
print(passing_positions.mean())
passing_ratings = pass_merged_df.sort_values(by="Pass/90", ascending=False).head(50)[fm_attributes].mean().sort_values(ascending=False)

4.24


<br><br>
<h3 style="color:beige;background:red;">  Shooting</h3>

In [8]:
# url = "https://fbref.com/en/comps/Big5/2022-2023/shooting/players/2022-2023-Big-5-European-Leagues-Stats"
# dataSHOOTING = pd.read_html(url)
# dataSHOOTING[0].to_csv("data/Big5@Players@Shooting@2022_2023.csv",index=False)
# dataSHOOTING[0].columns = dataSHOOTING[0].columns.get_level_values(1)
# fb = dataSHOOTING[0]
# fb.columns
# fb['90s'] = pd.to_numeric(fb['90s'], errors='coerce')
# fb['Sh'] = pd.to_numeric(fb['Sh'], errors='coerce')
# fb['Sh/90'] = pd.to_numeric(fb['Sh/90'], errors='coerce')
# fb2 = fb[fb["90s"] > 15]
# fb2.sort_values(by="Sh/90", ascending=False).head(3)[["Pos","Squad","Player","Sh","Sh/90"]]

In [9]:
fb2_raw = pd.read_csv("data/Big5@Players@Shooting@2022_2023.csv")
fb2 = fb2_raw[fb2_raw["90s"] > 15]
fb_shooters = fb2.sort_values(by="Sh/90", ascending=False)[["Pos","Squad","Player","Sh","Sh/90"]]
fb_shooters["Pos"] = fb_shooters["Pos"].apply(lambda row: row.split(",")[0])
sh_merged_df_raw = pd.merge(players_df, fb_shooters, how="inner", left_on="Name", right_on="Player", suffixes=("","_fb"))
sh_merged_df = sh_merged_df_raw[sh_merged_df_raw["Best_Pos"] != "GK"]

In [10]:
shooters_positions = sh_merged_df.sort_values(by="Sh/90", ascending=False).head(50)["Pos_Rank_Average"]
print(shooters_positions.mean())
shooters_ratings = sh_merged_df.sort_values(by="Sh/90", ascending=False).head(50)[fm_attributes].mean().sort_values(ascending=False)

11.3


<br><br>
<h3 style="color:red;background:orange;">  Dribbling</h3>

In [11]:
# url = "https://fbref.com/en/comps/Big5/2022-2023/possession/players/2022-2023-Big-5-European-Leagues-Stats"
# dataDRIBBLING = pd.read_html(url)
# dataDRIBBLING[0].to_csv("data/Big5@Players@Dribbling@2022_2023.csv",index=False)
# fb = dataDRIBBLING[0]
# #fb.columns
# fb['90s'] = pd.to_numeric(fb[('Unnamed: 8_level_0','90s')], errors='coerce')
# fb['Att'] = pd.to_numeric(fb[('Take-Ons','Att')], errors='coerce')
# fb['Pos'] = fb[('Unnamed: 3_level_0','Pos')]
# fb['Squad'] = fb[('Unnamed: 4_level_0','Squad')]
# fb['Player'] = fb[('Unnamed: 1_level_0','Player')]
# fb['Dribble/90'] = fb['Att'] / fb['90s']
# fb2 = fb[fb["90s"] > 15]
# fb2.columns = fb2.columns.get_level_values(0)
# fb2.sort_values(by="Dribble/90", ascending=False).head(3)[["Pos","Squad","Player","Att","Dribble/90"]]

In [12]:
fb2_raw = pd.read_csv("data/Big5@Players@Dribbling@2022_2023.csv")
fb2 = fb2_raw[fb2_raw["90s"] > 15]
fb_dribbling = fb2.sort_values(by="Dribble/90", ascending=False)[["Pos","Squad","Player","Att","Dribble/90"]]
#fb_dribbling["Pos"] = fb_dribbling["Pos"].apply(lambda row: row.split(",")[0])
dribble_merged_df = pd.merge(players_df, fb_dribbling, how="inner", left_on="Name", right_on="Player", suffixes=("","_fb"))

In [13]:
dribbling_positions = dribble_merged_df.sort_values(by="Dribble/90", ascending=False).head(50)["Pos_Rank_Average"]
print(dribbling_positions.mean())
dribbling_ratings= dribble_merged_df.sort_values(by="Dribble/90", ascending=False).head(50)[fm_attributes].mean().sort_values(ascending=False)

9.92


In [14]:
# pass_merged_df, sh_merged_df, dribble_merged_df

In [15]:
# passing_top_cols, shooting_top_cols, dribbling_top_cols

In [16]:
top_N = 12

In [17]:
shooting_top_cols = shooters_ratings[:top_N].index
shooting_top_cols

Index(['Off_the_Ball', 'Technique', 'Flair', 'Pace', 'Acceleration',
       'First_Touch', 'Finishing', 'Agility', 'Determination', 'Dribbling',
       'Anticipation', 'Natural_Fitness'],
      dtype='object')

In [18]:
passing_top_cols = passing_ratings[:top_N].index
passing_top_cols

Index(['Determination', 'Anticipation', 'Teamwork', 'Stamina', 'Composure',
       'Natural_Fitness', 'Passing', 'Work_Rate', 'Tackling', 'Decisions',
       'First_Touch', 'Technique'],
      dtype='object')

In [19]:
dribbling_top_cols = dribbling_ratings[:top_N].index
dribbling_top_cols

Index(['Dribbling', 'Acceleration', 'Flair', 'Pace', 'Technique', 'Agility',
       'First_Touch', 'Determination', 'Natural_Fitness', 'Off_the_Ball',
       'Passing', 'Vision'],
      dtype='object')

<br><br><br><br><br><br><br><br>
<h2 style="color:pink;background:purple;">  Data Prep --- for Action Player Accuracy Regression</h2>

<br>
<h3 style="color:black;background:white;">  Passing</h3>

In [20]:
# fb2_raw = pd.read_csv("data/Big5@Players@Passing@2022_2023.csv")
# fb2_raw["Pass_Accuracy"] = pd.to_numeric(fb2_raw["Total.2"], errors="coerce")
# fb2_raw.to_csv("data/Big5@Players@Passing@2022_2023.csv", index=False)

In [21]:
fb2_raw = pd.read_csv("data/Big5@Players@Passing@2022_2023.csv")
fb2_raw = fb2_raw[fb2_raw["Pos"] != "I WANT GK"]
fb2_raw["Att"] = pd.to_numeric(fb2_raw["Att"], errors="coerce")
filter_value = fb2_raw["Att"].max() / 10
print(filter_value)

322.3


In [22]:
fb2 = fb2_raw[fb2_raw["Att"] > filter_value]
fb2["Pass_Accuracy"] = pd.to_numeric(fb2["Total.2"])
fb_passing_acc = fb2.sort_values(by="Pass_Accuracy", ascending=False)[["Pos","Squad","Player","Pass_Accuracy","Att"]]
#fb_passing_acc["Pos"] = fb_passing_acc["Pos"].apply(lambda row: row.split(",")[0])
pass_merged_df = pd.merge(players_df, fb_passing_acc, how="inner", left_on="Name", right_on="Player", suffixes=("","_fb"))

In [23]:
passing_positions = pass_merged_df.sort_values(by="Pass_Accuracy", ascending=False).head(50)["Pos_Rank_Average"]
print(passing_positions.mean())
passing_ratings = pass_merged_df.sort_values(by="Pass_Accuracy", ascending=False).head(50)[fm_attributes].mean().sort_values(ascending=False)

4.54


<br>
<h3 style="color:black;background:white;">  Shooting</h3>

In [24]:
# fb2_raw = pd.read_csv("data/Big5@Players@Shooting@2022_2023.csv")
# fb2_raw["Shot_Accuracy"] = pd.to_numeric(fb2_raw["SoT%"], errors="coerce")
# fb2_raw.to_csv("data/Big5@Players@Shooting@2022_2023.csv", index=False)

In [25]:
fb2_raw = pd.read_csv("data/Big5@Players@Shooting@2022_2023.csv")
fb2_raw = fb2_raw[fb2_raw["Pos"] != "GK"]
fb2_raw["Sh"] = pd.to_numeric(fb2_raw["Sh"], errors="coerce")
filter_value = fb2_raw["Sh"].max() / 4
print(filter_value)

36.0


In [26]:
fb2 = fb2_raw[fb2_raw["Sh"] > filter_value]
fb2["Shot_Accuracy"] = pd.to_numeric(fb2["SoT%"])
fb2.to_csv("data/Big5@Players@Shooting@2022_2023.csv", index=False)
fb_shooting_acc = fb2.sort_values(by="Shot_Accuracy", ascending=False)[["Pos","Squad","Player","Shot_Accuracy","Sh"]]
#fb_shooting_acc["Pos"] = fb_shooting_acc["Pos"].apply(lambda row: row.split(",")[0])
sh_merged_df = pd.merge(players_df, fb_shooting_acc, how="inner", left_on="Name", right_on="Player", suffixes=("","_fb"))

In [27]:
shooters_positions = sh_merged_df.sort_values(by="Shot_Accuracy", ascending=False).head(50)["Pos_Rank_Average"]
print(shooters_positions.mean())
shooters_ratings = sh_merged_df.sort_values(by="Shot_Accuracy", ascending=False).head(50)[fm_attributes].mean().sort_values(ascending=False)

10.62


<br>
<h3 style="color:black;background:white;">  Dribbling</h3>

In [28]:
# fb2_raw = pd.read_csv("data/Big5@Players@Dribbling@2022_2023.csv")
# fb2_raw["Dribble_Accuracy"] = pd.to_numeric(fb2_raw["Take-Ons.2"], errors="coerce")
# fb2_raw.to_csv("data/Big5@Players@Dribbling@2022_2023.csv", index=False)

In [29]:
fb2_raw = pd.read_csv("data/Big5@Players@Dribbling@2022_2023.csv")
fb2_raw = fb2_raw[fb2_raw["Pos"] != "GK"]
fb2_raw["Take-Ons"] = pd.to_numeric(fb2_raw["Take-Ons"], errors="coerce")
filter_value = fb2_raw["Take-Ons"].max() / 3
print(filter_value)

102.0


In [30]:
fb2 = fb2_raw[fb2_raw["Take-Ons"] > filter_value].dropna()
fb2["Dribble_Accuracy"] = pd.to_numeric(fb2["Take-Ons.2"])
fb_dribbling_acc = fb2.sort_values(by="Dribble_Accuracy", ascending=False)[["Pos","Squad","Player","Dribble_Accuracy","Take-Ons"]]
#fb_dribbling_acc["Pos"] = fb_dribbling_acc["Pos"].apply(lambda row: row.split(",")[0])
dribble_merged_df = pd.merge(players_df, fb_dribbling_acc, how="inner", left_on="Name", right_on="Player", suffixes=("","_fb"))

In [31]:
dribbling_positions = dribble_merged_df.sort_values(by="Dribble_Accuracy", ascending=False).head(50)["Pos_Rank_Average"]
print(dribbling_positions.mean())
dribbling_ratings= dribble_merged_df.sort_values(by="Dribble_Accuracy", ascending=False).head(50)[fm_attributes].mean().sort_values(ascending=False)

9.5


<br><br><br><br><br><br><br><br>
<h2 style="color:lightgreen;background:blue;">Action Player Volume & Accuracy Regression</h2>

In [74]:
def Prepare_Action_Player_Volume_Accuracy_Regression_Data(players_df, csv_file, filter_column, filter_value, target_variable, target_volume=None, top_playersN=None, top_colsN=None, fm_attributes=None, gk_excluded=None):
    fb2_raw = pd.read_csv(csv_file)
    fb2_raw[filter_column] = pd.to_numeric(fb2_raw[filter_column], errors="coerce")
    fb2 = fb2_raw[fb2_raw[filter_column] > filter_value]
    if target_volume:
        fb_actions = fb2.sort_values(by=target_variable, ascending=False)[["Pos","Squad","Player",target_volume,target_variable]]
    else:
        fb_actions = fb2.sort_values(by=target_variable, ascending=False)[["Pos","Squad","Player",target_volume,target_variable]]
    fb_actions["Pos"] = fb_actions["Pos"].apply(lambda row: row.split(",")[0])
    merged_df_raw = pd.merge(players_df, fb_actions, how="inner", left_on="Name", right_on="Player", suffixes=("","_fb"))
    if gk_excluded:
        merged_df = merged_df_raw[merged_df_raw["Best_Pos"] != "GK"]
    else:
        merged_df = merged_df_raw
    top_players = merged_df.sort_values(by=target_variable, ascending=False).head(top_playersN)
    position_mean = top_players["Pos_Rank_Average"].mean()
    top_attributes = top_players[fm_attributes].mean().sort_values(ascending=False).head(top_colsN).index
    
    return {
        "zero": 0,
        "target": target_variable,
        "df": merged_df,
        "position_mean": position_mean,
        "top_playersN": top_playersN,
        "reg_cols": np.concatenate((top_attributes, ["Pos_Rank_Average"], ["tpr","tpr_club"])),
    }

In [75]:
from tools.pickler import Save_Model, Load_Model
#Save_Model("_", "models/XXXXXXXXXX_per_90_model", model)

def Train_Model(df, reg_cols, target, test_size):
    from sklearn.model_selection import train_test_split, GridSearchCV
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LinearRegression 
    from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

    X = df[reg_cols]
    y = df[target].astype(float)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=0)
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred_raw = model.predict(X_test)
    y_pred = [max(0, pred) for pred in y_pred_raw]
    
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    return {
        "model": model,
        "mse": mse,
        "mae": mae,
        "r2": r2
    }

In [257]:
def DictToPredictionDict(d: dict) -> pd.DataFrame:
    rdict = {}
    for k,v in d.items():
        rdict[k] = [v]
    return pd.DataFrame(rdict)

def PlayerPredict(model, player_name, print_stats=False):
    if print_stats:
        selected_df = current_df 
    else:
        selected_df = current_df_extra
    d0 = selected_df.query(f"Name == '{player_name}'").iloc[0]
    d1 = d0[current_cols].to_dict()
    d2 = DictToPredictionDict(d1)
    
    print(f"Name:\t\t{d0['Name']}")
    print(f"Pos_Rank_Avg:\t{d0['Pos_Rank_Average']}")
    if print_stats:
        print(f"Real:\t\t{d0[current_target]}")
    prediction = max(0, model.predict(d2)[0])
    print(f"Prediction:\t[{prediction}]")
    return prediction

def Model_Predict(model, row, cols):
    d1 = row[cols].to_dict()
    d2 = DictToPredictionDict(d1)
    prediction = max(0, model.predict(d2)[0])
    return float(round(prediction, 2))

<br><br>

<br><br><br><br><br><br><br><br>
<h2 style="color:aqua;background:yellow;">  Experiment Area</h2>

In [251]:
hyper_df = pd.read_excel("data/player_action_hyper.xlsx")
title_df = hyper_df[hyper_df["Title"]=="Passing_Volume"].iloc[0]
title_df

Title                                  Passing_Volume
csv_file           Big5@Players@Passing@2022_2023.csv
gk_excluded                                     False
filter_column                                     90s
filter_value                                       21
top_playersN                                       60
top_colsN                                          46
test_size                                         0.2
MAE                                            6553.0
target_variable                               Pass/90
target_volume                                     Att
Name: 0, dtype: object

In [252]:
prep_by = Prepare_Action_Player_Volume_Accuracy_Regression_Data(
    players_df,
    csv_file="data/"+title_df["csv_file"],
    filter_column=title_df["filter_column"],
    filter_value=title_df["filter_value"],
    target_variable=title_df["target_variable"],
    target_volume=title_df["target_volume"],
    top_playersN=title_df["top_playersN"],
    top_colsN=title_df["top_colsN"],
    fm_attributes=fm_attributes,
    gk_excluded=title_df["gk_excluded"]
)

chosen_dict = {
    "df": prep_by["df"],
    "reg_cols": prep_by["reg_cols"],
    "target": prep_by["target"],
    "test_size": title_df["test_size"],
}

In [253]:
current_df = pd.merge(team_df, chosen_dict["df"], how="inner", left_on="fbref_name", right_on="Squad", suffixes=("_club",""))

In [254]:
model = Train_Model(current_df, chosen_dict["reg_cols"], chosen_dict["target"], chosen_dict["test_size"])["model"]

In [255]:
all_df = pd.merge(team_df, players_df, how="inner", left_on="Club", right_on="Club", suffixes=("_club",""))
test_row = all_df.iloc[0]

In [256]:
Model_Predict(model, test_row, chosen_dict["reg_cols"])

80.67

In [258]:
PlayerPredict(model, "Kevin De Bruyne", True)

NameError: name 'current_cols' is not defined

In [225]:
# actions_loop_list = [
#         {"filter_min":900,"filter_max":1800,"filter_step":100,"target_variable":"Pass_Accuracy","target_volume":"Att","csv_file":"data/Big5@Players@Passing@2022_2023.csv","filter_column":"Att","gk_excluded":False},
#         {"filter_min":12,"filter_max":24,"filter_step":3,"target_variable":"Pass/90","target_volume":"Att","csv_file":"data/Big5@Players@Passing@2022_2023.csv","filter_column":"90s","gk_excluded":False},
#         {"filter_min":30,"filter_max":40,"filter_step":3,"target_variable":"Shot_Accuracy","target_volume":"Sh","csv_file":"data/Big5@Players@Shooting@2022_2023.csv","filter_column":"Sh","gk_excluded":True},
#         {"filter_min":12,"filter_max":24,"filter_step":3,"target_variable":"Sh/90","target_volume":"Sh","csv_file":"data/Big5@Players@Shooting@2022_2023.csv","filter_column":"90s","gk_excluded":True},
#         {"filter_min":30,"filter_max":60,"filter_step":3,"target_variable":"Dribble_Accuracy","target_volume":"Take-Ons","csv_file":"data/Big5@Players@Dribbling@2022_2023.csv","filter_column":"Take-Ons","gk_excluded":True},
#         {"filter_min":12,"filter_max":24,"filter_step":3,"target_variable":"Dribble/90","target_volume":"Att","csv_file":"data/Big5@Players@Dribbling@2022_2023.csv","filter_column":"90s","gk_excluded":True},
#     ]

# with open("data/models_hyperparam.txt", "w", encoding="utf-8") as f:
#     f.truncate(0)
    
# for loop in actions_loop_list: #[3:4]:
#     mae = 100
#     target_variable = loop["target_variable"]
#     target_volume = loop["target_volume"]
#     csv_file = loop["csv_file"]
#     filter_column = loop["filter_column"]
#     gk_excluded = loop["gk_excluded"]
#     range_df = pd.read_csv(csv_file)
#     filter_min = loop["filter_min"]
#     filter_max = loop["filter_max"]
#     filter_step = loop["filter_step"]
    
#     for filter_value in range(filter_min, filter_max, filter_step):
#         for top_playersN in range(20, 100, 10):
#             for top_colsN in range(10, len(fm_attributes), 3):
#                 prep_by = Prepare_Action_Player_Volume_Accuracy_Regression_Data(
#                     players_df,
#                     csv_file=csv_file,
#                     filter_column=filter_column,
#                     filter_value=filter_value,
#                     target_variable=target_variable,
#                     target_volume=target_volume,
#                     top_playersN=top_playersN,
#                     top_colsN=top_colsN,
#                     fm_attributes=fm_attributes,
#                     gk_excluded=gk_excluded
#                 )
                
#                 chosen_dict = {
#                     "df": prep_by["df"],
#                     "reg_cols": prep_by["reg_cols"],
#                     "target": prep_by["target"],
#                 }
#                 current_df = pd.merge(team_df, chosen_dict["df"], how="inner", left_on="fbref_name", right_on="Squad", suffixes=("_club",""))
#                 for num in range(10,30+1): #test size between %10 to %30
#                     test_size = round(num/100, 2)
#                     this_mae = Train_Model(current_df, chosen_dict["reg_cols"], chosen_dict["target"], test_size)["mae"]
#                     if this_mae < mae:
#                         mae = this_mae
#                         best_result = (target_variable, filter_value, top_playersN, top_colsN, test_size, mae)
#                         markdown_str = f"""
#                         | Target | Filter Value | Top Players N | Top Columns N | Test Size | Mean Absolute Error (MAE) |
#                         |--------|--------------|---------------|---------------|-----------|----------------------------|
#                         | {best_result[0]} | {best_result[1]} | {best_result[2]} | {best_result[3]} | {best_result[4]} | {best_result[5]} |
#                         """
#                         with open("data/models_hyperparam.txt", "a", encoding="utf-8") as f:
#                             f.write(markdown_str)
#                             f.write("\n\n")
#                         #display(Markdown(markdown_str))
#     print(target_variable)

<br><br><br><br><br><br><br><br>
<h2 style="color:white;background:orange;">  Predict</h2>

In [85]:
location_df = pd.read_csv("data/location_df.csv")
volume_formula_df = pd.read_csv("formulas/Attacking_Touches_volume_formula_df.csv") # Attacking_Touches

<br>

In [None]:
fm_managerial_tactics = {
    'Standard': {"Pass": 1.0, "Shot": 1.0, "Dribble": 1.0},
    'Gegen - Direct': {"Pass": 1.2, "Shot": 1.0, "Dribble": 0.8},
    'Gegen - High Tempo': {"Pass": 1.1, "Shot": 1.1, "Dribble": 0.9},
    'Gegen - Slow Passing': {"Pass": 0.9, "Shot": 1.0, "Dribble": 1.0},
    'Gegen - Wing Play': {"Pass": 1.3, "Shot": 1.0, "Dribble": 0.7},
    
    'Tiki Taka - Direct': {"Pass": 1.5, "Shot": 0.8, "Dribble": 0.8},
    'Tiki Taka - High Tempo': {"Pass": 1.4, "Shot": 0.9, "Dribble": 0.8},
    'Tiki Taka - Slow Passing': {"Pass": 1.6, "Shot": 0.7, "Dribble": 0.9},
    'Tiki Taka - Wing Play': {"Pass": 1.7, "Shot": 0.6, "Dribble": 0.8},
    
    'Control - Direct': {"Pass": 1.1, "Shot": 1.0, "Dribble": 1.0},
    'Control - High Tempo': {"Pass": 1.0, "Shot": 1.1, "Dribble": 1.0},
    'Control - Slow Passing': {"Pass": 1.0, "Shot": 0.9, "Dribble": 1.1},
    'Control - Wing Play': {"Pass": 1.2, "Shot": 1.0, "Dribble": 0.9},
    
    'Counter - Direct': {"Pass": 0.8, "Shot": 1.2, "Dribble": 1.0},
    'Counter - High Tempo': {"Pass": 0.9, "Shot": 1.1, "Dribble": 1.1},
    'Counter - Slow Passing': {"Pass": 0.7, "Shot": 1.0, "Dribble": 1.2},
    'Counter - Wing Play': {"Pass": 0.8, "Shot": 1.3, "Dribble": 1.0},
    
    'Total Football - Direct': {"Pass": 1.2, "Shot": 1.2, "Dribble": 1.0},
    'Total Football - High Tempo': {"Pass": 1.1, "Shot": 1.3, "Dribble": 1.0},
    'Total Football - Slow Passing': {"Pass": 1.3, "Shot": 1.1, "Dribble": 0.9},
    'Total Football - Wing Play': {"Pass": 1.4, "Shot": 1.0, "Dribble": 0.8},
}

selected_tactics = [
    fm_managerial_tactics['Standard'],
    fm_managerial_tactics['Standard'],
    fm_managerial_tactics['Standard'],
    fm_managerial_tactics['Standard'],
    fm_managerial_tactics['Standard'],
    fm_managerial_tactics['Standard'],
    fm_managerial_tactics['Counter - High Tempo'],
    fm_managerial_tactics['Counter - Wing Play'],
]

In [None]:
def create_triple_prob_dict(row, tactical_coef={"Pass": 0, "Shot": 0, "Dribble": 0}):
    prob_dict = {
        "Pass": row["pass_prob"]+row["pass_prob"]*tactical_coef["Pass"],
        "Shot": row["shot_prob"]+row["shot_prob"]*tactical_coef["Shot"],
        "Dribble": row["dribble_prob"]+row["dribble_prob"]*tactical_coef["Dribble"]
    }
    sum_prob_dict = sum(prob_dict.values())
    weighted_prob_dict = {key: prob_dict[key]/sum_prob_dict for key in prob_dict.keys()}
    return weighted_prob_dict

In [None]:
def Action_Volume_Predict_Randomnessed(attribute, formula_df, tpr_x, tpr_y):
    input_df = pd.Series({"tpr_x": tpr_x, "tpr_y": tpr_y})
    custom_cols = list(input_df.index)
    coefficients = sum(formula_df.loc[:, f"{attribute}_coef"] * input_df[custom_cols].values)
    result = (coefficients + formula_df.loc[:, f"{attribute}_intercept"])
    randomnessed_result = result.iloc[0] - result.iloc[0] * (np.random.randint(-10, 10)/100)
    return max(0, int(randomnessed_result / 90))

In [None]:
def Action_Volume_Predict(attribute, formula_df, tpr_x, tpr_y):
    input_df = pd.Series({"tpr_x": tpr_x, "tpr_y": tpr_y})
    custom_cols = list(input_df.index)
    coefficients = sum(formula_df.loc[:, f"{attribute}_coef"] * input_df[custom_cols].values)
    result = (coefficients + formula_df.loc[:, f"{attribute}_intercept"])
    return max(0, int(result.iloc[0] / 90))

In [None]:
Action_Volume_Predict("Attacking_Touches", volume_formula_df, 92, 77)

In [None]:
def Generate_Random_Match_Stats_Per_Minute(location_df, formula_df, tpr_x, tpr_y, match_number=1):
    all_matches= []
    shot_map = []
    pass_map = []
    dribble_map = []
    location_df["triple_prob_dict"] = location_df.apply(lambda row: create_triple_prob_dict(row, selected_tactics[2]), axis=1)
    
    for process in range(match_number):
        Action_Counter = {"Pass": 0, "Shot": 0, "Dribble": 0}
        Touches = Action_Volume_Predict_Randomnessed("Attacking_Touches", formula_df, tpr_x, tpr_y)
        for minute in range(90):
            for moment in range(Touches):
                pitch_number = np.random.choice(location_df["Pitch_Number"], p=location_df["pitch_prob"])
                triple_probs = list(location_df[location_df["Pitch_Number"] == pitch_number]["triple_prob_dict"].iloc[0].values())
                triple_actions = list(location_df[location_df["Pitch_Number"] == pitch_number]["triple_prob_dict"].iloc[0].keys())
                action = np.random.choice(triple_actions, p=triple_probs)
                
                if action=="Pass":
                    pass_map.append(pitch_number)
                elif action=="Shot":
                    shot_map.append(pitch_number)
                elif action=="Dribble":
                    dribble_map.append(pitch_number)
                Action_Counter[action]+=1
            
        all_matches.append(Action_Counter)

        global progress
        progress = int((process + 1) / match_number * 100)
                      
    #print("!__completed__!")
    return {
        "match_data": all_matches,
        "pass_map": pass_map, 
        "shot_map": shot_map, 
        "dribble_map": dribble_map, 
    }

In [None]:
touches_df_raw_1 = pd.read_csv("../match_logs/Big5@22-23@attacking_touches.csv")
touches_df_raw_2 = pd.merge(touches_df_raw_1, team_df, left_on="Club", right_on="Club", how="inner") 
touches_df = pd.merge(touches_df_raw_2, team_df, left_on="Opponent", right_on="Club", how="inner")  

In [None]:
t = touches_df[["Club_x","tpr_x","tpr_y","Club_y","Date"]].iloc[25]
tpr_a = LineupTPR(players_df, t.Club_x).query("Best_Pos != 'GK'")["tpr"].mean()
tpr_b = LineupTPR(players_df, t.Club_y).query("Best_Pos != 'GK'")["tpr"].mean()
t

In [None]:
st1 = Generate_Random_Match_Stats_Per_Minute(location_df, volume_formula_df, tpr_a, tpr_b)["match_data"]
st2 = Generate_Random_Match_Stats_Per_Minute(location_df, volume_formula_df, tpr_b, tpr_a)["match_data"]
print(tpr_a, t.Club_x, st1)
print(tpr_b, t.Club_y, st2)

<br><br><br><br><br><br><br><br>
<h2 style="color:white;background:red;">  yyyydffff</h2>

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression

# Create DataFrame
df = merged_df.copy()

# Normalize the data
# scaler = MinMaxScaler()
# df[['Att', 'Dribbling', 'Pace', "tpr"]] = scaler.fit_transform(df[['Att', 'Dribbling', 'Pace', "tpr"]])

# Separate features and target variable
X = df[['Dribbling', 'Pace', "tpr"]]
y = df['Att']

# Create and train the model
model = LinearRegression()
model.fit(X, y)

# Get the coefficients
coefficients = model.coef_
intercept = model.intercept_

print("Coefficients:", coefficients)
print("Intercept:", intercept)

In [None]:
def f(dribbling, pace, tpr):
    return intercept + coefficients[0] * dribbling + coefficients[1] * pace + coefficients[2] * tpr

f(56, 72, 72)

In [None]:
# sort_dribble = df.sort_values("Att", ascending=False)
# sort_dribble[list(set(fm_attributes) - set(gk_attributes))].head(15).mean().sort_values(ascending=False)

In [None]:
pass_df = pd.read_csv("../match_logs/Big5@22-23@passing.csv")[["Att","Club"]].groupby(by="Club").mean().reset_index().sort_values("Att", ascending=False)
shot_df = pd.read_csv("../match_logs/Big5@22-23@shooting.csv")[["Sh","Club"]].groupby(by="Club").mean().reset_index().sort_values("Sh", ascending=False)
dribble_df = pd.read_csv("../match_logs/Big5@22-23@possession.csv")[["Att","Club"]].groupby(by="Club").mean().reset_index().sort_values("Att", ascending=False)
merged_df = pd.merge(
    pass_df.merge(shot_df, on="Club", suffixes=("", "_shot")),
    dribble_df, on="Club", suffixes=("_pass", "_dribble")
)
merged_df.rename(columns={"Sh":"Att_shot"}, inplace=True)
action_columns = merged_df.select_dtypes(exclude="object").columns

In [None]:
merged_df["Total_Actions"] = merged_df.apply(lambda row: row[action_columns].sum(), axis=1)
merged_df["Triple_Prob_Dict"] = merged_df.apply(lambda row: {
    "Pass": (row["Att_pass"]/row["Total_Actions"]),
    "Shot": row["Att_shot"]/row["Total_Actions"],
    "Dribble": row["Att_dribble"]/row["Total_Actions"],
}, axis=1)

In [None]:
exrow = merged_df.sort_values("Att_dribble",ascending=False).iloc[72].Triple_Prob_Dict
avg = location_df[["pass_prob","shot_prob","dribble_prob"]].mean()

In [None]:
d = {
    "p": exrow["Pass"] /  avg["pass_prob"],# * f["pass_prob"],
    "s": exrow["Shot"] / avg["shot_prob"],#  * f["shot_prob"],
    "d": exrow["Dribble"] / avg["dribble_prob"],#  * f["shot_prob"],
}
d