In [15]:

# used libraries 
import json
import pandas as pd
import matplotlib.pyplot as plt

# Similar Player Component used libraries 
from sklearn.metrics.pairwise import cosine_similarity # Cosine Similarity
from sklearn.cluster import KMeans # Kmeans clustering

# evaluation
from sklearn.metrics import silhouette_score
from scipy.stats import spearmanr
from scipy.stats import kendalltau

# Scouter AI Component used libraries
import os 
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# evaluation
from rouge_score import rouge_scorer
from bert_score import score

In [26]:
df = pd.read_csv("../Scouting/test/football-player-stats-2023.csv")
df_1 = pd.read_csv("../Scouting/test/football-player-stats-2023-COMPLETE.csv")

In [28]:
df

Unnamed: 0,Rk,Player,Nation,Pos,Squad,Comp,Age,Born,Playing Time MP,Performance Gls,...,Total Cmp,Total Att,Total Cmp%,Total TotDist,Total PrgDist,KP,1/3,PPA,CrsPA,PrgP
0,0,Brenden Aaronson,USA,"MF,FW",Leeds United,Premier League,23,2000,36,1,...,592,797,74.3,7577,2182,46,47,16,4,86
1,1,Paxten Aaronson,USA,"MF,DF",Eint Frankfurt,Bundesliga,20,2003,7,0,...,51,71,71.8,659,109,1,3,0,0,6
2,2,James Abankwah,IRL,DF,Udinese,Serie A,19,2004,2,0,...,23,29,79.3,375,79,0,0,0,0,0
3,3,George Abbott,ENG,MF,Tottenham,Premier League,18,2005,1,0,...,1,1,100.0,8,0,0,0,0,0,0
4,4,Yunis Abdelhamid,MAR,DF,Reims,Ligue 1,36,1987,37,1,...,1679,2031,82.7,32967,13407,13,155,5,0,215
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2717,2717,Martín Zubimendi,ESP,MF,Real Sociedad,La Liga,24,1999,36,1,...,1545,1812,85.3,26783,8092,20,175,23,1,184
2718,2718,Szymon Żurkowski,POL,MF,"Fiorentina, Spezia",Serie A,26,1997,12,0,...,87,120,158.8,1282,294,1,10,2,0,11
2719,2719,Martin Ødegaard,NOR,MF,Arsenal,Premier League,25,1998,37,15,...,1449,1804,80.3,22540,6014,76,135,91,4,266
2720,2720,Milan Đurić,BIH,FW,Hellas Verona,Serie A,33,1990,28,1,...,268,523,51.2,3119,740,21,27,5,0,29


In [29]:
df_player_norm = df.copy()
custom_mapping = {
    'GK': 1,
    'DF,FW': 4,
    'MF,FW': 8,
    'DF': 2,
    'DF,MF': 3,
    'MF,DF': 5,
    'MF': 6,
    'FW,DF': 7,
    'FW,MF': 9,
    'FW': 10
}

# Apply custom mapping to the 'Pos' column
df_player_norm['Pos'] = df_player_norm['Pos'].map(custom_mapping)

In [30]:
df_player_norm

Unnamed: 0,Rk,Player,Nation,Pos,Squad,Comp,Age,Born,Playing Time MP,Performance Gls,...,Total Cmp,Total Att,Total Cmp%,Total TotDist,Total PrgDist,KP,1/3,PPA,CrsPA,PrgP
0,0,Brenden Aaronson,USA,8,Leeds United,Premier League,23,2000,36,1,...,592,797,74.3,7577,2182,46,47,16,4,86
1,1,Paxten Aaronson,USA,5,Eint Frankfurt,Bundesliga,20,2003,7,0,...,51,71,71.8,659,109,1,3,0,0,6
2,2,James Abankwah,IRL,2,Udinese,Serie A,19,2004,2,0,...,23,29,79.3,375,79,0,0,0,0,0
3,3,George Abbott,ENG,6,Tottenham,Premier League,18,2005,1,0,...,1,1,100.0,8,0,0,0,0,0,0
4,4,Yunis Abdelhamid,MAR,2,Reims,Ligue 1,36,1987,37,1,...,1679,2031,82.7,32967,13407,13,155,5,0,215
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2717,2717,Martín Zubimendi,ESP,6,Real Sociedad,La Liga,24,1999,36,1,...,1545,1812,85.3,26783,8092,20,175,23,1,184
2718,2718,Szymon Żurkowski,POL,6,"Fiorentina, Spezia",Serie A,26,1997,12,0,...,87,120,158.8,1282,294,1,10,2,0,11
2719,2719,Martin Ødegaard,NOR,6,Arsenal,Premier League,25,1998,37,15,...,1449,1804,80.3,22540,6014,76,135,91,4,266
2720,2720,Milan Đurić,BIH,10,Hellas Verona,Serie A,33,1990,28,1,...,268,523,51.2,3119,740,21,27,5,0,29


In [31]:
selected_features = ['Pos', 'Age', 'Int', 'Clr', 'KP', 'PPA', 'CrsPA', 'PrgP', 'Playing_Time_MP',
       'Performance_Gls', 'Performance_Ast', 'Performance_G_A',
       'Performance_G-PK', 'Performance_Fls', 'Performance_Fld',
       'Performance_Crs','Performance_Recov', 'Expected_xG', 'Expected_npxG', 'Expected_xAG',
       'Expected_xA', 'Expected_A-xAG', 'Expected_G-xG', 'Expected_np_G-xG',
       'Progression_PrgC', 'Progression_PrgP', 'Progression_PrgR',
       'Tackles_Tkl', 'Tackles_TklW', 'Tackles_Def_3rd', 'Tackles_Mid_3rd',
       'Tackles_Att_3rd', 'Challenges_Att', 'Challenges_Tkl_',
       'Challenges_Lost', 'Blocks_Blocks', 'Blocks_Sh', 'Blocks_Pass',
       'Standard_Sh', 'Standard_SoT','Standard_SoT_', 'Standard_Sh_90', 'Standard_Dist', 'Standard_FK',
       'Performance_GA', 'Performance_SoTA', 'Performance_Saves',
       'Performance_Save_', 'Performance_CS', 'Performance_CS_',
       'Penalty_Kicks_PKatt', 'Penalty_Kicks_Save_', 'SCA_SCA',
       'GCA_GCA', 'Aerial_Duels_Won', 'Aerial_Duels_Lost', 'Aerial_Duels_Won_',
       'Total_Cmp', 'Total_Att', 'Total_Cmp_', 'Total_TotDist',
       'Total_PrgDist', '1_3'
]

In [32]:
# Apply MinMaxScaler normalization
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
df_player_norm[selected_features] = scaler.fit_transform(df_player_norm[selected_features])

KeyError: "['Playing_Time_MP', 'Performance_Gls', 'Performance_Ast', 'Performance_G_A', 'Performance_G-PK', 'Performance_Fls', 'Performance_Fld', 'Performance_Crs', 'Performance_Recov', 'Expected_xG', 'Expected_npxG', 'Expected_xAG', 'Expected_xA', 'Expected_A-xAG', 'Expected_G-xG', 'Expected_np_G-xG', 'Progression_PrgC', 'Progression_PrgP', 'Progression_PrgR', 'Tackles_Tkl', 'Tackles_TklW', 'Tackles_Def_3rd', 'Tackles_Mid_3rd', 'Tackles_Att_3rd', 'Challenges_Att', 'Challenges_Tkl_', 'Challenges_Lost', 'Blocks_Blocks', 'Blocks_Sh', 'Blocks_Pass', 'Standard_Sh', 'Standard_SoT', 'Standard_SoT_', 'Standard_Sh_90', 'Standard_Dist', 'Standard_FK', 'Performance_GA', 'Performance_SoTA', 'Performance_Saves', 'Performance_Save_', 'Performance_CS', 'Performance_CS_', 'Penalty_Kicks_PKatt', 'Penalty_Kicks_Save_', 'SCA_SCA', 'GCA_GCA', 'Aerial_Duels_Won', 'Aerial_Duels_Lost', 'Aerial_Duels_Won_', 'Total_Cmp', 'Total_Att', 'Total_Cmp_', 'Total_TotDist', 'Total_PrgDist', '1_3'] not in index"