In [11]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
import warnings

In [12]:
import warnings
from pandas.errors import SettingWithCopyWarning
warnings.simplefilter(action='ignore', category=(SettingWithCopyWarning))
warnings.simplefilter(action='ignore', category=(FutureWarning))

In [13]:
df = pd.read_csv("players_db/fm23/fm23db_processed.csv")

In [14]:
df[["Club"] + list(df.select_dtypes(exclude="object").columns)]

Unnamed: 0,Club,UID,Height,Age,Caps,Acceleration,Aerial Reach,Aggression,Agility,Anticipation,...,tpr_MC,tpr_DC,tpr_AMR,tpr_DL,tpr_DR,tpr_ST,tpr_ML,tpr_MR,tpr_WBR,tpr_WBL
0,FC Bayern,92039023,177,27,68,64,4,74,59,74,...,91,58,48,53,91,46,51,51,92,54
1,FC Bayern,85100467,174,30,87,79,4,69,79,84,...,55,46,88,60,49,85,81,61,58,70
2,FC Bayern,35011448,185,32,115,59,14,84,59,99,...,71,46,87,47,47,85,70,89,52,51
3,FC Bayern,8718372,193,36,112,54,74,64,64,79,...,46,46,46,46,46,46,46,46,46,46
4,FC Bayern,91104807,189,27,44,59,9,69,54,84,...,92,50,58,52,53,48,64,59,55,61
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6417,Rio Ave,2000225241,176,18,0,64,14,59,49,29,...,50,45,69,46,50,53,58,62,59,48
6418,Gil Vicente,2000225242,184,18,0,49,34,24,64,39,...,45,45,45,45,45,45,45,45,45,45
6419,Gil Vicente,2000225261,177,18,0,59,4,29,64,24,...,50,45,68,46,49,52,57,61,58,48
6420,Vizela,2000225267,193,17,0,34,9,59,59,24,...,46,66,45,55,55,45,46,47,47,48


In [15]:
df.Best_Pos

0        DM
1       AML
2       AMC
3        GK
4        MC
       ... 
6417    AMR
6418     GK
6419    AMR
6420     DC
6421     GK
Name: Best_Pos, Length: 6422, dtype: object

In [16]:
df.Club.value_counts(sort=True)

Club
Hellas Verona          74
Sassuolo               73
Internazionale         71
Benfica                71
Vizela                 68
                       ..
Sparta                 24
FC Volendam            23
Paris Saint-Germain    23
Ajax                   22
Clermont               22
Name: count, Length: 153, dtype: int64

In [17]:
# df.iloc[0].to_dict()

In [18]:
rating_attrs = ["Club","gk","def","pas","dri","fin","sta","str","hed","men","iq"]

# Formations

In [19]:
def Formation_Dict(formation="4-3-3"):
    test_formation = { 'GK': 1, 'DC': 2, 'DL': 1, 'DR': 1, 'DM': 1, 'MC': 2, 'AML': 1, 'AMR': 1, 'ST': 1 }
    return test_formation

<br><br><br><br>
# Rating Functions

In [20]:
def Quantile(n=16, index=0):
    data_numeric = pd.DataFrame(list(range(n)))
    quantile_intervals = [0, 0.25, 0.40, 0.65, 0.75, 1.0]
    qval = data_numeric.quantile(quantile_intervals)
    return int(qval.iloc[index]) 

In [21]:
def Quarter_Rating(df, n, index, current_attribute):
    quantile = Quantile(n=n, index=index)
    output = df.nlargest(quantile, current_attribute)[current_attribute].mean()
    return output

<br><br><br><br>
# Club Powers

In [22]:
def TPR(df, n, lineup=False, print_club=False, p_dict={}):
    '''
    Team Power Rating
    '''
    # Choose Attribute
    current_attribute = "tpr"
    zoom_df = df
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    if not lineup:
        for club, group in club_groups:
            top_players = group.nlargest(n, 'tpr')
            p1 = Quarter_Rating(top_players, n, 1, current_attribute)
            p2 = Quarter_Rating(top_players, n, 2, current_attribute)
            p3 = Quarter_Rating(top_players, n, 3, current_attribute)
            p4 = Quarter_Rating(top_players, n, 4, current_attribute)
            p5 = Quarter_Rating(top_players, n, 5, current_attribute)
            club_rating_dict[club] = int(p1*p_dict["p1"] + p2*p_dict["p2"] + p3*p_dict["p3"] + p4*p_dict["p4"] + p5*p_dict["p5"])
            
    else: # Lineup_df is active
        for club, group in club_groups:
            positions = Formation_Dict(formation="4-3-3")
            
            selected_players = []
            used_players = set()

            for position, count in positions.items():
                position_group = group[group['Best_Pos'] == position]
                # position_group = group[group['Position'].apply(lambda x: any(position in x.split(",") for i in x.split(",") if i == position))]
                
                if position_group.empty:
                    # position_group = group[group['Position'].apply(lambda x: position in x)]
                    position_group = group[group['Position'].apply(lambda x: any(position in x.split(",") for i in x.split(",") if i == position))]
                
                position_group = position_group[~position_group.index.isin(used_players)]
                top_position_players = position_group.nlargest(count, f'tpr_{position}')
                
                selected_players.extend(top_position_players.index.tolist())
                used_players.update(top_position_players.index.tolist())

            # Ensure we have exactly 11 players 
            while len(selected_players) < 11:
                remaining_players = group[~group.index.isin(used_players)]
                if remaining_players.empty:
                    break
                next_best_player = remaining_players.nlargest(1, f'tpr_{position}')
                selected_players.extend(next_best_player.index.tolist())
                used_players.update(next_best_player.index.tolist())

            lineup_players = group.loc[selected_players].sort_values(by="Pos_Rank")
            p1 = Quarter_Rating(lineup_players, n, 1, current_attribute)
            p2 = Quarter_Rating(lineup_players, n, 2, current_attribute)
            p3 = Quarter_Rating(lineup_players, n, 3, current_attribute)
            p4 = Quarter_Rating(lineup_players, n, 4, current_attribute)
            p5 = Quarter_Rating(lineup_players, n, 5, current_attribute)
            club_rating_dict[club] = int(p1*p_dict["p1"] + p2*p_dict["p2"] + p3*p_dict["p3"] + p4*p_dict["p4"] + p5*p_dict["p5"])
            
            # if club == 'AJ Auxerre':
            #     print(lineup_players[["Name","Best_Pos","tpr"]])
            
            # if len(lineup_players) != 11:
            #     print(club, len(lineup_players), "Players")
            #     print(lineup_players[["Name","Best_Pos","tpr"]])
            #     print("\n")
            #     print("\n")
            #     print("\n")

            if club == print_club:
                print(club, "XI =", len(lineup_players))
                print(lineup_players[["Name","Best_Pos","tpr"]])
    
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [23]:
output = TPR(df, n=16, lineup=True, print_club="Galatasaray", p_dict={ 'p1': 0.30, 'p2': 0.35, 'p3': 0.15, 'p4': 0.15, 'p5': 0.05 })
output

Galatasaray XI = 11
                     Name Best_Pos  tpr
2561     Fernando Muslera       GK   72
2741           Sacha Boey       DR   74
2558       Victor Nelsson       DC   81
2768  Abdülkerim Bardakcı       DC   76
2712  Patrick van Aanholt       DL   72
2526       Lucas Torreira       DM   78
2537      Sérgio Oliveira       MC   78
2571      Fredrik Midtsjø       MC   76
2678          Yunus Akgün      AMR   75
2522         Mauro Icardi       ST   85
2651     Kerem Aktürkoğlu      AML   78


Unnamed: 0,Club,tpr
92,Manchester City,87
105,Paris Saint-Germain,87
88,Liverpool,86
47,FC Barcelona,86
93,Manchester United,85
...,...,...
45,Excelsior,72
152,İstanbulspor,72
110,RKC,71
30,Cambuur,71


In [24]:
df.query("Best_Pos != 'xST'")[["Name","tpr"]].sort_values(by="tpr",ascending=False).head(n=20)

Unnamed: 0,Name,tpr
4162,Lionel Messi,95
4164,Robert Lewandowski,93
3198,Virgil van Dijk,91
3193,Erling Haaland,90
3194,Mohamed Salah,90
3195,Harry Kane,90
4163,Kylian Mbappé,90
3196,Rúben Dias,90
3192,Kevin De Bruyne,89
3240,Harry Maguire,89


In [25]:
df.query("Club == 'Manchester United'").sort_values(by="tpr",ascending=False)[["Name","Best_Pos","Position","tpr"]].head(n=10)

Unnamed: 0,Name,Best_Pos,Position,tpr
3240,Harry Maguire,DC,DC,89
3200,Cristiano Ronaldo,ST,ST,88
3208,Raphaël Varane,DC,DC,86
3213,Bruno Fernandes,AMC,"MC,AMC",85
3219,Casemiro,DM,"DM,MC",84
3408,Phil Jones,DC,DC,83
3248,Anthony Martial,AML,"AML,ST",83
3235,Marcus Rashford,AML,"AML,ST",82
3420,Lisandro Martínez,DC,"DL,DC,DM",82
3268,Victor Lindelöf,DC,DC,82


<h3 style="color:orange;">  Keeper</h3>

In [26]:
def TGK(df, n=1):
    '''
    Team Goalkeeping Rating
    '''
    # Choose Attribute
    current_attribute = "gk"
    zoom_df = df[df['Best_Pos'] == 'GK'].copy()
    
    # Group by club
    club_groups = zoom_df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        average_rating = top_players[current_attribute].mean()
        club_rating_dict[club] = int(average_rating)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

<h3 style="color:orange;">  Defending</h3>

In [27]:
def TDEF(df, n=16, p_dict={}):
    '''
    Team Defending Rating
    '''
    # Choose Attribute
    current_attribute = "def"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

<h3 style="color:red;">  Passing</h3>

In [28]:
def TPAS(df, n=16, p_dict={}):
    '''
    Team Passing Rating
    '''
    # Choose Attribute
    current_attribute = "pas"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

<h3 style="color:red;">  Dribbling</h3>

In [71]:
def TDRI(df, n=16, p_dict={}):
    '''
    Team Dribbling Rating
    '''
    # Choose Attribute
    current_attribute = "dri"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

<h3 style="color:red;">  Finishing</h3>

In [30]:
def TFIN(df, n=16, p_dict={}):
    '''
    Team Finishing Rating
    '''
    # Choose Attribute
    current_attribute = "fin"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

<h3 style="color:green;">  Stamina</h3>

In [31]:
def TSTA(df, n=16, p_dict={}):
    '''
    Team Stamina Rating
    '''
    # Choose Attribute
    current_attribute = "sta"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

<h3 style="color:green;">  Strength</h3>

In [32]:
def TSTR(df, n=16, p_dict={}):
    '''
    Team Strength Rating
    '''
    # Choose Attribute
    current_attribute = "str"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [33]:
def THED(df, n=16, p_dict={}):
    '''
    Team Heading Rating
    '''
    # Choose Attribute
    current_attribute = "hed"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

<h3 style="color:purple;">  Mental</h3>

In [34]:
def TMEN(df, n=16, p_dict={}):
    '''
    Team Mental Rating
    '''
    # Choose Attribute
    current_attribute = "men"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [35]:
def TIQ(df, n=16, p_dict={}):
    '''
    Team Intelligence Rating
    '''
    # Choose Attribute
    current_attribute = "iq"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

<br><br><br><br>
# TPR ALL

In [68]:
import pandas as pd

# Assuming df is your DataFrame and it's already loaded.

gk_attributes = ["Command of Area", "Communication", "First Touch", "Handling", "Kicking", "One vs One", "Punching", "Reflexes", "Rushing Out", "Throwing"]
mental_attributes = ["Aggression", "Anticipation", "Bravery", "Composure", "Concentration", "Decisions", "Determination", "Flair", "Leadership", "Off the Ball", "Positioning", "Teamwork", "Vision", "Work Rate"]
physical_attributes = ["Acceleration", "Agility", "Balance", "Jumping Reach", "Fitness", "Pace", "Stamina", "Strength"]
technical_attributes = ["Corners", "Crossing", "Dribbling", "Finishing", "First Touch", "Free Kick Taking", "Heading", "Long Shots", "Long Throws", "Marking", "Passing", "Penalty Taking", "Tackling", "Technique"]

def Quantile(data_numeric, n=16, index=0):
    quantile_intervals = [0, 0.25, 0.40, 0.65, 0.75, 1.0]
    qval = data_numeric.quantile(quantile_intervals)
    return int(qval.iloc[index])

def Quarter_Rating(group, n, index, current_attribute):
    quantile = Quantile(group[current_attribute], n=n, index=index)
    output = group.nlargest(quantile, current_attribute)[current_attribute].mean()
    return output

# Group by Club
grouped = df.groupby('Club').apply(lambda x: x.nlargest(16, "tpr")).reset_index(drop=True)

# Dictionary to store the results
club_stats = {}

for club, group in grouped.groupby('Club'):
    club_stats[club] = {}
    club_stats[club]["Club"] = club
    club_stats[club]["Club_id"] = group.iloc[0]["Club_id"]
    club_stats[club]["League_id"] = group.iloc[0]["League_id"]
    club_stats[club]["Division"] = group.iloc[0]["Division"]
    club_stats[club]["Based"] = group.iloc[0]["Based"]
    
    for attr_list in [gk_attributes, mental_attributes, physical_attributes, technical_attributes]:
        for attribute in attr_list:
            mean_value = group[attribute].mean()
            quarter_rating = Quarter_Rating(group, n=len(group), index=1, current_attribute=attribute)  # Adjust index as needed
            club_stats[club][f"{attribute}_mean"] = int(mean_value)
            club_stats[club][f"{attribute}_quarter_rating"] = int(quarter_rating)

# Calculate weighted average of top 5 players' quarter ratings for each club
club_rating_dict = {}

for club, group in grouped.groupby('Club'):
    top_players = group.nlargest(16, "tpr")
    p1 = Quarter_Rating(top_players, 16, 1, "tpr")
    p2 = Quarter_Rating(top_players, 16, 2, "tpr")
    p3 = Quarter_Rating(top_players, 16, 3, "tpr")
    p4 = Quarter_Rating(top_players, 16, 4, "tpr")
    p5 = Quarter_Rating(top_players, 16, 5, "tpr")
    club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)

# Convert the dictionaries to DataFrames for easier viewing
club_stats_df = pd.DataFrame(club_stats).T
club_ratings_df = pd.DataFrame.from_dict(club_rating_dict, orient='index', columns=['Club_Rating'])


In [69]:
club_stats_df

Unnamed: 0,Club,Club_id,League_id,Division,Based,Command of Area_mean,Command of Area_quarter_rating,Communication_mean,Communication_quarter_rating,First Touch_mean,...,Marking_mean,Marking_quarter_rating,Passing_mean,Passing_quarter_rating,Penalty Taking_mean,Penalty Taking_quarter_rating,Tackling_mean,Tackling_quarter_rating,Technique_mean,Technique_quarter_rating
AC Milan,AC Milan,7093,710,Italian Serie A,Italy (Serie A),12,30,13,19,69,...,47,47,67,67,53,53,52,52,68,68
AJ Auxerre,AJ Auxerre,145,773,Ligue 1 Uber Eats,France (Ligue 1 Uber Eats),13,18,13,21,58,...,42,42,57,57,45,45,44,44,62,62
AS Monaco,AS Monaco,193,773,Ligue 1 Uber Eats,France (Ligue 1 Uber Eats),13,19,11,15,63,...,46,46,63,63,48,48,50,50,63,63
AS Roma,AS Roma,2229,710,Italian Serie A,Italy (Serie A),10,15,10,14,67,...,52,52,67,67,47,47,59,59,67,67
AZ,AZ,274,363,Eredivisie,Holland (Eredivisie),15,37,17,25,59,...,42,42,60,60,45,45,44,44,60,60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Werder Bremen,Werder Bremen,9601,185,Bundesliga,Germany (Bundesliga),13,22,12,25,57,...,50,50,59,59,47,47,53,53,56,56
West Ham United,West Ham United,11994,354,English Premier Division,England (Premier Division),8,15,9,15,65,...,54,54,61,61,51,51,58,58,64,64
Wolverhampton,Wolverhampton,12084,354,English Premier Division,England (Premier Division),8,12,9,12,62,...,47,47,64,64,50,50,52,52,67,67
Ümraniyespor,Ümraniyespor,12340,1331,Turkish Super League,Turkey (Super League),12,19,11,18,54,...,40,40,57,57,44,44,43,43,57,57


In [70]:
club_stats_df.to_csv("players_db/fm23/team_ratings_Test.csv", index=False)

In [36]:
def TPRALL(df, n, coefficients):
    '''
    Team Power Rating All Attributes
    '''
    tpr = TPR(df, n, p_dict=coefficients)
    tgk = TGK(df, 1)
    tdef = TDEF(df, n, p_dict=coefficients)
    tpas = TPAS(df, n, p_dict=coefficients)
    tdri = TDRI(df, n, p_dict=coefficients)
    tfin = TFIN(df, n, p_dict=coefficients)
    tsta = TSTA(df, n, p_dict=coefficients)
    tstr = TSTR(df, n, p_dict=coefficients)
    thed = THED(df, n, p_dict=coefficients)
    tmen = TMEN(df, n, p_dict=coefficients)
    tiq = TIQ(df, n, p_dict=coefficients)
    
    top_players = df.groupby('Club').apply(lambda x: x.nlargest(n, "tpr")).reset_index(drop=True)
    club_rating_dict = {}
    
    for club, group in top_players.groupby('Club'):
        club_rating_dict[club] = {
            "Club": club,
            "Club_id": group.iloc[0]["Club_id"],
            "League_id": group.iloc[0]["League_id"],
            "Division": group.iloc[0]["Division"],
            "Based": group.iloc[0]["Based"],
            "tpr": tpr.query(f"Club == '{club}'").iloc[0, 1],    
            "gk": tgk.query(f"Club == '{club}'").iloc[0, 1],    
            "def": tdef.query(f"Club == '{club}'").iloc[0, 1],    
            "pas": tpas.query(f"Club == '{club}'").iloc[0, 1],    
            "dri": tdri.query(f"Club == '{club}'").iloc[0, 1],    
            "fin": tfin.query(f"Club == '{club}'").iloc[0, 1],    
            "sta": tsta.query(f"Club == '{club}'").iloc[0, 1],    
            "str": tstr.query(f"Club == '{club}'").iloc[0, 1],    
            "hed": thed.query(f"Club == '{club}'").iloc[0, 1],    
            "men": tmen.query(f"Club == '{club}'").iloc[0, 1],    
            "iq": tiq.query(f"Club == '{club}'").iloc[0, 1],    
        }
        
    club_df = pd.DataFrame.from_dict(club_rating_dict, orient='index').reset_index(drop=True)
    return club_df.sort_values("tpr", ascending=False)

In [37]:
squad_coef = { 'p1': 0.40, 'p2': 0.35, 'p3': 0.15, 'p4': 0.05, 'p5': 0.05 }
all_df = TPRALL(df, 16, squad_coef)

In [38]:
# all_df.query(f"League_id == {354}").head(n=10)
all_df.head(n=10)

Unnamed: 0,Club,Club_id,League_id,Division,Based,tpr,gk,def,pas,dri,fin,sta,str,hed,men,iq
92,Manchester City,6827,354,English Premier Division,England (Premier Division),88,73,78,80,77,76,83,78,71,77,77
105,Paris Saint-Germain,7994,773,Ligue 1 Uber Eats,France (Ligue 1 Uber Eats),88,66,75,82,80,74,76,75,73,74,80
88,Liverpool,6518,354,English Premier Division,England (Premier Division),87,74,79,78,77,72,83,77,77,75,75
93,Manchester United,6828,354,English Premier Division,England (Premier Division),86,65,79,77,76,76,87,77,78,71,72
47,FC Barcelona,1435,1215,Spanish First Division,Spain (First Division),86,74,78,76,76,75,81,77,77,77,78
113,Real Madrid,8716,1215,Spanish First Division,Spain (First Division),86,69,76,83,75,77,79,78,73,74,77
77,Internazionale,5215,710,Italian Serie A,Italy (Serie A),86,67,77,75,72,75,81,84,82,71,72
134,Tottenham Hotspur,11015,354,English Premier Division,England (Premier Division),85,62,77,73,72,76,83,79,76,70,70
48,FC Bayern,3704,185,Bundesliga,Germany (Bundesliga),85,73,75,75,78,68,85,79,71,75,75
34,Chelsea,2432,354,English Premier Division,England (Premier Division),85,68,79,74,74,71,88,82,75,76,75


In [39]:
df.query("Club == 'Paris Saint-Germain'").sort_values(by="tpr",ascending=False)[["Name","Best_Pos","Position","tpr"]].head(n=10)

Unnamed: 0,Name,Best_Pos,Position,tpr
4162,Lionel Messi,AMC,"AMR,AMC,ST",95
4163,Kylian Mbappé,AML,"AMR,AML,ST",90
4171,Sergio Ramos,DC,DC,88
4167,Neymar,AML,"ML,AML,AMC,ST",87
4209,Presnel Kimpembe,DC,DC,87
4177,Marquinhos,DM,"DC,DM",83
4194,Marco Verratti,MC,"DM,MC",82
4224,Fabián,MC,"DM,MC,AMC",82
4237,Pablo Sarabia,AMR,"MR,ML,AMR,AML,AMC",82
4440,Nuno Mendes,DL,"DL,WBL",79


In [40]:
df.sort_values(by="tpr",ascending=False)[["Name","Best_Pos","Position","tpr"]].head(n=10)

Unnamed: 0,Name,Best_Pos,Position,tpr
4162,Lionel Messi,AMC,"AMR,AMC,ST",95
4164,Robert Lewandowski,ST,ST,93
3198,Virgil van Dijk,DC,DC,91
3193,Erling Haaland,ST,ST,90
3194,Mohamed Salah,AMR,"AMR,AML,ST",90
3195,Harry Kane,ST,"AMC,ST",90
4163,Kylian Mbappé,AML,"AMR,AML,ST",90
3196,Rúben Dias,DC,DC,90
3192,Kevin De Bruyne,MC,"MR,ML,MC,AMC",89
3240,Harry Maguire,DC,DC,89


In [41]:
# Export as CSV

all_df.to_csv("players_db/fm23/team_ratings.csv", index=False)

<br><br><br><br>
<h1 style="color:red;">  Matching FBREF Team Names</h1>

In [42]:
from rapidfuzz import process
def find_best_match(name, choices):
    return process.extractOne(name, choices)

In [43]:
top5_leagues = ['Ligue 1 Uber Eats', 'English Premier Division', 'Italian Serie A',
       'Spanish First Division', 'Bundesliga'] 
top5_url = "https://fbref.com/en/comps/Big5/2022-2023/shooting/squads/2022-2023-Big-5-European-Leagues-Stats"

In [45]:
data = pd.read_html(top5_url)
team_df = pd.read_csv("players_db/fm23/team_ratings.csv")

In [46]:
fbref_teams = data[0][('Unnamed: 1_level_0', 'Squad')].tolist()
fm_teams = team_df[team_df.Division.isin(top5_leagues)]["Club"].tolist()

In [47]:
for club_name in fm_teams:
    answer, score, other = find_best_match(club_name, fbref_teams)
    where_id = team_df[team_df.Club==club_name].iloc[0].name
    team_df.at[where_id, 'fbref_name'] = answer
team_df.to_csv("players_db/fm23/team_ratings.csv", index=False)