In [186]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
import warnings

In [187]:
import warnings
from pandas.errors import SettingWithCopyWarning
# Silence pandas' SettingWithCopyWarning and all FutureWarnings for the rest of the notebook.
# NOTE(review): ignoring FutureWarning also hides upcoming library behaviour changes —
# consider re-enabling it occasionally to catch deprecations.
warnings.simplefilter(action='ignore', category=(SettingWithCopyWarning))
warnings.simplefilter(action='ignore', category=(FutureWarning))

In [188]:
# Load the processed FM23 player table (path is relative to the notebook's working directory).
# presumably one row per player — verify against the preprocessing notebook
df = pd.read_csv("players_db/fm23/fm23db_processed.csv")

In [189]:
# Number of rows per club, largest squads first.
df.Club.value_counts(sort=True)

Club
Hellas Verona          74
Sassuolo               73
Internazionale         71
Benfica                71
Vizela                 68
                       ..
Sparta                 24
FC Volendam            23
Paris Saint-Germain    23
Ajax                   22
Clermont               22
Name: count, Length: 153, dtype: int64

In [190]:
# Dump the first row as a dict to see every column name with a sample value.
df.iloc[0].to_dict()

{'UID': 92039023,
 'Inf': nan,
 'Name': 'Joshua Kimmich',
 'DoB': '1995-02-08',
 'Nat': 'GER',
 'Division': 'Bundesliga',
 'Club': 'FC Bayern',
 'Based': 'Germany (Bundesliga)',
 'Preferred Foot': 'Right',
 'Right Foot': 'Very Strong',
 'Left Foot': 'Fairly Strong',
 'Position': 'DR,WBR,DM,MC',
 'Height': 177,
 'Weight': '75 kg',
 'Age': 27,
 'Transfer Value': '€96M - €119M',
 'Wage': '€1,496,000 p/m',
 'AT Apps': '253',
 'AT Gls': '23',
 'Team': nan,
 'Caps': 68,
 'Yth Apps': '14',
 'Style': 'Leader',
 'Rc Injury': '-',
 'Best Role': 'Deep Lying Playmaker',
 'Best Duty': 'Defend',
 'Best_Pos': 'DM',
 'Acceleration': 71.5,
 'Aerial Reach': 5.5,
 'Aggression': 82.5,
 'Agility': 66.0,
 'Anticipation': 82.5,
 'Balance': 71.5,
 'Bravery': 82.5,
 'Command of Area': 11.0,
 'Communication': 11.0,
 'Composure': 93.5,
 'Concentration': 77.0,
 'Corners': 82.5,
 'Crossing': 93.5,
 'Decisions': 88.0,
 'Determination': 110.0,
 'Dribbling': 66.0,
 'Eccentricity': 16.5,
 'Finishing': 60.5,
 'First To

In [191]:
# "Club" key plus the ten attribute column names used by the per-attribute team rating functions below.
# NOTE(review): not referenced anywhere else in the visible part of the notebook.
rating_attrs = ["Club","gk","def","pas","dri","fin","sta","str","hed","men","iq"]

# Formations

In [192]:
def Formation_Dict(formation="4-3-3"):
    '''
    Number of players fielded at each position.

    The ``formation`` argument is accepted but not consulted: the same
    hard-coded slot counts are returned whatever value is passed.
    Keys are returned in a fixed order, goalkeeper first and striker last.
    '''
    slot_counts = dict(
        GK=1,
        DC=2, DL=1, DR=1,
        DM=0, MC=3,
        AML=1, AMR=1,
        ST=1,
    )
    return slot_counts

<br><br><br><br>
# Rating Functions

In [193]:
def Quantile(n=16, index=0):
    '''
    Cut-off rank inside a squad of ``n`` players at one of six fixed levels.

    The ranks ``0 .. n-1`` are interpolated at the quantile levels
    ``[0, 0.25, 0.40, 0.65, 0.75, 1.0]`` and the value at position ``index``
    is truncated to an integer (e.g. ``n=16`` gives 0, 3, 6, 9, 11, 15).

    Parameters
    ----------
    n : int
        Squad size; must be at least 1.
    index : int
        Position in the list of quantile levels (0-5, negatives count from the end).

    Returns
    -------
    int
        The truncated quantile value.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    IndexError
        If ``index`` is outside the six available levels.
    '''
    if n < 1:
        raise ValueError("n must be at least 1")

    quantile_levels = [0, 0.25, 0.40, 0.65, 0.75, 1.0]
    cutoffs = pd.Series(range(n)).quantile(quantile_levels)
    # Take the scalar out of the Series before truncating: calling int() on a
    # one-element Series is deprecated in recent pandas releases.
    return int(cutoffs.iloc[index])

In [194]:
def Quarter_Rating(df, n, index, current_attribute):
    '''
    Mean of ``current_attribute`` over the best-rated rows of ``df``.

    The number of rows averaged is ``Quantile(n=n, index=index)``; if ``df``
    has fewer rows than that, all of them are used.
    '''
    top_count = Quantile(n=n, index=index)
    best_rows = df.nlargest(top_count, current_attribute)
    return best_rows[current_attribute].mean()

<br><br><br><br>
# Club Powers

In [195]:
def TPR(df, n=16, lineup=False, print_club=False):
    '''
    Team Power Rating

    Rates every club on the overall ``tpr`` column, either from its ``n``
    best players or from a starting eleven picked for the formation returned
    by ``Formation_Dict``.

    Parameters
    ----------
    df : DataFrame
        Player table. Needs ``Club`` and ``tpr``. Lineup mode additionally
        reads ``Best_Pos``, ``Position`` (comma-separated position codes),
        ``Pos_Rank`` and one ``tpr_<position>`` column per formation slot;
        ``Name`` is read only when a lineup is printed.
    n : int
        Squad depth. Used for the top-``n`` pre-selection (squad mode) and for
        the tier sizes computed by ``Quarter_Rating`` (both modes).
        NOTE(review): in lineup mode the tiers are still sized from ``n``,
        not from the eleven selected players.
    lineup : bool
        ``False`` rates the ``n`` best players of each club; ``True`` rates a
        position-aware best eleven.
    print_club : str or False
        Club whose selected lineup is printed (lineup mode only).

    Returns
    -------
    DataFrame
        Columns ``Club`` and ``tpr`` (truncated to int), sorted by ``tpr``
        in descending order.
    '''
    current_attribute = "tpr"

    def blended_rating(players):
        # Weighted mix of five nested "best-k" averages; the smallest tiers
        # (the very best players) carry the largest weights.
        p1 = Quarter_Rating(players, n, 1, current_attribute)
        p2 = Quarter_Rating(players, n, 2, current_attribute)
        p3 = Quarter_Rating(players, n, 3, current_attribute)
        p4 = Quarter_Rating(players, n, 4, current_attribute)
        p5 = Quarter_Rating(players, n, 5, current_attribute)
        return int(p1*0.40 + p2*0.35 + p3*0.15 + p4*0.05 + p5*0.05)

    club_rating_dict = {}

    for club, group in df.groupby('Club'):
        if not lineup:
            club_rating_dict[club] = blended_rating(group.nlargest(n, current_attribute))
            continue

        # Lineup mode: fill each formation slot with the best unused players.
        positions = Formation_Dict(formation="4-3-3")
        lineup_size = sum(positions.values())

        selected_players = []
        used_players = set()

        for position, count in positions.items():
            position_group = group[group['Best_Pos'] == position]

            if position_group.empty:
                # Nobody has this as best position: fall back to anyone who
                # lists it among their playable positions (missing values are
                # treated as "cannot play there").
                can_play = group['Position'].apply(
                    lambda x: isinstance(x, str) and position in x.split(",")
                )
                position_group = group[can_play]

            position_group = position_group[~position_group.index.isin(used_players)]
            top_position_players = position_group.nlargest(count, f'tpr_{position}')

            selected_players.extend(top_position_players.index.tolist())
            used_players.update(top_position_players.index.tolist())

        # Top up to a full lineup with the best remaining players. They are
        # ranked on overall tpr rather than on the rating column of whichever
        # position the loop above happened to finish with.
        while len(selected_players) < lineup_size:
            remaining_players = group[~group.index.isin(used_players)]
            if remaining_players.empty:
                break
            next_best_player = remaining_players.nlargest(1, current_attribute)
            selected_players.extend(next_best_player.index.tolist())
            used_players.update(next_best_player.index.tolist())

        lineup_players = group.loc[selected_players].sort_values(by="Pos_Rank")
        club_rating_dict[club] = blended_rating(lineup_players)

        if club == print_club:
            print(club, "XI =", len(lineup_players))
            print(lineup_players[["Name","Best_Pos","tpr"]])

    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [196]:
# Rate every club from a position-aware starting eleven and print the lineup chosen for Manchester United.
output = TPR(df, n=16, lineup=True, print_club="Manchester United")
output

Manchester United XI = 11
                   Name Best_Pos  tpr
3406    Martin Dúbravka       GK   72
3279  Aaron Wan-Bissaka       DR   70
3240      Harry Maguire       DC   85
3208     Raphaël Varane       DC   81
3262          Luke Shaw       DL   76
3227  Christian Eriksen       MC   77
3323    Scott McTominay       MC   73
3360               Fred       MC   72
3507             Antony      AMR   73
3200  Cristiano Ronaldo       ST   85
3248    Anthony Martial      AML   79


Unnamed: 0,Club,tpr
88,Liverpool,84
92,Manchester City,84
105,Paris Saint-Germain,84
47,FC Barcelona,83
77,Internazionale,82
...,...,...
45,Excelsior,63
30,Cambuur,63
55,FC Volendam,63
110,RKC,63


In [197]:
# Manchester United's players ordered by overall tpr (up to 25 rows), to compare against the lineup printed above.
df.query("Club == 'Manchester United'").sort_values(by="tpr",ascending=False)[["Name","Best_Pos","Position","tpr"]].head(n=25)

Unnamed: 0,Name,Best_Pos,Position,tpr
3200,Cristiano Ronaldo,ST,ST,85
3240,Harry Maguire,DC,DC,85
3208,Raphaël Varane,DC,DC,81
3213,Bruno Fernandes,AMC,"MC,AMC",81
3219,Casemiro,DM,"DM,MC",79
3248,Anthony Martial,AML,"AML,ST",79
3235,Marcus Rashford,AML,"AML,ST",78
3420,Lisandro Martínez,DC,"DL,DC,DM",77
3227,Christian Eriksen,MC,"DM,MC,AMC",77
3408,Phil Jones,DC,DC,76


<h3 style="color:orange;">  Keeper</h3>

In [198]:
def TGK(df, n=1):
    '''
    Team Goalkeeping Rating

    Only players whose ``Best_Pos`` is ``'GK'`` are considered. For each club
    the ``gk`` values of its ``n`` best keepers are averaged and truncated to
    an integer. Clubs without such a player do not appear in the result.

    Returns a DataFrame with columns ``Club`` and ``gk``, sorted by ``gk`` in
    descending order.
    '''
    current_attribute = "gk"
    keepers = df[df['Best_Pos'] == 'GK'].copy()

    rows = [
        (club, int(squad.nlargest(n, current_attribute)[current_attribute].mean()))
        for club, squad in keepers.groupby('Club')
    ]

    club_df = pd.DataFrame(rows, columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [199]:
# Five clubs with the highest goalkeeping rating.
TGK(df).head(n=5)

Unnamed: 0,Club,gk
47,FC Barcelona,83
48,FC Bayern,82
88,Liverpool,82
92,Manchester City,81
18,Atlético Madrid,80


<h3 style="color:orange;">  Defending</h3>

In [200]:
def TDEF(df, n=16):
    '''
    Team Defending Rating

    For each club, takes its ``n`` best players on ``def`` and blends the five
    ``Quarter_Rating`` tiers (indices 1-5) with the weights
    0.35 / 0.25 / 0.20 / 0.15 / 0.05. The blend is truncated to an integer.

    Returns a DataFrame with columns ``Club`` and ``def``, sorted by ``def``
    in descending order.
    '''
    current_attribute = "def"
    # Weight of tier 1, 2, ... 5 (tier number == Quantile index).
    tier_weights = (0.35, 0.25, 0.20, 0.15, 0.05)

    rows = []
    for club, squad in df.groupby('Club'):
        best = squad.nlargest(n, current_attribute)
        score = 0.0
        for tier, weight in enumerate(tier_weights, start=1):
            score += Quarter_Rating(best, n, tier, current_attribute) * weight
        rows.append((club, int(score)))

    club_df = pd.DataFrame(rows, columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [201]:
# Five clubs with the highest defending rating.
TDEF(df).head(n=5)

Unnamed: 0,Club,def
77,Internazionale,82
92,Manchester City,82
149,West Ham United,81
88,Liverpool,81
113,Real Madrid,81


<h3 style="color:red;">  Passing</h3>

In [202]:
def TPAS(df, n=16):
    '''
    Team Passing Rating

    For each club, takes its ``n`` best players on ``pas`` and blends the five
    ``Quarter_Rating`` tiers (indices 1-5) with the weights
    0.35 / 0.25 / 0.20 / 0.15 / 0.05. The blend is truncated to an integer.

    Returns a DataFrame with columns ``Club`` and ``pas``, sorted by ``pas``
    in descending order.
    '''
    current_attribute = "pas"
    # Weight of tier 1, 2, ... 5 (tier number == Quantile index).
    tier_weights = (0.35, 0.25, 0.20, 0.15, 0.05)

    rows = []
    for club, squad in df.groupby('Club'):
        best = squad.nlargest(n, current_attribute)
        score = 0.0
        for tier, weight in enumerate(tier_weights, start=1):
            score += Quarter_Rating(best, n, tier, current_attribute) * weight
        rows.append((club, int(score)))

    club_df = pd.DataFrame(rows, columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [203]:
# Five clubs with the highest passing rating.
TPAS(df).head(n=5)

Unnamed: 0,Club,pas
113,Real Madrid,92
105,Paris Saint-Germain,91
92,Manchester City,89
88,Liverpool,87
125,Sevilla,87


<h3 style="color:red;">  Dribbling</h3>

In [204]:
def TDRI(df, n=16):
    '''
    Team Dribbling Rating

    For each club, takes its ``n`` best players on ``dri`` and blends the five
    ``Quarter_Rating`` tiers (indices 1-5) with the weights
    0.35 / 0.25 / 0.20 / 0.15 / 0.05. The blend is truncated to an integer.

    Returns a DataFrame with columns ``Club`` and ``dri``, sorted by ``dri``
    in descending order.
    '''
    current_attribute = "dri"
    # Weight of tier 1, 2, ... 5 (tier number == Quantile index).
    tier_weights = (0.35, 0.25, 0.20, 0.15, 0.05)

    rows = []
    for club, squad in df.groupby('Club'):
        best = squad.nlargest(n, current_attribute)
        score = 0.0
        for tier, weight in enumerate(tier_weights, start=1):
            score += Quarter_Rating(best, n, tier, current_attribute) * weight
        rows.append((club, int(score)))

    club_df = pd.DataFrame(rows, columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [205]:
# Five clubs with the highest dribbling rating.
TDRI(df).head(n=5)

Unnamed: 0,Club,dri
105,Paris Saint-Germain,89
48,FC Bayern,87
92,Manchester City,86
88,Liverpool,86
150,Wolverhampton,85


<h3 style="color:red;">  Finishing</h3>

In [206]:
def TFIN(df, n=16):
    '''
    Team Finishing Rating

    For each club, takes its ``n`` best players on ``fin`` and blends the five
    ``Quarter_Rating`` tiers (indices 1-5) with the weights
    0.35 / 0.25 / 0.20 / 0.15 / 0.05. The blend is truncated to an integer.

    Returns a DataFrame with columns ``Club`` and ``fin``, sorted by ``fin``
    in descending order.
    '''
    current_attribute = "fin"
    # Weight of tier 1, 2, ... 5 (tier number == Quantile index).
    tier_weights = (0.35, 0.25, 0.20, 0.15, 0.05)

    rows = []
    for club, squad in df.groupby('Club'):
        best = squad.nlargest(n, current_attribute)
        score = 0.0
        for tier, weight in enumerate(tier_weights, start=1):
            score += Quarter_Rating(best, n, tier, current_attribute) * weight
        rows.append((club, int(score)))

    club_df = pd.DataFrame(rows, columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [207]:
# Five clubs with the highest finishing rating.
TFIN(df).head(n=5)

Unnamed: 0,Club,fin
113,Real Madrid,86
134,Tottenham Hotspur,85
92,Manchester City,85
93,Manchester United,84
16,Atalanta,84


<h3 style="color:green;">  Stamina</h3>

In [208]:
def TSTA(df, n=16):
    '''
    Team Stamina Rating

    For each club, takes its ``n`` best players on ``sta`` and blends the five
    ``Quarter_Rating`` tiers (indices 1-5) with the weights
    0.35 / 0.25 / 0.20 / 0.15 / 0.05. The blend is truncated to an integer.

    Returns a DataFrame with columns ``Club`` and ``sta``, sorted by ``sta``
    in descending order.
    '''
    current_attribute = "sta"
    # Weight of tier 1, 2, ... 5 (tier number == Quantile index).
    tier_weights = (0.35, 0.25, 0.20, 0.15, 0.05)

    rows = []
    for club, squad in df.groupby('Club'):
        best = squad.nlargest(n, current_attribute)
        score = 0.0
        for tier, weight in enumerate(tier_weights, start=1):
            score += Quarter_Rating(best, n, tier, current_attribute) * weight
        rows.append((club, int(score)))

    club_df = pd.DataFrame(rows, columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [209]:
# Five clubs with the highest stamina rating.
TSTA(df).head(n=5)

Unnamed: 0,Club,sta
34,Chelsea,98
85,Leeds United,96
93,Manchester United,96
48,FC Bayern,94
134,Tottenham Hotspur,93


<h3 style="color:green;">  Strength</h3>

In [210]:
def TSTR(df, n=16):
    '''
    Team Strength Rating

    For each club, takes its ``n`` best players on ``str`` and blends the five
    ``Quarter_Rating`` tiers (indices 1-5) with the weights
    0.35 / 0.25 / 0.20 / 0.15 / 0.05. The blend is truncated to an integer.

    Returns a DataFrame with columns ``Club`` and ``str``, sorted by ``str``
    in descending order.
    '''
    current_attribute = "str"
    # Weight of tier 1, 2, ... 5 (tier number == Quantile index).
    tier_weights = (0.35, 0.25, 0.20, 0.15, 0.05)

    rows = []
    for club, squad in df.groupby('Club'):
        best = squad.nlargest(n, current_attribute)
        score = 0.0
        for tier, weight in enumerate(tier_weights, start=1):
            score += Quarter_Rating(best, n, tier, current_attribute) * weight
        rows.append((club, int(score)))

    club_df = pd.DataFrame(rows, columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [211]:
# Five clubs with the highest strength rating.
TSTR(df).head(n=5)

Unnamed: 0,Club,str
77,Internazionale,93
79,Juventus,92
16,Atalanta,91
34,Chelsea,91
75,Hellas Verona,91


In [212]:
def THED(df, n=16):
    '''
    Team Heading Rating

    For each club, takes its ``n`` best players on ``hed`` and blends the five
    ``Quarter_Rating`` tiers (indices 1-5) with the weights
    0.35 / 0.25 / 0.20 / 0.15 / 0.05. The blend is truncated to an integer.

    Returns a DataFrame with columns ``Club`` and ``hed``, sorted by ``hed``
    in descending order.
    '''
    current_attribute = "hed"
    # Weight of tier 1, 2, ... 5 (tier number == Quantile index).
    tier_weights = (0.35, 0.25, 0.20, 0.15, 0.05)

    rows = []
    for club, squad in df.groupby('Club'):
        best = squad.nlargest(n, current_attribute)
        score = 0.0
        for tier, weight in enumerate(tier_weights, start=1):
            score += Quarter_Rating(best, n, tier, current_attribute) * weight
        rows.append((club, int(score)))

    club_df = pd.DataFrame(rows, columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [213]:
# Five clubs with the highest heading rating.
THED(df).head(n=5)

Unnamed: 0,Club,hed
149,West Ham United,91
77,Internazionale,91
3,AS Roma,88
93,Manchester United,87
44,Everton,86


<h3 style="color:purple;">  Mental</h3>

In [214]:
def TMEN(df, n=16):
    '''
    Team Mental Rating

    For each club, takes its ``n`` best players on ``men`` and blends the five
    ``Quarter_Rating`` tiers (indices 1-5) with the weights
    0.35 / 0.25 / 0.20 / 0.15 / 0.05. The blend is truncated to an integer.

    Returns a DataFrame with columns ``Club`` and ``men``, sorted by ``men``
    in descending order.
    '''
    current_attribute = "men"
    # Weight of tier 1, 2, ... 5 (tier number == Quantile index).
    tier_weights = (0.35, 0.25, 0.20, 0.15, 0.05)

    rows = []
    for club, squad in df.groupby('Club'):
        best = squad.nlargest(n, current_attribute)
        score = 0.0
        for tier, weight in enumerate(tier_weights, start=1):
            score += Quarter_Rating(best, n, tier, current_attribute) * weight
        rows.append((club, int(score)))

    club_df = pd.DataFrame(rows, columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [215]:
# Five clubs with the highest mental rating.
TMEN(df).head(n=5)

Unnamed: 0,Club,men
92,Manchester City,86
47,FC Barcelona,86
34,Chelsea,85
18,Atlético Madrid,84
88,Liverpool,84


In [216]:
def TIQ(df, n=16):
    '''
    Team Intelligence Rating

    For each club, takes its ``n`` best players on ``iq`` and blends the five
    ``Quarter_Rating`` tiers (indices 1-5) with the weights
    0.35 / 0.25 / 0.20 / 0.15 / 0.05. The blend is truncated to an integer.

    Returns a DataFrame with columns ``Club`` and ``iq``, sorted by ``iq``
    in descending order.
    '''
    current_attribute = "iq"
    # Weight of tier 1, 2, ... 5 (tier number == Quantile index).
    tier_weights = (0.35, 0.25, 0.20, 0.15, 0.05)

    rows = []
    for club, squad in df.groupby('Club'):
        best = squad.nlargest(n, current_attribute)
        score = 0.0
        for tier, weight in enumerate(tier_weights, start=1):
            score += Quarter_Rating(best, n, tier, current_attribute) * weight
        rows.append((club, int(score)))

    club_df = pd.DataFrame(rows, columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [217]:
# Five clubs with the highest intelligence rating.
TIQ(df).head(n=5)

Unnamed: 0,Club,iq
113,Real Madrid,84
47,FC Barcelona,84
92,Manchester City,84
105,Paris Saint-Germain,83
88,Liverpool,82


<br><br><br><br>
# TPR ALL

In [218]:
def TPRALL(df, n=16):
    '''
    Team Power Rating All Attributes

    Builds one row per club holding its identifying columns and every team
    rating computed by the individual rating functions.

    Parameters
    ----------
    df : DataFrame
        Player table. Besides the columns the individual rating functions
        read, it must contain ``Club_id``, ``League_id``, ``Division`` and
        ``Based``.
    n : int
        Squad depth passed to every rating function except ``TGK``, which is
        always called with a depth of 1.

    Returns
    -------
    DataFrame
        Columns ``Club, Club_id, League_id, Division, Based, tpr, gk, def,
        pas, dri, fin, sta, str, hed, men, iq``, sorted by ``tpr`` in
        descending order. The index is each club's position in alphabetical
        club order. A club for which a rating function produced no row (e.g.
        no player with ``Best_Pos == 'GK'``) gets NaN in that column.
    '''
    rating_frames = [
        TPR(df, n),
        TGK(df, 1),
        TDEF(df, n),
        TPAS(df, n),
        TDRI(df, n),
        TFIN(df, n),
        TSTA(df, n),
        TSTR(df, n),
        THED(df, n),
        TMEN(df, n),
        TIQ(df, n),
    ]

    # Identifying columns are read from each club's highest-tpr player
    # (the stable sort keeps the earliest row among ties).
    meta_columns = ["Club", "Club_id", "League_id", "Division", "Based"]
    club_df = (
        df.dropna(subset=["Club"])
        .sort_values("tpr", ascending=False, kind="stable")
        .drop_duplicates("Club")
        .loc[:, meta_columns]
        .sort_values("Club")
        .reset_index(drop=True)
    )

    # Join on the Club column rather than building query strings, so club
    # names containing quotes are handled and a missing rating becomes NaN.
    for ratings in rating_frames:
        club_df = club_df.merge(ratings, on="Club", how="left")

    return club_df.sort_values("tpr", ascending=False)

In [219]:
# Combined table of every team rating, computed from each club's 16 best players.
all_df = TPRALL(df, 16)
all_df

Unnamed: 0,Club,Club_id,League_id,Division,Based,tpr,gk,def,pas,dri,fin,sta,str,hed,men,iq
92,Manchester City,6827,354,English Premier Division,England (Premier Division),85,81,82,89,86,85,92,87,79,86,84
105,Paris Saint-Germain,7994,773,Ligue 1 Uber Eats,France (Ligue 1 Uber Eats),85,74,79,91,89,82,85,84,81,82,83
88,Liverpool,6518,354,English Premier Division,England (Premier Division),84,82,81,87,86,80,92,86,86,84,82
47,FC Barcelona,1435,1215,Spanish First Division,Spain (First Division),83,83,80,85,85,83,90,86,85,86,84
113,Real Madrid,8716,1215,Spanish First Division,Spain (First Division),83,77,81,92,84,86,88,87,81,83,84
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30,Cambuur,9344,363,Eredivisie,Holland (Eredivisie),64,61,64,65,71,62,73,71,72,68,63
55,FC Volendam,3948,363,Eredivisie,Holland (Eredivisie),64,61,64,68,69,61,68,69,73,66,64
45,Excelsior,3655,363,Eredivisie,Holland (Eredivisie),64,57,63,67,70,56,70,67,72,65,64
110,RKC,8812,363,Eredivisie,Holland (Eredivisie),64,59,63,67,70,61,70,73,67,68,62


In [220]:
# Export the combined club ratings as CSV (the DataFrame index is not written).

all_df.to_csv("players_db/fm23/team_ratings.csv", index=False)