In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
import warnings

In [2]:
import warnings
from pandas.errors import SettingWithCopyWarning
warnings.simplefilter(action='ignore', category=(SettingWithCopyWarning))
warnings.simplefilter(action='ignore', category=(FutureWarning))

In [3]:
df = pd.read_csv("players_db/fm23/fm23db_processed.csv")

In [36]:
df.Club.value_counts(sort=True)

Club
Hellas Verona          74
Sassuolo               73
Internazionale         71
Benfica                71
Vizela                 68
                       ..
Sparta                 24
FC Volendam            23
Paris Saint-Germain    23
Ajax                   22
Clermont               22
Name: count, Length: 153, dtype: int64

In [4]:
df.iloc[0].to_dict()

{'Unnamed: 0': 42332,
 'UID': 92039023,
 'Inf': nan,
 'Name': 'Joshua Kimmich',
 'DoB': '1995-02-08',
 'Nat': 'GER',
 'Division': 'Bundesliga',
 'Club': 'FC Bayern',
 'Based': 'Germany (Bundesliga)',
 'Preferred Foot': 'Right',
 'Right Foot': 'Very Strong',
 'Left Foot': 'Fairly Strong',
 'Position': 'DR,WBR,DM,MC',
 'Height': 177,
 'Weight': '75 kg',
 'Age': 27,
 'Transfer Value': '€96M - €119M',
 'Wage': '€1,496,000 p/m',
 'AT Apps': '253',
 'AT Gls': '23',
 'Team': nan,
 'Caps': 68,
 'Yth Apps': '14',
 'Style': 'Leader',
 'Rc Injury': '-',
 'Best Role': 'Deep Lying Playmaker',
 'Best Duty': 'Defend',
 'Best_Pos': 'DM',
 'Acceleration': 71.5,
 'Aerial Reach': 5.5,
 'Aggression': 82.5,
 'Agility': 66.0,
 'Anticipation': 82.5,
 'Balance': 71.5,
 'Bravery': 82.5,
 'Command of Area': 11.0,
 'Communication': 11.0,
 'Composure': 93.5,
 'Concentration': 77.0,
 'Corners': 82.5,
 'Crossing': 93.5,
 'Decisions': 88.0,
 'Determination': 110.0,
 'Dribbling': 66.0,
 'Eccentricity': 16.5,
 'Finish

In [5]:
rating_attrs = ["Club","gk","def","pas","dri","fin","sta","str","hed","men","iq"]

# Formations

In [6]:
def Formation_Dict(formation="4-3-3"):
    test_formation = { 'GK': 1, 'DC': 2, 'DL': 1, 'DR': 1, 'DM': 0, 'MC': 3, 'AML': 1, 'AMR': 1, 'ST': 1 }
    return test_formation

<br><br><br><br>
# Rating Functions

In [7]:
def Quantile(n=16, index=0):
    data_numeric = pd.DataFrame(list(range(n)))
    quantile_intervals = [0, 0.25, 0.40, 0.65, 0.75, 1.0]
    qval = data_numeric.quantile(quantile_intervals)
    return int(qval.iloc[index]) 

In [8]:
def Quarter_Rating(df, n, index, current_attribute):
    quantile = Quantile(n=n, index=index)
    output = df.nlargest(quantile, current_attribute)[current_attribute].mean()
    return output

<br><br><br><br>
# Club Powers

In [9]:
def TPR(df, n=16, lineup=False, print_club=False):
    '''
    Team Power Rating
    '''
    # Choose Attribute
    current_attribute = "tpr"
    zoom_df = df
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    if not lineup:
        for club, group in club_groups:
            top_players = group.nlargest(n, 'tpr')
            p1 = Quarter_Rating(top_players, n, 1, current_attribute)
            p2 = Quarter_Rating(top_players, n, 2, current_attribute)
            p3 = Quarter_Rating(top_players, n, 3, current_attribute)
            p4 = Quarter_Rating(top_players, n, 4, current_attribute)
            p5 = Quarter_Rating(top_players, n, 5, current_attribute)
            club_rating_dict[club] = int(p1*0.40 + p2*0.40 + p3*0.15 + p4*0.05 + p5*0.05)
            
    else: # Lineup_df is active
        for club, group in club_groups:
            positions = Formation_Dict(formation="4-3-3")
            
            selected_players = []
            used_players = set()

            for position, count in positions.items():
                position_group = group[group['Best_Pos'] == position]
                
                if position_group.empty:
                    position_group = group[group['Position'].apply(lambda x: position in x)]
                
                position_group = position_group[~position_group.index.isin(used_players)]
                top_position_players = position_group.nlargest(count, 'tpr')
                
                selected_players.extend(top_position_players.index.tolist())
                used_players.update(top_position_players.index.tolist())

            # Ensure we have exactly 11 players 
            while len(selected_players) < 11:
                remaining_players = group[~group.index.isin(used_players)]
                if remaining_players.empty:
                    break
                next_best_player = remaining_players.nlargest(1, 'tpr')
                selected_players.extend(next_best_player.index.tolist())
                used_players.update(next_best_player.index.tolist())

            lineup_players = group.loc[selected_players].sort_values(by="Pos_Rank")
            p1 = Quarter_Rating(lineup_players, n, 1, current_attribute)
            p2 = Quarter_Rating(lineup_players, n, 2, current_attribute)
            p3 = Quarter_Rating(lineup_players, n, 3, current_attribute)
            p4 = Quarter_Rating(lineup_players, n, 4, current_attribute)
            p5 = Quarter_Rating(lineup_players, n, 5, current_attribute)
            club_rating_dict[club] = int(p1*0.40 + p2*0.40 + p3*0.15 + p4*0.05 + p5*0.05)
            
            # if club == 'AJ Auxerre':
            #     print(lineup_players[["Name","Best_Pos","tpr"]])
            
            # if len(lineup_players) != 11:
            #     print(club, len(lineup_players), "Players")
            #     print(lineup_players[["Name","Best_Pos","tpr"]])
            #     print("\n")
            #     print("\n")
            #     print("\n")

            if club == print_club:
                print(club, "XI =", len(lineup_players))
                print(lineup_players[["Name","Best_Pos","tpr"]])
    
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [10]:
output = TPR(df, n=45, lineup=True, print_club="Real Madrid")
output

Real Madrid XI = 11
                   Name Best_Pos  tpr
4168   Thibaut Courtois       GK   76
4211      Dani Carvajal       DR   74
4217    Antonio Rüdiger       DC   84
4219       Éder Militão       DC   81
4188        David Alaba       DL   77
4165        Luka Modrić       MC   80
4176         Toni Kroos       MC   80
4183  Federico Valverde       MC   76
4218      Marco Asensio      AMR   79
4166      Karim Benzema       ST   86
4170    Vinícius Júnior      AML   84


Unnamed: 0,Club,tpr
91,Manchester City,84
113,Real Madrid,83
104,Paris Saint-Germain,83
87,Liverpool,83
47,FC Bayern,82
...,...,...
152,İstanbulspor,65
54,FC Volendam,63
109,RKC,63
29,Cambuur,63


In [11]:
df.query("Club == 'Real Madrid'").sort_values(by="tpr",ascending=False)[["Name","Best_Pos","Position","tpr"]]

Unnamed: 0,Name,Best_Pos,Position,tpr
4166,Karim Benzema,ST,ST,86
4217,Antonio Rüdiger,DC,DC,84
4170,Vinícius Júnior,AML,"AMR,AML",84
4219,Éder Militão,DC,DC,81
4213,Rodrygo,AML,"AMR,AML,ST",80
...,...,...,...,...
6325,Isi,AMR,"AMR,AML",37
6330,Jorge,GK,GK,37
6343,Yago,MC,"MC,AMC",36
6319,Axel,DC,DC,36


<h3 style="color:orange;">  Keeper</h3>

In [12]:
def TGK(df, n=1):
    '''
    Team Goalkeeping Rating
    '''
    # Choose Attribute
    current_attribute = "gk"
    zoom_df = df[df['Best_Pos'] == 'GK'].copy()
    
    # Group by club
    club_groups = zoom_df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        average_rating = top_players[current_attribute].mean()
        club_rating_dict[club] = int(average_rating)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [13]:
TGK(df).head(n=5)

Unnamed: 0,Club,gk
46,FC Barcelona,83
47,FC Bayern,82
87,Liverpool,82
91,Manchester City,81
18,Atlético Madrid,80


<h3 style="color:orange;">  Defending</h3>

In [14]:
def TDEF(df, n=16):
    '''
    Team Defending Rating
    '''
    # Choose Attribute
    current_attribute = "def"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [15]:
TDEF(df).head(n=5)

Unnamed: 0,Club,def
76,Internazionale,82
91,Manchester City,82
149,West Ham United,81
87,Liverpool,81
113,Real Madrid,81


<h3 style="color:red;">  Passing</h3>

In [16]:
def TPAS(df, n=16):
    '''
    Team Passing Rating
    '''
    # Choose Attribute
    current_attribute = "pas"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [17]:
TPAS(df).head(n=5)

Unnamed: 0,Club,pas
113,Real Madrid,92
104,Paris Saint-Germain,91
91,Manchester City,89
125,Sevilla,87
87,Liverpool,87


<h3 style="color:red;">  Dribbling</h3>

In [18]:
def TDRI(df, n=16):
    '''
    Team Dribbling Rating
    '''
    # Choose Attribute
    current_attribute = "dri"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [19]:
TDRI(df).head(n=5)

Unnamed: 0,Club,dri
104,Paris Saint-Germain,89
47,FC Bayern,87
91,Manchester City,86
87,Liverpool,86
150,Wolverhampton,85


<h3 style="color:red;">  Finishing</h3>

In [20]:
def TFIN(df, n=16):
    '''
    Team Finishing Rating
    '''
    # Choose Attribute
    current_attribute = "fin"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [21]:
TFIN(df).head(n=5)

Unnamed: 0,Club,fin
113,Real Madrid,86
134,Tottenham Hotspur,85
91,Manchester City,85
16,Atalanta,84
92,Manchester United,84


<h3 style="color:green;">  Stamina</h3>

In [22]:
def TSTA(df, n=16):
    '''
    Team Stamina Rating
    '''
    # Choose Attribute
    current_attribute = "sta"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [23]:
TSTA(df).head(n=5)

Unnamed: 0,Club,sta
33,Chelsea,98
92,Manchester United,96
84,Leeds United,96
47,FC Bayern,94
134,Tottenham Hotspur,93


<h3 style="color:green;">  Strength</h3>

In [24]:
def TSTR(df, n=16):
    '''
    Team Strength Rating
    '''
    # Choose Attribute
    current_attribute = "str"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [25]:
TSTR(df).head(n=5)

Unnamed: 0,Club,str
76,Internazionale,93
78,Juventus,92
16,Atalanta,91
74,Hellas Verona,91
33,Chelsea,91


In [26]:
def THED(df, n=16):
    '''
    Team Heading Rating
    '''
    # Choose Attribute
    current_attribute = "hed"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [27]:
THED(df).head(n=5)

Unnamed: 0,Club,hed
76,Internazionale,91
149,West Ham United,91
3,AS Roma,88
92,Manchester United,87
43,Everton,86


<h3 style="color:purple;">  Mental</h3>

In [28]:
def TMEN(df, n=16):
    '''
    Team Mental Rating
    '''
    # Choose Attribute
    current_attribute = "men"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [29]:
TMEN(df).head(n=5)

Unnamed: 0,Club,men
91,Manchester City,86
46,FC Barcelona,86
33,Chelsea,85
18,Atlético Madrid,84
47,FC Bayern,84


In [30]:
def TIQ(df, n=16):
    '''
    Team Intelligence Rating
    '''
    # Choose Attribute
    current_attribute = "iq"
    
    # Group by club
    club_groups = df.groupby('Club')
    club_rating_dict = {}

    for club, group in club_groups:
        top_players = group.nlargest(n, current_attribute)
        p1 = Quarter_Rating(top_players, n, 1, current_attribute)
        p2 = Quarter_Rating(top_players, n, 2, current_attribute)
        p3 = Quarter_Rating(top_players, n, 3, current_attribute)
        p4 = Quarter_Rating(top_players, n, 4, current_attribute)
        p5 = Quarter_Rating(top_players, n, 5, current_attribute)
        club_rating_dict[club] = int(p1*0.35 + p2*0.25 + p3*0.20 + p4*0.15 + p5*0.05)
        
    club_df = pd.DataFrame(list(club_rating_dict.items()), columns=['Club', current_attribute])
    return club_df.sort_values(current_attribute, ascending=False)

In [31]:
TIQ(df).head(n=5)

Unnamed: 0,Club,iq
91,Manchester City,84
46,FC Barcelona,84
113,Real Madrid,84
104,Paris Saint-Germain,83
33,Chelsea,82
