<a href="https://colab.research.google.com/github/hoangthanh300405-ops/football-ai-colab-/blob/main/ranking_players.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [38]:
import pandas as pd
import numpy as np

In [39]:
df = pd.read_csv('lineups.csv')
df.head(15)

Unnamed: 0,eventId,teamId,homeAway,starter,athleteDisplayName,Pos,Age,Min/Match,Save%,GA/Match,...,DEF,Save_n,CS_n,GA_gk_n,PK_n,GK,Rating,ATK_team,DEF_team,GK_team
0,704279,Manchester United,home,1,André Onana,GK,28.0,90.0,68.9,1.29,...,0.0,0.481667,0.265,0.43,0.25,19.883333,19.883333,21.216109,19.559019,19.883333
1,704279,Manchester United,home,1,Noussair Mazraoui,DF,26.0,76.7,,,...,41.314408,0.0,0.0,0.0,0.0,0.0,41.314408,21.216109,19.559019,19.883333
2,704279,Manchester United,home,1,Diogo Dalot,DF,25.0,85.27,,,...,41.643593,0.0,0.0,0.0,0.0,0.0,41.643593,21.216109,19.559019,19.883333
3,704279,Manchester United,home,1,Kobbie Mainoo,MF,19.0,66.04,,,...,0.0,0.0,0.0,0.0,0.0,0.0,4.450421,21.216109,19.559019,19.883333
4,704279,Manchester United,home,1,Harry Maguire,DF,31.0,65.11,,,...,43.860164,0.0,0.0,0.0,0.0,0.0,43.860164,21.216109,19.559019,19.883333
5,704279,Manchester United,home,1,Lisandro Martínez,DF,26.0,87.55,,,...,51.113031,0.0,0.0,0.0,0.0,0.0,51.113031,21.216109,19.559019,19.883333
6,704279,Manchester United,home,1,Amad,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.216109,19.559019,19.883333
7,704279,Manchester United,home,1,Casemiro,MF,32.0,62.25,,,...,0.0,0.0,0.0,0.0,0.0,0.0,6.001932,21.216109,19.559019,19.883333
8,704279,Manchester United,home,1,Bruno Fernandes,MF,29.0,83.83,,,...,0.0,0.0,0.0,0.0,0.0,0.0,43.802964,21.216109,19.559019,19.883333
9,704279,Manchester United,home,1,Mason Mount,MF,25.0,36.59,,,...,0.0,0.0,0.0,0.0,0.0,0.0,5.036202,21.216109,19.559019,19.883333


In [40]:
import pandas as pd


def normalize(col):
    valid = col.dropna()
    if valid.empty or valid.max() == valid.min():
        return pd.Series(0.0, index=col.index)
    return ((col - valid.min()) / (valid.max() - valid.min())).fillna(0)

df['Pos'] = df['Pos'].fillna('').astype(str)

# PERSONAL POINT

df['GA_n']    = normalize(df['G+A/Match'])
df['xAG_n']   = normalize(df['xAG'])
df['PrgC_n']  = normalize(df['PrgC'])

df['ATK'] = 100 * (
    0.50 * df['GA_n'] +
    0.30 * df['xAG_n'] +
    0.20 * df['PrgC_n']
)


df['Clr_n'] = normalize(df['Clr/Match'])
df['Tkl_n'] = normalize(df['TklW%'])
df['Blk_n'] = normalize(df['Blocks/Match'])
df['Int_n'] = normalize(df['Int/Match'])
df['Err_n'] = normalize(df['Err/Match'])

df['DEF'] = 100 * (
    0.30 * df['Tkl_n'] +
    0.25 * df['Int_n'] +
    0.20 * df['Clr_n'] +
    0.15 * df['Blk_n'] -
    0.10 * df['Err_n']
)


df['Save_n']  = normalize(df['Save%'])
df['CS_n']    = normalize(df['CS%'])
df['GA_gk_n'] = normalize(df['GA/Match'])
df['PK_n']    = normalize(df['PKsv%'])

df['GK'] = 100 * (
    0.35 * df['Save_n'] +
    0.25 * df['CS_n'] +
    0.20 * df['PK_n'] -
    0.20 * df['GA_gk_n']
)


# RATING
def calc_rating(row):
    pos = row['Pos']
    if pos == '':
        return 0
    if 'GK' in pos:
        return row['GK']
    if 'DF' in pos and 'MF' in pos:
        return 0.65 * row['DEF'] + 0.35 * row['ATK']
    if 'MF' in pos and 'FW' in pos:
        return 0.70 * row['ATK'] + 0.30 * row['DEF']
    if 'DF' in pos:
        return row['DEF']
    if 'MF' in pos:
        return 0.60 * row['ATK'] + 0.40 * row['DEF']
    if 'FW' in pos:
        return row['ATK']
    return 0

df['Rating'] = df.apply(calc_rating, axis=1)

df.loc[df['Min/Match'] < 30, ['ATK','DEF','GK','Rating']] *= 0.5

# TEAM

atk_team = (
    df[df['Pos'].str.contains('FW|MF')]
    .groupby('teamId')['ATK']
    .mean()
    .rename('ATK_team')
)

def_team = (
    df[df['Pos'].str.contains('DF|MF')]
    .groupby('teamId')['DEF']
    .mean()
    .rename('DEF_team')
)

gk_team = (
    df[df['Pos'].str.contains('GK')]
    .groupby('teamId')['GK']
    .mean()
    .rename('GK_team')
)

df = df.merge(atk_team, on='teamId', how='left')
df = df.merge(def_team, on='teamId', how='left')
df = df.merge(gk_team, on='teamId', how='left')


norm_cols = [c for c in df.columns if c.endswith('_n')]
df.drop(columns=norm_cols, inplace=True)


df.head(20)


Unnamed: 0,eventId,teamId,homeAway,starter,athleteDisplayName,Pos,Age,Min/Match,Save%,GA/Match,...,ATK,DEF,GK,Rating,ATK_team_x,DEF_team_x,GK_team_x,ATK_team_y,DEF_team_y,GK_team_y
0,704279,Manchester United,home,1,André Onana,GK,28.0,90.0,68.9,1.29,...,0.0,0.0,19.883333,19.883333,21.216109,19.559019,19.883333,21.216109,19.559019,19.883333
1,704279,Manchester United,home,1,Noussair Mazraoui,DF,26.0,76.7,,,...,0.0,41.314408,0.0,41.314408,21.216109,19.559019,19.883333,21.216109,19.559019,19.883333
2,704279,Manchester United,home,1,Diogo Dalot,DF,25.0,85.27,,,...,0.0,41.643593,0.0,41.643593,21.216109,19.559019,19.883333,21.216109,19.559019,19.883333
3,704279,Manchester United,home,1,Kobbie Mainoo,MF,19.0,66.04,,,...,7.417368,0.0,0.0,4.450421,21.216109,19.559019,19.883333,21.216109,19.559019,19.883333
4,704279,Manchester United,home,1,Harry Maguire,DF,31.0,65.11,,,...,0.0,43.860164,0.0,43.860164,21.216109,19.559019,19.883333,21.216109,19.559019,19.883333
5,704279,Manchester United,home,1,Lisandro Martínez,DF,26.0,87.55,,,...,0.0,51.113031,0.0,51.113031,21.216109,19.559019,19.883333,21.216109,19.559019,19.883333
6,704279,Manchester United,home,1,Amad,,,,,,...,0.0,0.0,0.0,0.0,21.216109,19.559019,19.883333,21.216109,19.559019,19.883333
7,704279,Manchester United,home,1,Casemiro,MF,32.0,62.25,,,...,10.00322,0.0,0.0,6.001932,21.216109,19.559019,19.883333,21.216109,19.559019,19.883333
8,704279,Manchester United,home,1,Bruno Fernandes,MF,29.0,83.83,,,...,73.004941,0.0,0.0,43.802964,21.216109,19.559019,19.883333,21.216109,19.559019,19.883333
9,704279,Manchester United,home,1,Mason Mount,MF,25.0,36.59,,,...,8.393671,0.0,0.0,5.036202,21.216109,19.559019,19.883333,21.216109,19.559019,19.883333


In [41]:
df.columns


Index(['eventId', 'teamId', 'homeAway', 'starter', 'athleteDisplayName', 'Pos',
       'Age', 'Min/Match', 'Save%', 'GA/Match', 'CS%', 'PKsv%', 'Clr/Match',
       'TklW%', 'Blocks/Match', 'Int/Match', 'Err/Match', 'G+A/Match', 'xAG',
       'PrgC', 'PrgP', 'KP/Match', 'G+A-PK/Match', 'npxG', 'PrgR', 'SoT%',
       'Sh/90', 'ATK', 'DEF', 'GK', 'Rating', 'ATK_team_x', 'DEF_team_x',
       'GK_team_x', 'ATK_team_y', 'DEF_team_y', 'GK_team_y'],
      dtype='object')