In [136]:
import pandas as pd
import numpy as np

# data preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# visualization
import seaborn as sns
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)

In [2]:
dataframe_players = pd.read_csv("dcereijo-player-scores/players")
dataframe_players.head()

Unnamed: 0,player_id,last_season,current_club_id,name,pretty_name,country_of_birth,country_of_citizenship,date_of_birth,position,sub_position,foot,height_in_cm,market_value_in_gbp,highest_market_value_in_gbp,url
0,214258,2014,3729,andrey-naletov,Andrey Naletov,Russia,Russia,1996-03-31,Defender,Centre-Back,Right,179,,23000.0,https://www.transfermarkt.co.uk/andrey-naletov...
1,45247,2014,1162,damien-perquis,Damien Perquis,France,France,1986-03-08,Goalkeeper,Goalkeeper,Right,186,360000.0,900000.0,https://www.transfermarkt.co.uk/damien-perquis...
2,61389,2014,3729,ivan-lozenkov,Ivan Lozenkov,UdSSR,Russia,1984-04-14,Defender,Centre-Back,Left,182,,270000.0,https://www.transfermarkt.co.uk/ivan-lozenkov/...
3,217036,2014,3729,aleksandr-zakarlyuka,Aleksandr Zakarlyuka,Estonia,Russia,1995-06-24,Midfield,Central Midfield,Right,178,203000.0,203000.0,https://www.transfermarkt.co.uk/aleksandr-zaka...
4,97205,2014,28095,nikita-bezlikhotnov,Nikita Bezlikhotnov,UdSSR,Russia,1990-08-19,Attack,Right Winger,Right,179,225000.0,675000.0,https://www.transfermarkt.co.uk/nikita-bezlikh...


In [3]:
dataframe_players.shape

(22604, 15)

## Games Data

In [4]:
dataframe_games = pd.read_csv("dcereijo-player-scores/games")
dataframe_games

Unnamed: 0,game_id,competition_code,season,round,date,home_club_id,away_club_id,home_club_goals,away_club_goals,home_club_position,away_club_position,stadium,attendance,referee,url
0,2457642,NLSC,2014,Final,2014-08-03,1269,610,1,0,,,Johan Cruijff ArenA,42000.0,Danny Makkelie,https://www.transfermarkt.co.uk/spielbericht/i...
1,2639088,BESC,2013,Final,2014-07-20,58,498,2,1,,,Constant Vanden Stock Stadion,13733.0,Jonathan Lardot,https://www.transfermarkt.co.uk/spielbericht/i...
2,2481145,SUC,2014,final 1st leg,2014-08-19,418,13,1,1,,,Santiago Bernabéu,75897.0,Estrada Fernández,https://www.transfermarkt.co.uk/spielbericht/i...
3,2484338,POSU,2014,Final,2014-08-10,294,2425,3,2,,,Estádio Municipal de Aveiro,30000.0,Duarte Gomes,https://www.transfermarkt.co.uk/spielbericht/i...
4,2502472,FRCH,2014,Final,2014-08-02,583,855,2,0,,,Workers Stadium,39752.0,Clément Turpin,https://www.transfermarkt.co.uk/spielbericht/i...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43955,3643307,CLQ,2021,Qualifying Round 1st leg,2021-08-17,2481,419,3,0,,,Sheriff,5281.0,Cüneyt Cakir,https://www.transfermarkt.co.uk/spielbericht/i...
43956,3629201,ELQ,2021,3rd round 2nd leg,2021-08-12,602,4059,0,1,,,LFF stadionas,,Bartosz Frankowski,https://www.transfermarkt.co.uk/spielbericht/i...
43957,3584561,CLQ,2020,Second Round 2nd leg,2021-07-28,865,371,2,1,,,MCH Arena,4900.0,Bartosz Frankowski,https://www.transfermarkt.co.uk/spielbericht/i...
43958,3643357,ELQ,2021,Qualifying Round 1st leg,2021-08-19,62,255,2,2,,,Sinobo Stadium,14543.0,Felix Zwayer,https://www.transfermarkt.co.uk/spielbericht/i...


## Club Data

In [5]:
dataframe_clubs = pd.read_csv("dcereijo-player-scores/clubs")
dataframe_clubs.head()

Unnamed: 0,club_id,name,pretty_name,domestic_competition_id,total_market_value,squad_size,average_age,foreigners_number,foreigners_percentage,national_team_players,stadium_name,stadium_seats,net_transfer_record,coach_name,url
0,3302,ud-almeria,Ud Almeria,ES1,51.66,25,25.7,10,40.0,2,Estadio de los Juegos Mediterráneos,15274,£-4.82m,Francisco Rodríguez,https://www.transfermarkt.co.uk/ud-almeria/sta...
1,5572,niki-volou,Niki Volou,GR1,3.4,24,28.0,6,25.0,1,Panthessaliko Stadio,22700,+-0,Wiljan Vloet,https://www.transfermarkt.co.uk/niki-volou/sta...
2,20698,balikesirspor,Balikesirspor,TR1,1.58,30,24.6,9,30.0,1,Balıkesir Atatürk,15800,+-0,Ismail Ertekin,https://www.transfermarkt.co.uk/balikesirspor/...
3,6894,kayseri-erciyesspor,Kayseri Erciyesspor,TR1,,0,,0,,0,Kayseri Atatürk Spor Kompleksi Yan Açık Saha,2000,+-0,Hikmet Karaman,https://www.transfermarkt.co.uk/kayseri-erciye...
4,1429,cesena-fc,Cesena Fc,IT1,6.82,29,24.9,4,13.8,1,Orogel Stadium - Dino Manuzzi,23860,+£90Th.,Pierpaolo Bisoli,https://www.transfermarkt.co.uk/cesena-fc/star...


In [6]:
dataframe_clubs_edited = dataframe_clubs.drop(labels= ["pretty_name", "domestic_competition_id", "stadium_name", "stadium_seats", "coach_name", "url"], axis= 1)

club_columns = [f"club_{x}" if x != "club_id" else x for x in list(dataframe_clubs_edited.columns) ]
mapping_club = {key:value for key, value in zip(list(dataframe_clubs_edited.columns), club_columns)}
dataframe_clubs_edited.rename(columns= mapping_club, inplace= True)
dataframe_clubs_edited

Unnamed: 0,club_id,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record
0,3302,ud-almeria,51.66,25,25.7,10,40.0,2,£-4.82m
1,5572,niki-volou,3.40,24,28.0,6,25.0,1,+-0
2,20698,balikesirspor,1.58,30,24.6,9,30.0,1,+-0
3,6894,kayseri-erciyesspor,,0,,0,,0,+-0
4,1429,cesena-fc,6.82,29,24.9,4,13.8,1,+£90Th.
...,...,...,...,...,...,...,...,...,...
374,232,spartak-moskau,106.25,25,24.6,8,32.0,11,£-10.82m
375,932,lokomotiv-moskau,71.01,25,24.1,8,32.0,6,£-28.07m
376,7378,portimonense-sc,22.28,30,24.9,26,86.7,4,+£6.41m
377,200,fc-utrecht,31.50,31,24.7,9,29.0,1,+£4.91m


## Competitions Data

In [7]:
dataframe_competitions = pd.read_csv("dcereijo-player-scores/competitions")
dataframe_competitions.head()

Unnamed: 0,competition_id,name,type,country_id,country_name,domestic_league_code,confederation,url
0,L1,bundesliga,first_tier,40,Germany,L1,europa,https://www.transfermarkt.co.uk/bundesliga/sta...
1,DFB,dfb-pokal,domestic_cup,40,Germany,L1,europa,https://www.transfermarkt.co.uk/dfb-pokal/star...
2,DFL,dfl-supercup,domestic_super_cup,40,Germany,L1,europa,https://www.transfermarkt.co.uk/dfl-supercup/s...
3,NL1,eredivisie,first_tier,122,Netherlands,NL1,europa,https://www.transfermarkt.co.uk/eredivisie/sta...
4,NLP,toto-knvb-beker,domestic_cup,122,Netherlands,NL1,europa,https://www.transfermarkt.co.uk/toto-knvb-beke...


In [8]:
dataframe_appearances = pd.read_csv("dcereijo-player-scores/appearances")
dataframe_appearances

Unnamed: 0,player_id,game_id,appearance_id,competition_id,player_club_id,goals,assists,minutes_played,yellow_cards,red_cards
0,52453,2483937.0,2483937_52453,RU1,28095,0,0,90,0,0
1,67064,2479929.0,2479929_67064,RU1,28095,0,0,90,0,0
2,67064,2483937.0,2483937_67064,RU1,28095,0,0,90,0,0
3,67064,2484582.0,2484582_67064,RU1,28095,0,0,55,0,0
4,67064,2485965.0,2485965_67064,RU1,28095,0,0,90,0,0
...,...,...,...,...,...,...,...,...,...,...
1021114,74223,3580335.0,3580335_74223,DK1,678,0,1,90,1,0
1021115,811544,3602561.0,3602561_811544,RU1,2696,0,0,2,0,0
1021116,811544,3602567.0,3602567_811544,RU1,2696,0,0,1,0,0
1021117,811544,3602578.0,3602578_811544,RU1,2696,0,0,1,0,0


In [9]:
left_df = dataframe_appearances
app_games_merged = left_df.merge(dataframe_games, on= "game_id", how= "left")
app_games_merged= app_games_merged.drop(labels= ["game_id", "url", "referee", "stadium", "attendance", "date", "appearance_id"], axis= 1)
app_games_merged

Unnamed: 0,player_id,competition_id,player_club_id,goals,assists,minutes_played,yellow_cards,red_cards,competition_code,season,round,home_club_id,away_club_id,home_club_goals,away_club_goals,home_club_position,away_club_position
0,52453,RU1,28095,0,0,90,0,0,RU1,2014.0,2. Matchday,4128.0,28095.0,0.0,1.0,14.0,9.0
1,67064,RU1,28095,0,0,90,0,0,RU1,2014.0,1. Matchday,2439.0,28095.0,2.0,0.0,6.0,11.0
2,67064,RU1,28095,0,0,90,0,0,RU1,2014.0,2. Matchday,4128.0,28095.0,0.0,1.0,14.0,9.0
3,67064,RU1,28095,0,0,55,0,0,RU1,2014.0,3. Matchday,28095.0,121.0,0.0,2.0,10.0,4.0
4,67064,RU1,28095,0,0,90,0,0,RU1,2014.0,4. Matchday,964.0,28095.0,1.0,0.0,1.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1021114,74223,DK1,678,0,1,90,1,0,,,,,,,,,
1021115,811544,RU1,2696,0,0,2,0,0,RU1,2021.0,20. Matchday,2696.0,3729.0,2.0,2.0,,
1021116,811544,RU1,2696,0,0,1,0,0,RU1,2021.0,21. Matchday,2696.0,964.0,1.0,1.0,,
1021117,811544,RU1,2696,0,0,1,0,0,RU1,2021.0,22. Matchday,41231.0,2696.0,2.0,3.0,,


In [10]:
app_games_comp_merged = app_games_merged.merge(dataframe_competitions, on= "competition_id", how= "left")
app_games_comp_merged = app_games_comp_merged.drop(labels= ["confederation", "competition_code", "url", "domestic_league_code", "country_id"], axis= 1)
app_games_comp_merged.rename(columns= {"player_club_id": "club_id"}, inplace= True)
app_games_comp_merged

Unnamed: 0,player_id,competition_id,club_id,goals,assists,minutes_played,yellow_cards,red_cards,season,round,home_club_id,away_club_id,home_club_goals,away_club_goals,home_club_position,away_club_position,name,type,country_name
0,52453,RU1,28095,0,0,90,0,0,2014.0,2. Matchday,4128.0,28095.0,0.0,1.0,14.0,9.0,premier-liga,first_tier,Russia
1,67064,RU1,28095,0,0,90,0,0,2014.0,1. Matchday,2439.0,28095.0,2.0,0.0,6.0,11.0,premier-liga,first_tier,Russia
2,67064,RU1,28095,0,0,90,0,0,2014.0,2. Matchday,4128.0,28095.0,0.0,1.0,14.0,9.0,premier-liga,first_tier,Russia
3,67064,RU1,28095,0,0,55,0,0,2014.0,3. Matchday,28095.0,121.0,0.0,2.0,10.0,4.0,premier-liga,first_tier,Russia
4,67064,RU1,28095,0,0,90,0,0,2014.0,4. Matchday,964.0,28095.0,1.0,0.0,1.0,12.0,premier-liga,first_tier,Russia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1021114,74223,DK1,678,0,1,90,1,0,,,,,,,,,superligaen,first_tier,Denmark
1021115,811544,RU1,2696,0,0,2,0,0,2021.0,20. Matchday,2696.0,3729.0,2.0,2.0,,,premier-liga,first_tier,Russia
1021116,811544,RU1,2696,0,0,1,0,0,2021.0,21. Matchday,2696.0,964.0,1.0,1.0,,,premier-liga,first_tier,Russia
1021117,811544,RU1,2696,0,0,1,0,0,2021.0,22. Matchday,41231.0,2696.0,2.0,3.0,,,premier-liga,first_tier,Russia


## Get Final Season for Each Player

In [11]:
g = app_games_comp_merged.groupby(['club_id'])['season'].transform('max')
app_games_comp_merged = app_games_comp_merged[(app_games_comp_merged['season'] == g)]

app_games_comp_merged

Unnamed: 0,player_id,competition_id,club_id,goals,assists,minutes_played,yellow_cards,red_cards,season,round,home_club_id,away_club_id,home_club_goals,away_club_goals,home_club_position,away_club_position,name,type,country_name
5352,141344,CLQ,1189,0,0,5,0,0,2013.0,Second Round 1st leg,255.0,1189.0,1.0,1.0,,,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,
5354,141344,CLQ,1189,0,0,17,0,0,2013.0,Second Round 2nd leg,1189.0,255.0,0.0,5.0,,,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,
8455,164230,FR1,14171,0,0,90,0,0,2014.0,1. Matchday,14171.0,1162.0,0.0,3.0,20.0,1.0,ligue-1,first_tier,France
8482,164230,FR1,14171,0,0,74,0,0,2014.0,2. Matchday,273.0,14171.0,6.0,2.0,8.0,20.0,ligue-1,first_tier,France
8483,164230,FR1,14171,0,0,18,0,0,2014.0,3. Matchday,14171.0,583.0,0.0,0.0,20.0,6.0,ligue-1,first_tier,France
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1021113,74223,DK1,678,0,0,78,0,0,2021.0,21. Matchday,206.0,678.0,1.0,1.0,,,superligaen,first_tier,Denmark
1021115,811544,RU1,2696,0,0,2,0,0,2021.0,20. Matchday,2696.0,3729.0,2.0,2.0,,,premier-liga,first_tier,Russia
1021116,811544,RU1,2696,0,0,1,0,0,2021.0,21. Matchday,2696.0,964.0,1.0,1.0,,,premier-liga,first_tier,Russia
1021117,811544,RU1,2696,0,0,1,0,0,2021.0,22. Matchday,41231.0,2696.0,2.0,3.0,,,premier-liga,first_tier,Russia


In [12]:
app_games_comp_club_merged = app_games_comp_merged.merge(dataframe_clubs_edited, on= "club_id", how= "left")
app_games_comp_club_merged

Unnamed: 0,player_id,competition_id,club_id,goals,assists,minutes_played,yellow_cards,red_cards,season,round,home_club_id,away_club_id,home_club_goals,away_club_goals,home_club_position,away_club_position,name,type,country_name,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record
0,141344,CLQ,1189,0,0,5,0,0,2013.0,Second Round 1st leg,255.0,1189.0,1.0,1.0,,,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,,,,,,,,,
1,141344,CLQ,1189,0,0,17,0,0,2013.0,Second Round 2nd leg,1189.0,255.0,0.0,5.0,,,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,,,,,,,,,
2,164230,FR1,14171,0,0,90,0,0,2014.0,1. Matchday,14171.0,1162.0,0.0,3.0,20.0,1.0,ligue-1,first_tier,France,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0
3,164230,FR1,14171,0,0,74,0,0,2014.0,2. Matchday,273.0,14171.0,6.0,2.0,8.0,20.0,ligue-1,first_tier,France,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0
4,164230,FR1,14171,0,0,18,0,0,2014.0,3. Matchday,14171.0,583.0,0.0,0.0,20.0,6.0,ligue-1,first_tier,France,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140971,74223,DK1,678,0,0,78,0,0,2021.0,21. Matchday,206.0,678.0,1.0,1.0,,,superligaen,first_tier,Denmark,aarhus-gf,17.46,27.0,24.4,12.0,44.4,5.0,£-3.56m
140972,811544,RU1,2696,0,0,2,0,0,2021.0,20. Matchday,2696.0,3729.0,2.0,2.0,,,premier-liga,first_tier,Russia,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m
140973,811544,RU1,2696,0,0,1,0,0,2021.0,21. Matchday,2696.0,964.0,1.0,1.0,,,premier-liga,first_tier,Russia,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m
140974,811544,RU1,2696,0,0,1,0,0,2021.0,22. Matchday,41231.0,2696.0,2.0,3.0,,,premier-liga,first_tier,Russia,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m


## Get Club Position

In [13]:
def get_club_pos(home_club_id, away_club_id, club_id, df):
    if home_club_id == club_id:
        return df["home_club_position"]
    elif away_club_id == club_id:
        return df["away_club_position"]
    
app_games_comp_club_merged['club_position'] = app_games_comp_club_merged.apply(lambda x: get_club_pos(x["home_club_id"], x["away_club_id"], x["club_id"], x), axis=1)
app_games_comp_club_merged = app_games_comp_club_merged.drop(["home_club_id", "away_club_id", "home_club_position", "away_club_position", "country_name", "home_club_goals", "away_club_goals"], axis= 1)
app_games_comp_club_merged

Unnamed: 0,player_id,competition_id,club_id,goals,assists,minutes_played,yellow_cards,red_cards,season,round,name,type,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record,club_position
0,141344,CLQ,1189,0,0,5,0,0,2013.0,Second Round 1st leg,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,,,,,,,,,
1,141344,CLQ,1189,0,0,17,0,0,2013.0,Second Round 2nd leg,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,,,,,,,,,
2,164230,FR1,14171,0,0,90,0,0,2014.0,1. Matchday,ligue-1,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
3,164230,FR1,14171,0,0,74,0,0,2014.0,2. Matchday,ligue-1,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
4,164230,FR1,14171,0,0,18,0,0,2014.0,3. Matchday,ligue-1,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140971,74223,DK1,678,0,0,78,0,0,2021.0,21. Matchday,superligaen,first_tier,aarhus-gf,17.46,27.0,24.4,12.0,44.4,5.0,£-3.56m,
140972,811544,RU1,2696,0,0,2,0,0,2021.0,20. Matchday,premier-liga,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,
140973,811544,RU1,2696,0,0,1,0,0,2021.0,21. Matchday,premier-liga,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,
140974,811544,RU1,2696,0,0,1,0,0,2021.0,22. Matchday,premier-liga,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,


In [14]:
app_games_comp_club_merged.rename(columns= {"name": "comp_name", "type": "comp_type"}, inplace= True)
app_games_comp_club_merged

Unnamed: 0,player_id,competition_id,club_id,goals,assists,minutes_played,yellow_cards,red_cards,season,round,comp_name,comp_type,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record,club_position
0,141344,CLQ,1189,0,0,5,0,0,2013.0,Second Round 1st leg,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,,,,,,,,,
1,141344,CLQ,1189,0,0,17,0,0,2013.0,Second Round 2nd leg,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,,,,,,,,,
2,164230,FR1,14171,0,0,90,0,0,2014.0,1. Matchday,ligue-1,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
3,164230,FR1,14171,0,0,74,0,0,2014.0,2. Matchday,ligue-1,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
4,164230,FR1,14171,0,0,18,0,0,2014.0,3. Matchday,ligue-1,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140971,74223,DK1,678,0,0,78,0,0,2021.0,21. Matchday,superligaen,first_tier,aarhus-gf,17.46,27.0,24.4,12.0,44.4,5.0,£-3.56m,
140972,811544,RU1,2696,0,0,2,0,0,2021.0,20. Matchday,premier-liga,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,
140973,811544,RU1,2696,0,0,1,0,0,2021.0,21. Matchday,premier-liga,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,
140974,811544,RU1,2696,0,0,1,0,0,2021.0,22. Matchday,premier-liga,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,


In [15]:
app_games_comp_club_merged = app_games_comp_club_merged.drop(labels= ["club_id"], axis= 1)
app_games_comp_club_merged

Unnamed: 0,player_id,competition_id,goals,assists,minutes_played,yellow_cards,red_cards,season,round,comp_name,comp_type,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record,club_position
0,141344,CLQ,0,0,5,0,0,2013.0,Second Round 1st leg,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,,,,,,,,,
1,141344,CLQ,0,0,17,0,0,2013.0,Second Round 2nd leg,uefa-champions-league-qualifikation,uefa_champions_league_qualifying,,,,,,,,,
2,164230,FR1,0,0,90,0,0,2014.0,1. Matchday,ligue-1,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
3,164230,FR1,0,0,74,0,0,2014.0,2. Matchday,ligue-1,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
4,164230,FR1,0,0,18,0,0,2014.0,3. Matchday,ligue-1,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140971,74223,DK1,0,0,78,0,0,2021.0,21. Matchday,superligaen,first_tier,aarhus-gf,17.46,27.0,24.4,12.0,44.4,5.0,£-3.56m,
140972,811544,RU1,0,0,2,0,0,2021.0,20. Matchday,premier-liga,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,
140973,811544,RU1,0,0,1,0,0,2021.0,21. Matchday,premier-liga,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,
140974,811544,RU1,0,0,1,0,0,2021.0,22. Matchday,premier-liga,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,


In [16]:
app_games_comp_club_merged = app_games_comp_club_merged.drop(labels= ["competition_id", "round", "comp_name"], axis= 1)
app_games_comp_club_merged

Unnamed: 0,player_id,goals,assists,minutes_played,yellow_cards,red_cards,season,comp_type,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record,club_position
0,141344,0,0,5,0,0,2013.0,uefa_champions_league_qualifying,,,,,,,,,
1,141344,0,0,17,0,0,2013.0,uefa_champions_league_qualifying,,,,,,,,,
2,164230,0,0,90,0,0,2014.0,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
3,164230,0,0,74,0,0,2014.0,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
4,164230,0,0,18,0,0,2014.0,first_tier,thonon-evian-grand-geneve-fc,1.60,20.0,26.4,4.0,20.0,1.0,+-0,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140971,74223,0,0,78,0,0,2021.0,first_tier,aarhus-gf,17.46,27.0,24.4,12.0,44.4,5.0,£-3.56m,
140972,811544,0,0,2,0,0,2021.0,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,
140973,811544,0,0,1,0,0,2021.0,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,
140974,811544,0,0,1,0,0,2021.0,first_tier,krylya-sovetov-samara,23.04,26.0,23.7,4.0,15.4,2.0,+£1.16m,


In [17]:
agg_data = ["goals", "assists", "minutes_played", "yellow_cards", "red_cards"]
aggregation_functions = {key: "sum" if key in agg_data else "first" for key in app_games_comp_club_merged.columns.tolist()}


app_games_comp_club_merged_joined = app_games_comp_club_merged.groupby(app_games_comp_club_merged['player_id']).aggregate(aggregation_functions)
app_games_comp_club_merged_joined

Unnamed: 0_level_0,player_id,goals,assists,minutes_played,yellow_cards,red_cards,season,comp_type,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record,club_position
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1327,1327,2,0,1297,7,0,2016.0,first_tier,as-nancy-lorraine,16.88,30.0,24.2,16.0,53.3,4.0,+-0,20.0
2587,2587,1,0,2367,8,0,2016.0,first_tier,gaziantepspor,,0.0,,0.0,,0.0,+-0,15.0
3333,3333,0,3,939,5,0,2021.0,uefa_champions_league,fc-liverpool,800.10,27.0,27.0,19.0,70.4,17.0,£-51.75m,3.0
3455,3455,8,2,1079,2,0,2021.0,uefa_champions_league,ac-mailand,432.09,27.0,26.7,20.0,74.1,17.0,£-66.69m,2.0
4188,4188,1,1,978,4,0,2021.0,first_tier,vitoria-guimaraes-sc,39.56,29.0,23.9,12.0,41.4,4.0,+£4.59m,11.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
917120,917120,0,0,74,1,0,2021.0,first_tier,es-troyes-ac,52.88,32.0,26.2,18.0,56.3,8.0,£-19.62m,18.0
919173,919173,0,0,3,0,0,2021.0,first_tier,stade-reims,89.10,30.0,23.7,21.0,70.0,12.0,£-855Th.,14.0
926954,926954,0,0,24,1,0,2021.0,first_tier,fc-nordsjaelland,22.86,31.0,20.7,17.0,54.8,0.0,+£14.45m,10.0
934034,934034,0,0,3,0,0,2021.0,domestic_cup,volos-nps,8.42,26.0,26.3,19.0,73.1,3.0,+£1.29m,


## Specifying Goal - Assist - Minutes Played Per Compt Type

In [18]:
comp_type_list= app_games_comp_club_merged["comp_type"].unique().tolist()
comp_type_list

['uefa_champions_league_qualifying',
 'first_tier',
 'domestic_cup',
 'europa_league_qualifying',
 'europa_league',
 'fifa_club_world_cup',
 'uefa_champions_league',
 'uefa_europa_conference_league_qualifiers',
 'domestic_super_cup',
 'uefa_super_cup']

#### Get Data We want to dissern by competition type

In [19]:
agg_data = ["goals", "assists", "minutes_played"]

In [20]:
for data in agg_data:
    for comp_type in comp_type_list:
        app_games_comp_club_merged_joined[f"{data}_{comp_type}"] = app_games_comp_club_merged.groupby(["player_id"]).apply(lambda x: x.loc[x["comp_type"] == comp_type, data].sum())

In [21]:
app_games_comp_club_merged_joined = app_games_comp_club_merged_joined.drop(labels= ["comp_type", "player_id"], axis= 1)
app_games_comp_club_merged_joined.rename({"goals": "total_goals", "assists":"total_assists", "minutes_played": "total_minutes_played"}, inplace= True)
app_games_comp_club_merged_joined

Unnamed: 0_level_0,goals,assists,minutes_played,yellow_cards,red_cards,season,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record,club_position,goals_uefa_champions_league_qualifying,goals_first_tier,goals_domestic_cup,goals_europa_league_qualifying,goals_europa_league,goals_fifa_club_world_cup,goals_uefa_champions_league,goals_uefa_europa_conference_league_qualifiers,goals_domestic_super_cup,goals_uefa_super_cup,assists_uefa_champions_league_qualifying,assists_first_tier,assists_domestic_cup,assists_europa_league_qualifying,assists_europa_league,assists_fifa_club_world_cup,assists_uefa_champions_league,assists_uefa_europa_conference_league_qualifiers,assists_domestic_super_cup,assists_uefa_super_cup,minutes_played_uefa_champions_league_qualifying,minutes_played_first_tier,minutes_played_domestic_cup,minutes_played_europa_league_qualifying,minutes_played_europa_league,minutes_played_fifa_club_world_cup,minutes_played_uefa_champions_league,minutes_played_uefa_europa_conference_league_qualifiers,minutes_played_domestic_super_cup,minutes_played_uefa_super_cup
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1
1327,2,0,1297,7,0,2016.0,as-nancy-lorraine,16.88,30.0,24.2,16.0,53.3,4.0,+-0,20.0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1297,0,0,0,0,0,0,0,0
2587,1,0,2367,8,0,2016.0,gaziantepspor,,0.0,,0.0,,0.0,+-0,15.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2367,0,0,0,0,0,0,0,0
3333,0,3,939,5,0,2021.0,fc-liverpool,800.10,27.0,27.0,19.0,70.4,17.0,£-51.75m,3.0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,631,161,0,0,0,147,0,0,0
3455,8,2,1079,2,0,2021.0,ac-mailand,432.09,27.0,26.7,20.0,74.1,17.0,£-66.69m,2.0,0,8,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,918,0,0,0,0,161,0,0,0
4188,1,1,978,4,0,2021.0,vitoria-guimaraes-sc,39.56,29.0,23.9,12.0,41.4,4.0,+£4.59m,11.0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,951,27,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
917120,0,0,74,1,0,2021.0,es-troyes-ac,52.88,32.0,26.2,18.0,56.3,8.0,£-19.62m,18.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,74,0,0,0,0,0,0,0,0
919173,0,0,3,0,0,2021.0,stade-reims,89.10,30.0,23.7,21.0,70.0,12.0,£-855Th.,14.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0
926954,0,0,24,1,0,2021.0,fc-nordsjaelland,22.86,31.0,20.7,17.0,54.8,0.0,+£14.45m,10.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,0,0,0,0,0,0,0,0
934034,0,0,3,0,0,2021.0,volos-nps,8.42,26.0,26.3,19.0,73.1,3.0,+£1.29m,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0


## Merging With Players Dataframe

In [152]:
final_df = app_games_comp_club_merged_joined.merge(dataframe_players, on= "player_id", how= "left")
final_df

Unnamed: 0,player_id,goals,assists,minutes_played,yellow_cards,red_cards,season,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record,club_position,goals_uefa_champions_league_qualifying,goals_first_tier,goals_domestic_cup,goals_europa_league_qualifying,goals_europa_league,goals_fifa_club_world_cup,goals_uefa_champions_league,goals_uefa_europa_conference_league_qualifiers,goals_domestic_super_cup,goals_uefa_super_cup,assists_uefa_champions_league_qualifying,assists_first_tier,assists_domestic_cup,assists_europa_league_qualifying,assists_europa_league,assists_fifa_club_world_cup,assists_uefa_champions_league,assists_uefa_europa_conference_league_qualifiers,assists_domestic_super_cup,assists_uefa_super_cup,minutes_played_uefa_champions_league_qualifying,minutes_played_first_tier,minutes_played_domestic_cup,minutes_played_europa_league_qualifying,minutes_played_europa_league,minutes_played_fifa_club_world_cup,minutes_played_uefa_champions_league,minutes_played_uefa_europa_conference_league_qualifiers,minutes_played_domestic_super_cup,minutes_played_uefa_super_cup,last_season,current_club_id,name,pretty_name,country_of_birth,country_of_citizenship,date_of_birth,position,sub_position,foot,height_in_cm,market_value_in_gbp,highest_market_value_in_gbp,url
0,1327,2,0,1297,7,0,2016.0,as-nancy-lorraine,16.88,30.0,24.2,16.0,53.3,4.0,+-0,20.0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1297,0,0,0,0,0,0,0,0,2016.0,1159.0,alou-diarra,Alou Diarra,France,France,1981-07-15,Midfield,Defensive Midfield,Right,189.0,,13050000.0,https://www.transfermarkt.co.uk/alou-diarra/pr...
1,2587,1,0,2367,8,0,2016.0,gaziantepspor,,0.0,,0.0,,0.0,+-0,15.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2367,0,0,0,0,0,0,0,0,2016.0,524.0,elyasa-sume,Elyasa Sume,Germany,Turkey,1983-08-13,Defender,Centre-Back,,0.0,,1800000.0,https://www.transfermarkt.co.uk/elyasa-sume/pr...
2,3333,0,3,939,5,0,2021.0,fc-liverpool,800.10,27.0,27.0,19.0,70.4,17.0,£-51.75m,3.0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,631,161,0,0,0,147,0,0,0,2021.0,31.0,james-milner,James Milner,England,England,1986-01-04,Midfield,Central Midfield,Right,175.0,1800000.0,18900000.0,https://www.transfermarkt.co.uk/james-milner/p...
3,3455,8,2,1079,2,0,2021.0,ac-mailand,432.09,27.0,26.7,20.0,74.1,17.0,£-66.69m,2.0,0,8,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,918,0,0,0,0,161,0,0,0,2021.0,5.0,zlatan-ibrahimovic,Zlatan Ibrahimovic,Sweden,Sweden,1981-10-03,Attack,Centre-Forward,Both,195.0,3600000.0,41400000.0,https://www.transfermarkt.co.uk/zlatan-ibrahim...
4,4188,1,1,978,4,0,2021.0,vitoria-guimaraes-sc,39.56,29.0,23.9,12.0,41.4,4.0,+£4.59m,11.0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,951,27,0,0,0,0,0,0,0,2021.0,2420.0,ricardo-quaresma,Ricardo Quaresma,Portugal,Portugal,1983-09-26,Attack,Right Winger,Right,175.0,270000.0,22500000.0,https://www.transfermarkt.co.uk/ricardo-quares...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8117,917120,0,0,74,1,0,2021.0,es-troyes-ac,52.88,32.0,26.2,18.0,56.3,8.0,£-19.62m,18.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,74,0,0,0,0,0,0,0,0,2021.0,1095.0,tanguy-zoukrou,Tanguy Zoukrou,France,France,2003-05-07,Defender,Centre-Back,,189.0,360000.0,360000.0,https://www.transfermarkt.co.uk/tanguy-zoukrou...
8118,919173,0,0,3,0,0,2021.0,stade-reims,89.10,30.0,23.7,21.0,70.0,12.0,£-855Th.,14.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,2021.0,1421.0,ibrahim-diakite,Ibrahim Diakite,,France,2003-10-31,Defender,Right-Back,,178.0,,,https://www.transfermarkt.co.uk/ibrahim-diakit...
8119,926954,0,0,24,1,0,2021.0,fc-nordsjaelland,22.86,31.0,20.7,17.0,54.8,0.0,+£14.45m,10.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24,0,0,0,0,0,0,0,0,2021.0,2778.0,yannick-agnero,Yannick Agnero,,Cote d'Ivoire,2003-02-20,Attack,Centre-Forward,,0.0,45000.0,45000.0,https://www.transfermarkt.co.uk/yannick-agnero...
8120,934034,0,0,3,0,0,2021.0,volos-nps,8.42,26.0,26.3,19.0,73.1,3.0,+£1.29m,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,2021.0,60949.0,dimitrios-metaxas,Dimitrios Metaxas,,Greece,2003-12-16,Midfield,Central Midfield,,0.0,45000.0,45000.0,https://www.transfermarkt.co.uk/dimitrios-meta...


In [153]:
final_df = final_df[final_df["position"] == "Attack"].drop(labels= ["position", "url", "current_club_id", "pretty_name", "last_season"], axis= 1)
final_df = final_df.reset_index(drop= True)
final_df

Unnamed: 0,player_id,goals,assists,minutes_played,yellow_cards,red_cards,season,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record,club_position,goals_uefa_champions_league_qualifying,goals_first_tier,goals_domestic_cup,goals_europa_league_qualifying,goals_europa_league,goals_fifa_club_world_cup,goals_uefa_champions_league,goals_uefa_europa_conference_league_qualifiers,goals_domestic_super_cup,goals_uefa_super_cup,assists_uefa_champions_league_qualifying,assists_first_tier,assists_domestic_cup,assists_europa_league_qualifying,assists_europa_league,assists_fifa_club_world_cup,assists_uefa_champions_league,assists_uefa_europa_conference_league_qualifiers,assists_domestic_super_cup,assists_uefa_super_cup,minutes_played_uefa_champions_league_qualifying,minutes_played_first_tier,minutes_played_domestic_cup,minutes_played_europa_league_qualifying,minutes_played_europa_league,minutes_played_fifa_club_world_cup,minutes_played_uefa_champions_league,minutes_played_uefa_europa_conference_league_qualifiers,minutes_played_domestic_super_cup,minutes_played_uefa_super_cup,name,country_of_birth,country_of_citizenship,date_of_birth,sub_position,foot,height_in_cm,market_value_in_gbp,highest_market_value_in_gbp
0,3455,8,2,1079,2,0,2021.0,ac-mailand,432.09,27.0,26.7,20.0,74.1,17.0,£-66.69m,2.0,0,8,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,918,0,0,0,0,161,0,0,0,zlatan-ibrahimovic,Sweden,Sweden,1981-10-03,Centre-Forward,Both,195.0,3600000.0,41400000.0
1,4188,1,1,978,4,0,2021.0,vitoria-guimaraes-sc,39.56,29.0,23.9,12.0,41.4,4.0,+£4.59m,11.0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,951,27,0,0,0,0,0,0,0,ricardo-quaresma,Portugal,Portugal,1983-09-26,Right Winger,Right,175.0,270000.0,22500000.0
2,5345,0,1,1292,1,0,2016.0,as-nancy-lorraine,16.88,30.0,24.2,16.0,53.3,4.0,+-0,20.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1292,0,0,0,0,0,0,0,0,youssouf-hadji,Morocco,Morocco,1980-02-25,Centre-Forward,Right,183.0,,4950000.0
3,5457,4,3,1835,9,0,2016.0,sc-bastia,8.96,28.0,27.7,7.0,25.0,2.0,+-0,20.0,0,4,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1835,0,0,0,0,0,0,0,0,gael-danic,France,France,1981-11-19,Left Winger,Left,176.0,,3150000.0
4,5521,5,0,1142,2,0,2015.0,gfc-ajaccio,585.00,22.0,23.9,2.0,9.1,0.0,+-0,12.0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1142,0,0,0,0,0,0,0,0,gregory-pujol,France,France,1980-01-25,Centre-Forward,Right,183.0,,3600000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2282,896154,0,1,216,0,0,2021.0,pas-giannina,8.03,28.0,25.5,12.0,42.9,2.0,£-45Th.,8.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,170,46,0,0,0,0,0,0,0,leonid-mina,,Greece,2004-03-28,Centre-Forward,Right,178.0,45000.0,45000.0
2283,906192,0,0,21,1,0,2021.0,rfc-seraing,13.14,31.0,23.8,18.0,58.1,6.0,+-0,16.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21,0,0,0,0,0,0,0,0,zakaria-silini,,Algeria,2003-07-05,Second Striker,,184.0,45000.0,45000.0
2284,906329,2,0,114,0,0,2021.0,sc-braga,102.15,25.0,24.5,10.0,40.0,2.0,+£22.92m,8.0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,107,0,0,7,0,0,0,0,0,roger-fernandes,Guinea-Bissau,Portugal,2005-11-21,Left Winger,Left,170.0,3600000.0,3600000.0
2285,912111,0,0,72,0,0,2021.0,cercle-brugge,17.57,25.0,23.9,14.0,56.0,6.0,£-630Th.,7.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,72,0,0,0,0,0,0,0,0,aske-sampers,,Belgium,2001-05-20,Centre-Forward,Right,184.0,135000.0,135000.0


## Feature Engineering
**Feature Engineering Tasks**:
- [x] convert *club_net_transfer_record* feature to numerical
- [x] convert *date_of_birth* feature to numerical age
- [x] Split Train/Test Data (85%/15%)  
- [x] fill 0 values in *height* with mean
- [x] fill all NaN values with mean

### Converting club_net_transfer_record Feature to Numerical

In [154]:
def convert_transfer_to_num(club_transfer_rec):
    th = 0
    symb_idx = 0
    first_num_idx = 2
    
    res = 0
    
    if club_transfer_rec == None:
        return None
    if club_transfer_rec[-1] == "0" and club_transfer_rec[:2] == "+-":
        return 0    
    if club_transfer_rec[-1] == "m":
        th = 1_000_000
        symb_idx = club_transfer_rec.index("m")
    if "Th." in club_transfer_rec:
        th = 1_000
        symb_idx = club_transfer_rec.index("Th.")
    
    res = int(th * float(club_transfer_rec[first_num_idx:symb_idx]))
    if club_transfer_rec[0] == "+" and club_transfer_rec[1] == "£":
        return res
    elif club_transfer_rec[0] == "£" and club_transfer_rec[1] == "-":
        return res*-1
    else:
        return None

In [155]:
final_df["club_net_transfer_record"] = final_df["club_net_transfer_record"].apply(lambda x: convert_transfer_to_num(x))
final_df

Unnamed: 0,player_id,goals,assists,minutes_played,yellow_cards,red_cards,season,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record,club_position,goals_uefa_champions_league_qualifying,goals_first_tier,goals_domestic_cup,goals_europa_league_qualifying,goals_europa_league,goals_fifa_club_world_cup,goals_uefa_champions_league,goals_uefa_europa_conference_league_qualifiers,goals_domestic_super_cup,goals_uefa_super_cup,assists_uefa_champions_league_qualifying,assists_first_tier,assists_domestic_cup,assists_europa_league_qualifying,assists_europa_league,assists_fifa_club_world_cup,assists_uefa_champions_league,assists_uefa_europa_conference_league_qualifiers,assists_domestic_super_cup,assists_uefa_super_cup,minutes_played_uefa_champions_league_qualifying,minutes_played_first_tier,minutes_played_domestic_cup,minutes_played_europa_league_qualifying,minutes_played_europa_league,minutes_played_fifa_club_world_cup,minutes_played_uefa_champions_league,minutes_played_uefa_europa_conference_league_qualifiers,minutes_played_domestic_super_cup,minutes_played_uefa_super_cup,name,country_of_birth,country_of_citizenship,date_of_birth,sub_position,foot,height_in_cm,market_value_in_gbp,highest_market_value_in_gbp
0,3455,8,2,1079,2,0,2021.0,ac-mailand,432.09,27.0,26.7,20.0,74.1,17.0,-66690000.0,2.0,0,8,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,918,0,0,0,0,161,0,0,0,zlatan-ibrahimovic,Sweden,Sweden,1981-10-03,Centre-Forward,Both,195.0,3600000.0,41400000.0
1,4188,1,1,978,4,0,2021.0,vitoria-guimaraes-sc,39.56,29.0,23.9,12.0,41.4,4.0,4590000.0,11.0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,951,27,0,0,0,0,0,0,0,ricardo-quaresma,Portugal,Portugal,1983-09-26,Right Winger,Right,175.0,270000.0,22500000.0
2,5345,0,1,1292,1,0,2016.0,as-nancy-lorraine,16.88,30.0,24.2,16.0,53.3,4.0,0.0,20.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1292,0,0,0,0,0,0,0,0,youssouf-hadji,Morocco,Morocco,1980-02-25,Centre-Forward,Right,183.0,,4950000.0
3,5457,4,3,1835,9,0,2016.0,sc-bastia,8.96,28.0,27.7,7.0,25.0,2.0,0.0,20.0,0,4,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1835,0,0,0,0,0,0,0,0,gael-danic,France,France,1981-11-19,Left Winger,Left,176.0,,3150000.0
4,5521,5,0,1142,2,0,2015.0,gfc-ajaccio,585.00,22.0,23.9,2.0,9.1,0.0,0.0,12.0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1142,0,0,0,0,0,0,0,0,gregory-pujol,France,France,1980-01-25,Centre-Forward,Right,183.0,,3600000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2282,896154,0,1,216,0,0,2021.0,pas-giannina,8.03,28.0,25.5,12.0,42.9,2.0,-45000.0,8.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,170,46,0,0,0,0,0,0,0,leonid-mina,,Greece,2004-03-28,Centre-Forward,Right,178.0,45000.0,45000.0
2283,906192,0,0,21,1,0,2021.0,rfc-seraing,13.14,31.0,23.8,18.0,58.1,6.0,0.0,16.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21,0,0,0,0,0,0,0,0,zakaria-silini,,Algeria,2003-07-05,Second Striker,,184.0,45000.0,45000.0
2284,906329,2,0,114,0,0,2021.0,sc-braga,102.15,25.0,24.5,10.0,40.0,2.0,22920000.0,8.0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,107,0,0,7,0,0,0,0,0,roger-fernandes,Guinea-Bissau,Portugal,2005-11-21,Left Winger,Left,170.0,3600000.0,3600000.0
2285,912111,0,0,72,0,0,2021.0,cercle-brugge,17.57,25.0,23.9,14.0,56.0,6.0,-630000.0,7.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,72,0,0,0,0,0,0,0,0,aske-sampers,,Belgium,2001-05-20,Centre-Forward,Right,184.0,135000.0,135000.0


#### Converting *date_of_birth* Feature to Numerical Age

In [156]:
import dateutil
import datetime

def convert_dob_to_age(dob):
    if dob == None:
        return None
    # Get the current date
    now = datetime.datetime.utcnow()
    now = now.date()
    
    # Convert isoformat of date to datetime object
    date = datetime.datetime.fromisoformat(dob)
    # Get the difference between the current date and the birthday
    age = dateutil.relativedelta.relativedelta(now, date)
    age = age.years

    return age

In [157]:
final_df["date_of_birth"] = final_df["date_of_birth"].apply(lambda x: convert_dob_to_age(x))
final_df.rename(columns= {"date_of_birth": "age"}, inplace= True)
final_df

Unnamed: 0,player_id,goals,assists,minutes_played,yellow_cards,red_cards,season,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record,club_position,goals_uefa_champions_league_qualifying,goals_first_tier,goals_domestic_cup,goals_europa_league_qualifying,goals_europa_league,goals_fifa_club_world_cup,goals_uefa_champions_league,goals_uefa_europa_conference_league_qualifiers,goals_domestic_super_cup,goals_uefa_super_cup,assists_uefa_champions_league_qualifying,assists_first_tier,assists_domestic_cup,assists_europa_league_qualifying,assists_europa_league,assists_fifa_club_world_cup,assists_uefa_champions_league,assists_uefa_europa_conference_league_qualifiers,assists_domestic_super_cup,assists_uefa_super_cup,minutes_played_uefa_champions_league_qualifying,minutes_played_first_tier,minutes_played_domestic_cup,minutes_played_europa_league_qualifying,minutes_played_europa_league,minutes_played_fifa_club_world_cup,minutes_played_uefa_champions_league,minutes_played_uefa_europa_conference_league_qualifiers,minutes_played_domestic_super_cup,minutes_played_uefa_super_cup,name,country_of_birth,country_of_citizenship,age,sub_position,foot,height_in_cm,market_value_in_gbp,highest_market_value_in_gbp
0,3455,8,2,1079,2,0,2021.0,ac-mailand,432.09,27.0,26.7,20.0,74.1,17.0,-66690000.0,2.0,0,8,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,918,0,0,0,0,161,0,0,0,zlatan-ibrahimovic,Sweden,Sweden,40,Centre-Forward,Both,195.0,3600000.0,41400000.0
1,4188,1,1,978,4,0,2021.0,vitoria-guimaraes-sc,39.56,29.0,23.9,12.0,41.4,4.0,4590000.0,11.0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,951,27,0,0,0,0,0,0,0,ricardo-quaresma,Portugal,Portugal,38,Right Winger,Right,175.0,270000.0,22500000.0
2,5345,0,1,1292,1,0,2016.0,as-nancy-lorraine,16.88,30.0,24.2,16.0,53.3,4.0,0.0,20.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1292,0,0,0,0,0,0,0,0,youssouf-hadji,Morocco,Morocco,42,Centre-Forward,Right,183.0,,4950000.0
3,5457,4,3,1835,9,0,2016.0,sc-bastia,8.96,28.0,27.7,7.0,25.0,2.0,0.0,20.0,0,4,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1835,0,0,0,0,0,0,0,0,gael-danic,France,France,40,Left Winger,Left,176.0,,3150000.0
4,5521,5,0,1142,2,0,2015.0,gfc-ajaccio,585.00,22.0,23.9,2.0,9.1,0.0,0.0,12.0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1142,0,0,0,0,0,0,0,0,gregory-pujol,France,France,42,Centre-Forward,Right,183.0,,3600000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2282,896154,0,1,216,0,0,2021.0,pas-giannina,8.03,28.0,25.5,12.0,42.9,2.0,-45000.0,8.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,170,46,0,0,0,0,0,0,0,leonid-mina,,Greece,18,Centre-Forward,Right,178.0,45000.0,45000.0
2283,906192,0,0,21,1,0,2021.0,rfc-seraing,13.14,31.0,23.8,18.0,58.1,6.0,0.0,16.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21,0,0,0,0,0,0,0,0,zakaria-silini,,Algeria,18,Second Striker,,184.0,45000.0,45000.0
2284,906329,2,0,114,0,0,2021.0,sc-braga,102.15,25.0,24.5,10.0,40.0,2.0,22920000.0,8.0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,107,0,0,7,0,0,0,0,0,roger-fernandes,Guinea-Bissau,Portugal,16,Left Winger,Left,170.0,3600000.0,3600000.0
2285,912111,0,0,72,0,0,2021.0,cercle-brugge,17.57,25.0,23.9,14.0,56.0,6.0,-630000.0,7.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,72,0,0,0,0,0,0,0,0,aske-sampers,,Belgium,20,Centre-Forward,Right,184.0,135000.0,135000.0


#### Splitting Data To Train/Test 85%/15%

In [158]:
SEED = 11

X_train, X_test, y_train, y_test = train_test_split(final_df.drop(labels= ["market_value_in_gbp", "player_id"], axis= 1),
                                                   final_df["market_value_in_gbp"],
                                                   test_size=0.15,
                                                   random_state=SEED) 

In [159]:
print(f"X_train shape: {X_train.shape}, y_train_shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test_shape: {y_test.shape}")

X_train shape: (1943, 53), y_train_shape: (1943,)
X_test shape: (344, 53), y_test_shape: (344,)


#### Fill 0 Values in Height with Mean

In [160]:
zero_height_train, zero_height_test = X_train[X_train["height_in_cm"]== 0], X_test[X_test["height_in_cm"]== 0]
print(f"We have {zero_height_train.shape[0]} datapoints with zero values for the height")

height_mean_train, height_mean_test = 0.0, 0.0
height_total_train, height_total_test = 0.0, 0.0

for height_train, height_test in zip(X_train["height_in_cm"], X_test["height_in_cm"]):
    if height_train != 0:
        height_total_train += height_train
    if height_test != 0:
        height_total_test += height_test
        
height_mean_train = height_total_train / (X_train["height_in_cm"].shape[0] - zero_height_train.shape[0])
height_mean_test = height_total_test / (X_test["height_in_cm"].shape[0] - zero_height_test.shape[0])
print(f"Height mean without zero values: train: {height_mean:.3f}, test: {height_test:.3f}")

We have 42 datapoints with zero values for the height
Height mean without zero values: train: 180.688, test: 178.000


In [161]:
def fill_zeros_mean(height):
    if height == 0:
        return height_mean
    else:
        return height

In [162]:
X_train["height_in_cm"] = X_train["height_in_cm"].apply(lambda x: fill_zeros_mean(x))
X_test["height_in_cm"] = X_test["height_in_cm"].apply(lambda x: fill_zeros_mean(x))
X_train

Unnamed: 0,goals,assists,minutes_played,yellow_cards,red_cards,season,club_name,club_total_market_value,club_squad_size,club_average_age,club_foreigners_number,club_foreigners_percentage,club_national_team_players,club_net_transfer_record,club_position,goals_uefa_champions_league_qualifying,goals_first_tier,goals_domestic_cup,goals_europa_league_qualifying,goals_europa_league,goals_fifa_club_world_cup,goals_uefa_champions_league,goals_uefa_europa_conference_league_qualifiers,goals_domestic_super_cup,goals_uefa_super_cup,assists_uefa_champions_league_qualifying,assists_first_tier,assists_domestic_cup,assists_europa_league_qualifying,assists_europa_league,assists_fifa_club_world_cup,assists_uefa_champions_league,assists_uefa_europa_conference_league_qualifiers,assists_domestic_super_cup,assists_uefa_super_cup,minutes_played_uefa_champions_league_qualifying,minutes_played_first_tier,minutes_played_domestic_cup,minutes_played_europa_league_qualifying,minutes_played_europa_league,minutes_played_fifa_club_world_cup,minutes_played_uefa_champions_league,minutes_played_uefa_europa_conference_league_qualifiers,minutes_played_domestic_super_cup,minutes_played_uefa_super_cup,name,country_of_birth,country_of_citizenship,age,sub_position,foot,height_in_cm,highest_market_value_in_gbp
413,1,0,276,0,0,2014.0,metalurg-donetsk,,0.0,,0.0,,0.0,0.0,13.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,276,0,0,0,0,0,0,0,0,aldo-adorno,Paraguay,Paraguay,40,Centre-Forward,Right,175.0,540000.0
1112,5,2,1766,3,0,2021.0,fc-everton,408.38,29.0,27.1,15.0,51.7,13.0,5850000.0,5.0,0,5,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1704,62,0,0,0,0,0,0,0,demarai-gray,England,England,25,Left Winger,Right,183.0,19800000.0
2006,7,4,1983,6,0,2021.0,fc-villarreal,325.80,26.0,27.8,12.0,46.2,13.0,-39380000.0,10.0,0,6,0,0,0,0,1,0,0,0,0,4,0,0,0,0,0,0,0,0,0,1448,14,0,0,0,431,0,0,90,yeremy-pino,Spain,Spain,19,Right Winger,Right,172.0,27000000.0
247,0,1,61,0,0,2020.0,us-lecce,29.61,27.0,25.8,15.0,55.6,2.0,450000.0,18.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,61,0,0,0,0,0,0,0,0,filippo-falco,Italy,Italy,30,Right Winger,Left,171.0,4500000.0
988,0,1,512,3,0,2021.0,rubin-kazan,52.79,24.0,24.8,7.0,29.2,7.0,-4580000.0,8.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,449,45,0,0,0,0,18,0,0,aleksandr-zuev,Kazakhstan,Russia,25,Left Winger,Right,177.0,1800000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1697,5,0,1411,2,0,2021.0,ac-mailand,432.09,27.0,26.7,20.0,74.1,17.0,-66690000.0,2.0,0,4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1132,164,0,0,0,115,0,0,0,junior-messias,Brazil,Brazil,30,Right Winger,Left,174.0,5850000.0
332,2,0,757,2,0,2021.0,ofi-kreta,13.37,28.0,27.1,12.0,42.9,3.0,585000.0,7.0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,535,222,0,0,0,0,0,0,0,luc-castaignos,Netherlands,Netherlands,29,Centre-Forward,Right,188.0,6300000.0
1293,23,10,2639,6,0,2021.0,royale-union-saint-gilloise,39.44,26.0,24.6,19.0,73.1,8.0,5260000.0,7.0,0,23,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,2639,0,0,0,0,0,0,0,0,deniz-undav,Germany,Turkey,25,Centre-Forward,Right,178.0,4500000.0
1104,9,3,2799,8,1,2020.0,dijon-fco,16.61,24.0,27.0,11.0,45.8,4.0,3380000.0,20.0,0,7,2,0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,2499,300,0,0,0,0,0,0,0,aboubakar-kamara,France,Mauritania,27,Centre-Forward,Right,177.0,3600000.0


In [163]:
zero_height_train, zero_height_test = X_train[X_train["height_in_cm"]== 0], X_test[X_test["height_in_cm"]== 0] 
print(f"We have {zero_height.shape[0]}, {zero_height_test.shape[0]} (train, test) datapoints with zero values for the height")

We have 0, 0 (train, test) datapoints with zero values for the height


#### Filling NaN Values

In [164]:
def summarize_missingness(df, label= None):
    '''
    Utility function to summarize missing or NaN values
    '''
    nulls = df.isnull()
    counts = nulls.sum()
    percs = nulls.mean().mul(100.)
    
    if label is None:
        label = ""
    nulls_df = pd.DataFrame({f'{label} Count of missing/NaN values': counts, 'Percentage of missing values': percs}, 
                            index=counts.index)
    
    display(nulls_df)

In [165]:
vars_with_na = [col for col in X_train.columns if X_train[col].isnull().sum() > 0]
data_label = ["X_train", "X_test"]
for i, dataframe in enumerate([X_train, X_test]):
    summarize_missingness(dataframe[vars_with_na], label= data_label[i])

Unnamed: 0,X_train Count of missing/NaN values,Percentage of missing values
club_name,142,7.308286
club_total_market_value,271,13.947504
club_squad_size,142,7.308286
club_average_age,241,12.4035
club_foreigners_number,142,7.308286
club_foreigners_percentage,289,14.873906
club_national_team_players,142,7.308286
club_net_transfer_record,142,7.308286
club_position,180,9.264025
country_of_birth,96,4.940813


Unnamed: 0,X_test Count of missing/NaN values,Percentage of missing values
club_name,21,6.104651
club_total_market_value,34,9.883721
club_squad_size,21,6.104651
club_average_age,30,8.72093
club_foreigners_number,21,6.104651
club_foreigners_percentage,38,11.046512
club_national_team_players,21,6.104651
club_net_transfer_record,21,6.104651
club_position,25,7.267442
country_of_birth,18,5.232558


In [182]:
vars_with_na = [col for col in X_train if X_train[col].isnull().sum() > 0]
cat_vars_with_na = ['club_name', 'country_of_birth', 'foot']
print("Categorical Features with NaNs:", cat_vars_with_na)
num_vars_with_na = list(set(vars_with_na) - set(cat_vars_with_na))
print("Numerical Features with NaNs:", num_vars_with_na)

Categorical Features with NaNs: ['club_name', 'country_of_birth', 'foot']
Numerical Features with NaNs: ['club_position', 'club_total_market_value', 'club_average_age', 'highest_market_value_in_gbp', 'club_foreigners_number', 'club_foreigners_percentage', 'club_squad_size', 'club_national_team_players', 'club_net_transfer_record']


##### Numerical Features NaNs

In [188]:
X_train[num_vars_with_na] = X_train[num_vars_with_na].fillna((X_train[num_vars_with_na].mean()))
X_test[num_vars_with_na] = X_test[num_vars_with_na].fillna((X_test[num_vars_with_na].mean()))

##### Categorical Features

In [191]:
for cat_var in cat_vars_with_na:
    X_train[cat_var] = X_train[cat_var].fillna(X_train[cat_var].value_counts().index[0])
    X_test[cat_var] = X_test[cat_var].fillna(X_test[cat_var].value_counts().index[0])

In [192]:
vars_with_na = [col for col in X_train.columns if X_train[col].isnull().sum() > 0]
data_label = ["X_train", "X_test"]
for i, dataframe in enumerate([X_train, X_test]):
    summarize_missingness(dataframe[vars_with_na], label= data_label[i])

Unnamed: 0,X_train Count of missing/NaN values,Percentage of missing values


Unnamed: 0,X_test Count of missing/NaN values,Percentage of missing values


## Some EDA
**EDA Tasks**:
- [ ] Distribution/Proportion of Categorical Data
- [ ] Target Variable Distribution
- [ ] Data Info - describe
- [ ] Any Other EDA technique for Regression

## Some More Feature Engineering
**Final Feature Engineering Tasks**:
- [ ] Normalize Numerical Features 
- [ ] One-Hot-Encode Categorical Features

#### Normalize Numerical Features

#### One-Hor-Encode Categorical Features

## More Operations To Inspect
- [ ] Convert problem to classification
- [ ] Discretize market value into ranges and based on each range we obtain a class