#### Cleaning Players Data

In [1]:
import pandas as pd
import regex as re
from datetime import datetime    
from unidecode import unidecode

In [2]:
# Load the raw player table; the index column stored in the file has no header,
# so pandas exposes it as a regular column named "Unnamed: 0".
players = pd.read_csv(filepath_or_buffer="2020.csv")

In [3]:
# Preview the raw table as loaded (pandas truncates the display to the first and last rows).
players

Unnamed: 0,League,Player Link,Player,Position,Date of Birth,Country,Join,Contract,Market Value,Club,Club Link,Season
0,Liga Portugal,https://www.transfermarkt.co.uk/odysseas-vlach...,Odysseas Vlachodimos,Goalkeeper,"Apr 26, 1994 (27)",Greece,"Jul 1, 2018","Jun 30, 2027",€12.00m,SL Benfica,https://www.transfermarkt.co.uk/benfica-lissab...,https://www.transfermarkt.co.uk/liga-portugal/...
1,Liga Portugal,https://www.transfermarkt.co.uk/helton-leite/p...,Helton Leite,Goalkeeper,"Nov 2, 1990 (30)",Brazil,"Aug 8, 2020","Jun 30, 2024",€7.00m,SL Benfica,https://www.transfermarkt.co.uk/benfica-lissab...,https://www.transfermarkt.co.uk/liga-portugal/...
2,Liga Portugal,https://www.transfermarkt.co.uk/mile-svilar/pr...,Mile Svilar,Goalkeeper,"Aug 27, 1999 (21)",Serbia,"Aug 28, 2017","Jun 30, 2027",€2.00m,SL Benfica,https://www.transfermarkt.co.uk/benfica-lissab...,https://www.transfermarkt.co.uk/liga-portugal/...
3,Liga Portugal,https://www.transfermarkt.co.uk/fabio-duarte/p...,Fábio Duarte,Goalkeeper,"May 11, 1998 (23)",Portugal,-,"Jun 30, 2024",€350k,SL Benfica,https://www.transfermarkt.co.uk/benfica-lissab...,https://www.transfermarkt.co.uk/liga-portugal/...
4,Liga Portugal,https://www.transfermarkt.co.uk/samuel-soares/...,Samuel Soares,Goalkeeper,"Jun 15, 2002 (19)",Portugal,-,"Jun 30, 2027",-,SL Benfica,https://www.transfermarkt.co.uk/benfica-lissab...,https://www.transfermarkt.co.uk/liga-portugal/...
...,...,...,...,...,...,...,...,...,...,...,...,...
8498,Süper Lig,https://www.transfermarkt.co.uk/ally-samatta/p...,Ally Samatta,Centre-Forward,"Dec 23, 1992 (28)",Tanzania,"Jul 1, 2021","Jun 30, 2025",€4.20m,Fenerbahce,https://www.transfermarkt.co.uk/fenerbahce-ist...,https://www.transfermarkt.co.uk/super-lig/star...
8499,Süper Lig,https://www.transfermarkt.co.uk/enner-valencia...,Enner Valencia,Centre-Forward,"Nov 4, 1989 (31)",Ecuador,"Aug 28, 2020","Jun 30, 2026",€3.00m,Fenerbahce,https://www.transfermarkt.co.uk/fenerbahce-ist...,https://www.transfermarkt.co.uk/super-lig/star...
8500,Süper Lig,https://www.transfermarkt.co.uk/michael-frey/p...,Michael Frey,Centre-Forward,"Jul 19, 1994 (26)",Switzerland,"Aug 27, 2018","Jun 30, 2024",€2.20m,Fenerbahce,https://www.transfermarkt.co.uk/fenerbahce-ist...,https://www.transfermarkt.co.uk/super-lig/star...
8501,Süper Lig,https://www.transfermarkt.co.uk/kemal-ademi/pr...,Kemal Ademi,Centre-Forward,"Jan 23, 1996 (25)",Switzerland,"Oct 5, 2020","Jun 30, 2025",€1.60m,Fenerbahce,https://www.transfermarkt.co.uk/fenerbahce-ist...,https://www.transfermarkt.co.uk/super-lig/star...


In [4]:
# Tag every row with the year of this snapshot.
players = players.assign(Year=2020)

In [5]:
#Reformatting how club and player names are written: transliterate accented characters
for text_column in ("Club", "Player"):
    players[text_column] = players[text_column].map(unidecode)

In [6]:
# Collapse every run of consecutive spaces in a player name into a single space.
repeated_spaces = re.compile(' +')
players["Player"] = players["Player"].map(lambda full_name: repeated_spaces.sub(' ', full_name))

#### Transforming Values

In [7]:
#Getting the number of days left on contract, counted from 1 July 2022
reference_date = pd.Timestamp(2022, 7, 1)

# Entries that do not match the '%b %d, %Y' layout are coerced to NaT and thus
# end up as NaN days, rather than as an "NA" string mixed into a numeric column.
contract_end = pd.to_datetime(
    players["Contract"].astype(str), format='%b %d, %Y', errors='coerce'
)

# Positive values mean the contract ends after the reference date.
players["Contract"] = (contract_end - reference_date).dt.days

In [8]:
#Getting the Age
def _age_from_birth_text(birth_text):
    """Return the text inside the parentheses that follow the last comma, or "NA"."""
    try:
        after_last_comma = birth_text[birth_text.rindex(',') + 1:]
        return re.search(r'\((.*?)\)', after_last_comma).group(1)
    except (ValueError, AttributeError):
        # No comma, no parenthesised part, or a value that is not a string.
        return "NA"


players["Date of Birth"] = [_age_from_birth_text(entry) for entry in players["Date of Birth"]]

# The column now holds the extracted age, so rename it accordingly.
players = players.rename(columns={'Date of Birth': 'Age'})

In [9]:
#Number of days since they joined the club, counted up to 1 July 2022
reference_date = pd.Timestamp(2022, 7, 1)

# Entries that do not match the '%b %d, %Y' layout (such as "-") are coerced to
# NaT and thus end up as NaN days, rather than as an "NA" string mixed into a
# numeric column.
joined_on = pd.to_datetime(
    players["Join"].astype(str), format='%b %d, %Y', errors='coerce'
)

# Larger values mean the player has been at the club for longer.
players["Join"] = (reference_date - joined_on).dt.days

In [10]:
#Converting to numeric
numeric_columns = ['Age', 'Join', 'Contract']

# Convert column by column (the default axis) instead of once per row: it is far
# cheaper and keeps one dtype per column. Values that cannot be parsed become NaN;
# the explicit float cast keeps the dtype stable whether or not NaN is present.
players[numeric_columns] = (
    players[numeric_columns]
    .apply(pd.to_numeric, errors='coerce')
    .astype('float64')
)

In [11]:
#Transforming the market value column
def value_to_float(x):
    """Convert a market value such as '€12.00m' or '€350k' to a float in millions.

    Parameters
    ----------
    x : object
        Raw market value; it is converted with ``str`` before parsing, so
        missing values (NaN) and placeholders such as '-' are accepted.

    Returns
    -------
    float
        The value in millions: an 'm' amount is returned as is, a 'k' amount is
        divided by 1000. Anything without a 'k'/'m' marker, or whose numeric
        part cannot be parsed, yields 0.0.
    """
    text = str(x).replace('€', '')
    try:
        if 'k' in text:
            return float(text.replace('k', '')) / 1000
        if 'm' in text:
            return float(text.replace('m', ''))
    except ValueError:
        # Text that merely contains a 'k' or 'm' (e.g. "Unknown") is not a
        # market value; treat it like any other unknown value instead of
        # aborting the whole column conversion.
        return 0.0
    return 0.0

In [12]:
#Formatting: turn every market value entry into a number via value_to_float
players['Market Value'] = players['Market Value'].map(value_to_float)

In [13]:
# For each name listed here keep a single row: the one with the highest market value.
# (Presumably several different players share these names - confirm against the data.)
names = ["Alisson", "Neymar", "Denilson", "Vitinho", "Martinelli", "Hulk", "Koke", "Raphinha", "Casemiro", "Marcelo", "Jorginho", "Ederson", "Willian", "Antony", "Marquinhos", "Thiago"]

for shared_name in names:
    has_name = players["Player"] == shared_name
    # Highest market value first, then keep only the top row for this name.
    top_valued = players.loc[has_name].sort_values('Market Value', ascending=False).head(1)
    # Remove all rows with this name and append the kept one at the end;
    # the index is renumbered on every pass.
    players = pd.concat([players.loc[~has_name], top_valued], axis=0, ignore_index=True)

In [14]:
#In case of January transfer/ loan keep the original club
# Sorting by days-since-joining (descending) puts the longest-held club first,
# so drop_duplicates keeps that row for every player name.
longest_stay_first = players.sort_values('Join', ascending=False)
players = longest_stay_first.drop_duplicates('Player').sort_index()

#### Dropping Columns

In [15]:
#The Contract column is dropped even though it could be kept, because it
#describes the new (current) contract.
players = players.drop(columns=["Season", "Club Link", "Contract"])

#### Club Cleaning

In [16]:
# Remove every literal 'FC' from the club names. Surrounding spaces are left in
# place, so a name of the form "FC X" becomes " X".
players["Club"] = players["Club"].str.replace('FC', '', regex=False)

In [17]:
# Shorten 'Wolverhampton Wanderers' to 'Wolves'.
players["Club"] = players["Club"].str.replace('Wolverhampton Wanderers', 'Wolves', regex=False)

In [18]:
# Remove every literal 'Olympique' from the club names. Surrounding spaces are
# left in place, so a name of the form "Olympique X" becomes " X".
players["Club"] = players["Club"].str.replace('Olympique', '', regex=False)

In [19]:
# Shorten 'Hertha BSC' to 'Hertha'.
players["Club"] = players["Club"].str.replace('Hertha BSC', 'Hertha', regex=False)

In [20]:
# Preview the cleaned table before exporting it (pandas truncates the display to the first and last rows).
players

Unnamed: 0,League,Player Link,Player,Position,Age,Country,Join,Market Value,Club,Year
0,Liga Portugal,https://www.transfermarkt.co.uk/odysseas-vlach...,Odysseas Vlachodimos,Goalkeeper,27.0,Greece,1461.0,12.00,SL Benfica,2020
1,Liga Portugal,https://www.transfermarkt.co.uk/helton-leite/p...,Helton Leite,Goalkeeper,30.0,Brazil,692.0,7.00,SL Benfica,2020
2,Liga Portugal,https://www.transfermarkt.co.uk/mile-svilar/pr...,Mile Svilar,Goalkeeper,21.0,Serbia,1768.0,2.00,SL Benfica,2020
3,Liga Portugal,https://www.transfermarkt.co.uk/fabio-duarte/p...,Fabio Duarte,Goalkeeper,23.0,Portugal,,0.35,SL Benfica,2020
4,Liga Portugal,https://www.transfermarkt.co.uk/samuel-soares/...,Samuel Soares,Goalkeeper,19.0,Portugal,,0.00,SL Benfica,2020
...,...,...,...,...,...,...,...,...,...,...
8479,Premier League,https://www.transfermarkt.co.uk/ederson/profil...,Ederson,Goalkeeper,27.0,Brazil,1826.0,50.00,Manchester City,2020
8480,Premier League,https://www.transfermarkt.co.uk/willian/profil...,Willian,Left Winger,32.0,Brazil,686.0,9.00,Arsenal,2020
8481,Eredivisie,https://www.transfermarkt.co.uk/antony/profil/...,Antony,Right Winger,21.0,Brazil,730.0,25.00,Ajax Amsterdam,2020
8482,Ligue 1,https://www.transfermarkt.co.uk/marquinhos/pro...,Marquinhos,Centre-Back,27.0,Brazil,3269.0,75.00,Paris Saint-Germain,2020


In [21]:
# Export the cleaned table as UTF-8 CSV. The DataFrame index is written as well
# (to_csv defaults to index=True).
players.to_csv(path_or_buf='Players_2020.csv', encoding='utf-8')