#### Cleaning Players Data

In [21]:
import pandas as pd
import regex as re
from datetime import datetime    
from unidecode import unidecode

In [22]:
# Load 2022.csv; the first CSV column is used as the frame's index.
players = pd.read_csv("2022.csv", index_col = [0])

In [23]:
# Add a constant Year column (2022, matching the input file name).
players["Year"] = 2022

In [24]:
# Pass club and player names through unidecode so they are written
# with plain ASCII characters.
for text_column in ("Club", "Player"):
    players[text_column] = players[text_column].map(unidecode)

In [25]:
# Squeeze every run of consecutive spaces in a player name down to a single space.
multi_space = re.compile(' +')
players["Player"] = players["Player"].map(lambda name: multi_space.sub(' ', name))

#### Transforming Values

In [26]:
# Days left on each contract, measured from 1 July 2022.
# The dates are parsed in one vectorised call: cells that are missing or not
# in "Mon DD, YYYY" form become NaT, so their day count is NaN rather than the
# string "NA" sitting in an otherwise numeric column.
contract_dates = pd.to_datetime(
    players["Contract"].astype(str), format='%b %d, %Y', errors='coerce'
)
Contract_end = (contract_dates - pd.Timestamp(2022, 7, 1)).dt.days

players["Contract"] = Contract_end

In [27]:
#Getting the Age
def _age_from_birth_text(birth_text):
    """Return the text inside the first (...) found after the last comma, or "NA".

    "NA" is returned when there is no comma, no parenthesised group after it,
    or the cell is not a string (e.g. a missing value).
    """
    try:
        after_last_comma = birth_text[birth_text.rindex(',') + 1:]
        return re.search(r'\((.*?)\)', after_last_comma).group(1)
    except (ValueError, AttributeError):
        return "NA"


Age = [_age_from_birth_text(birth_text) for birth_text in players["Date of Birth"]]

# Overwrite the birth-date text with the extracted age, then rename the column to match.
players["Date of Birth"] = Age
players = players.rename(columns={'Date of Birth': 'Age'})

In [28]:
# Number of days since each player joined the club, measured up to 1 July 2022.
# The dates are parsed in one vectorised call: cells that are missing or not
# in "Mon DD, YYYY" form become NaT, so their day count is NaN rather than the
# string "NA" sitting in an otherwise numeric column.
join_dates = pd.to_datetime(
    players["Join"].astype(str), format='%b %d, %Y', errors='coerce'
)
Longetivity = (pd.Timestamp(2022, 7, 1) - join_dates).dt.days

players["Join"] = Longetivity

In [29]:
# Converting to numeric.
# pd.to_numeric is applied once per column (vectorised) instead of once per
# row; entries that cannot be parsed, such as "NA", become NaN.
# The float64 cast keeps all three columns as floats, which is what the
# row-wise conversion produced whenever any value was missing.
numeric_columns = ['Age', 'Join', 'Contract']
players[numeric_columns] = (
    players[numeric_columns].apply(pd.to_numeric, errors='coerce').astype('float64')
)

In [30]:
#Transforming the market value column
def value_to_float(x):
    """Convert a market-value string such as '€50.00m' or '€800k' to a float in millions.

    Parameters
    ----------
    x : any
        Raw cell value; it is converted with ``str`` first, so missing values
        (NaN) and non-string cells are accepted.

    Returns
    -------
    float
        The amount in millions: a 'k' value is divided by 1000, an 'm' value
        is returned as is. ``0.0`` is returned when no 'k'/'m' suffix is
        present or when the remaining text is not a number.
    """
    # Lower-casing makes the suffix check case-insensitive ('M' and 'm' both count).
    text = str(x).replace('€', '').strip().lower()
    # 'k' is tested before 'm', the same precedence the two separate checks had.
    for suffix, divisor in (('k', 1000.0), ('m', 1.0)):
        if suffix in text:
            try:
                return float(text.replace(suffix, '')) / divisor
            except ValueError:
                # Text that merely contains the letter (e.g. "unknown") is not
                # a value; fall back to the same default as unrecognised input.
                return 0.0
    return 0.0

In [31]:
# Replace every Market Value entry with the number value_to_float returns for it.
players['Market Value'] = players['Market Value'].map(value_to_float)

In [32]:
#In case of January transfer/ loan keep the original club
# Rows are ordered by Join descending, so for a repeated Player the row with
# the largest Join value comes first and is the one drop_duplicates keeps.
# NOTE(review): duplicates are detected on the Player name alone; confirm that
# no two different players share a name.
by_join_desc = players.sort_values('Join', ascending=False)
one_row_per_player = by_join_desc.drop_duplicates('Player')
players = one_row_per_player.sort_index()

In [33]:
# Spot-check the row(s) for a single player.
players[players["Player"].eq("Lionel Messi")]

Unnamed: 0_level_0,Player Link,Player,Position,Age,Country,Join,Contract,Market Value,Club,Club Link,Season,Year
League,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Ligue 1,https://www.transfermarkt.co.uk/lionel-messi/p...,Lionel Messi,Right Winger,35.0,Argentina,325.0,364.0,50.0,Paris Saint-Germain,https://www.transfermarkt.co.uk/fc-paris-saint...,https://www.transfermarkt.co.uk/ligue-1/starts...,2022


#### Dropping Columns

In [34]:
# Drop Season, Club Link and Contract.
# Original author's note on Contract (as far as it can be read): the column
# could be kept, but the value reflects the player's new contract.
players = players.drop(columns=["Season", "Club Link", "Contract"])

#### Club Cleaning

In [35]:
# Remove "FC" only where it stands as its own word, then tidy the whitespace
# left behind: "FC Barcelona" becomes "Barcelona" (no leading space), while a
# name that merely contains the letters, such as "AFC Bournemouth", is left intact.
players["Club"] = (
    players["Club"]
    .str.replace(r'\bFC\b', '', regex=True)
    .str.replace(r' +', ' ', regex=True)
    .str.strip()
)

In [36]:
# Shorten "Wolverhampton Wanderers" to "Wolves" wherever it appears in a club name.
players["Club"] = players["Club"].map(
    lambda club: club.replace('Wolverhampton Wanderers', 'Wolves')
)

In [37]:
# Remove "Olympique" and tidy the whitespace it leaves behind, so that
# "Olympique Lyon" becomes "Lyon" rather than " Lyon".
players["Club"] = (
    players["Club"]
    .str.replace('Olympique', '', regex=False)
    .str.replace(r' +', ' ', regex=True)
    .str.strip()
)

In [38]:
# Shorten "Hertha BSC" to "Hertha" wherever it appears in a club name.
players["Club"] = players["Club"].map(
    lambda club: club.replace('Hertha BSC', 'Hertha')
)

In [39]:
# Display the cleaned frame (the rendered output is truncated by pandas).
players

Unnamed: 0_level_0,Player Link,Player,Position,Age,Country,Join,Market Value,Club,Year
League,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bundesliga,https://www.transfermarkt.co.uk/sebastian-vasi...,Sebastian Vasiliadis,Central Midfield,24.0,Greece,365.0,1.800,Arminia Bielefeld,2022
Bundesliga,https://www.transfermarkt.co.uk/julian-albrech...,Julian Albrecht,Central Midfield,20.0,Germany,1096.0,0.175,Hertha,2022
Bundesliga,https://www.transfermarkt.co.uk/jacob-barrett-...,Jacob Barrett Laursen,Left-Back,27.0,Denmark,730.0,1.700,Arminia Bielefeld,2022
Bundesliga,https://www.transfermarkt.co.uk/dickson-abiama...,Dickson Abiama,Centre-Forward,23.0,Nigeria,730.0,0.800,SpVgg Greuther Furth,2022
Bundesliga,https://www.transfermarkt.co.uk/melayro-bogard...,Melayro Bogarde,Defensive Midfield,20.0,Netherlands,730.0,1.200,TSG 1899 Hoffenheim,2022
...,...,...,...,...,...,...,...,...,...
Süper Lig,https://www.transfermarkt.co.uk/konrad-michala...,Konrad Michalak,Right Winger,24.0,Poland,0.0,1.700,Konyaspor,2022
Süper Lig,https://www.transfermarkt.co.uk/hakan-ozmert/p...,Hakan Ozmert,Central Midfield,37.0,Turkey,1617.0,0.150,Antalyaspor,2022
Süper Lig,https://www.transfermarkt.co.uk/mehmet-albayra...,Mehmet Albayrak,Centre-Back,18.0,Turkey,0.0,0.050,Sivasspor,2022
Süper Lig,https://www.transfermarkt.co.uk/enzo-crivelli/...,Enzo Crivelli,Centre-Forward,27.0,France,297.0,1.800,Antalyaspor,2022


In [40]:
# Write the cleaned frame to Players_2022.csv, encoded as UTF-8.
players.to_csv('Players_2022.csv', encoding='utf-8')