## Data scraping for wages

In [47]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time


wages_url = "https://fbref.com/en/comps/Big5/wages/Big-5-European-Leagues-Wages"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36"
}
# Seconds to wait for a response before giving up, so a stalled connection
# cannot hang the kernel indefinitely.
request_timeout = 30

# Fetch the league-wide wages page. The headers defined above are now actually
# sent with the request; raise_for_status() makes an HTTP error fail loudly
# instead of letting an error page be parsed as if it were data.
response = requests.get(wages_url, headers=headers, timeout=request_timeout)
response.raise_for_status()

# Create a BeautifulSoup object to parse the HTML content
soup = BeautifulSoup(response.content, "html.parser")

# Find the table cells of the "team" column; each is expected to hold a link
# to that team's own page.
# NOTE(review): if the same team appears in more than one table on this page,
# its URL is fetched more than once and its players are duplicated - confirm.
team_rows = soup.find_all("td", class_="left", attrs={"data-stat": "team"})

# Accumulates one list per player: [name, <text of every <td> in the row>...]
player_data = []

# Loop through each team cell to scrape that team's player wages
for team_row in team_rows:
    team_link = team_row.find("a")
    # Skip cells without a usable link rather than crashing on team_link["href"]
    if team_link is None or not team_link.get("href"):
        continue
    team_url = "https://fbref.com" + team_link["href"]

    # Pause between requests (presumably to stay under the site's rate limit)
    time.sleep(3)

    team_response = requests.get(team_url, headers=headers, timeout=request_timeout)
    team_response.raise_for_status()

    team_soup = BeautifulSoup(team_response.content, "html.parser")

    # Find the table rows containing player data
    rows = team_soup.find_all("tr", class_="")

    # Extract the required columns from each row
    for row in rows:
        player_info = row.find_all("td")
        player_name_element = row.find("th", scope="row", class_="left")
        if len(player_info) <= 1 or player_name_element is None:
            continue
        # Only rows whose header cell links to a player page are player rows
        player_name = player_name_element.find("a")
        if not player_name:
            continue
        player_row = [info.text.strip() for info in player_info]
        player_row.insert(0, player_name.text.strip())
        player_data.append(player_row)

# Create a DataFrame from the extracted data
columns = ["Player", "Nationality", "Position", "Age", "Weekly Wages", "Annual Wages", "Notes"]
df2 = pd.DataFrame(player_data, columns=columns)

print(df2.head())  # Print the first few rows of the DataFrame


          Player Nationality Position Age  \
0  Kylian Mbappé      fr FRA       FW  23   
1   Lionel Messi      ar ARG    FW,MF  35   
2         Neymar      br BRA    MF,FW  30   
3   Keylor Navas      cr CRC       GK  35   
4  Achraf Hakimi      ma MAR    DF,MF  23   

                            Weekly Wages  \
0  € 1,384,615 (£ 1,191,199, $1,514,022)   
1  € 1,223,846 (£ 1,052,888, $1,338,228)   
2    € 1,083,846 (£ 932,444, $1,185,143)   
3        € 314,615 (£ 270,667, $344,020)   
4        € 279,808 (£ 240,721, $305,959)   

                               Annual Wages Notes  
0  € 72,000,000 (£ 61,942,345, $78,729,163)        
1  € 63,640,000 (£ 54,750,151, $69,587,833)        
2  € 56,360,000 (£ 48,487,091, $61,627,439)        
3  € 16,360,000 (£ 14,074,677, $17,889,015)        
4  € 14,550,000 (£ 12,517,516, $15,909,852)        


In [49]:
# Persist the scraped rows so later cells can reload them without re-scraping
df2.to_csv(path_or_buf="player_wages.csv", index=False)

In [66]:
# Reload the scraped wages from disk
wages_df = pd.read_csv(filepath_or_buffer="./player_wages.csv")

In [67]:
# Display the frame as loaded from the CSV
wages_df

Unnamed: 0,Player,Nationality,Position,Age,Weekly Wages,Annual Wages,Notes
0,Kylian Mbappé,fr FRA,FW,23,"€ 1,384,615 (£ 1,191,199, $1,514,022)","€ 72,000,000 (£ 61,942,345, $78,729,163)",
1,Lionel Messi,ar ARG,"FW,MF",35,"€ 1,223,846 (£ 1,052,888, $1,338,228)","€ 63,640,000 (£ 54,750,151, $69,587,833)",
2,Neymar,br BRA,"MF,FW",30,"€ 1,083,846 (£ 932,444, $1,185,143)","€ 56,360,000 (£ 48,487,091, $61,627,439)",
3,Keylor Navas,cr CRC,GK,35,"€ 314,615 (£ 270,667, $344,020)","€ 16,360,000 (£ 14,074,677, $17,889,015)",
4,Achraf Hakimi,ma MAR,"DF,MF",23,"€ 279,808 (£ 240,721, $305,959)","€ 14,550,000 (£ 12,517,516, $15,909,852)",
...,...,...,...,...,...,...,...
5109,Joško Gvardiol,hr CRO,DF,20,"€ 21,538 (£ 18,530, $23,551)","€ 1,120,000 (£ 963,548, $1,224,676)",Unverified estimation
5110,Amadou Haidara,ml MLI,MF,24,"€ 19,231 (£ 16,544, $21,028)","€ 1,000,000 (£ 860,310, $1,093,461)",
5111,Janis Blaswich,de GER,GK,31,"€ 7,115 (£ 6,121, $7,780)","€ 370,000 (£ 318,315, $404,580)",Unverified estimation
5112,Hugo Novoa,es ESP,MF,19,"€ 5,192 (£ 4,467, $5,678)","€ 270,000 (£ 232,284, $295,234)",Unverified estimation


In [68]:
# Drop the trailing "Notes" column by name rather than by position, so that
# re-running this cell cannot silently remove a further column.
wages_df = wages_df.drop(columns=["Notes"], errors="ignore")

In [69]:
# Discard every row that has a missing value in any remaining column
wages_df = wages_df.dropna(how="any")

In [70]:
# Display the frame after the column drop and dropna steps above
wages_df

Unnamed: 0,Player,Nationality,Position,Age,Weekly Wages,Annual Wages
0,Kylian Mbappé,fr FRA,FW,23,"€ 1,384,615 (£ 1,191,199, $1,514,022)","€ 72,000,000 (£ 61,942,345, $78,729,163)"
1,Lionel Messi,ar ARG,"FW,MF",35,"€ 1,223,846 (£ 1,052,888, $1,338,228)","€ 63,640,000 (£ 54,750,151, $69,587,833)"
2,Neymar,br BRA,"MF,FW",30,"€ 1,083,846 (£ 932,444, $1,185,143)","€ 56,360,000 (£ 48,487,091, $61,627,439)"
3,Keylor Navas,cr CRC,GK,35,"€ 314,615 (£ 270,667, $344,020)","€ 16,360,000 (£ 14,074,677, $17,889,015)"
4,Achraf Hakimi,ma MAR,"DF,MF",23,"€ 279,808 (£ 240,721, $305,959)","€ 14,550,000 (£ 12,517,516, $15,909,852)"
...,...,...,...,...,...,...
5109,Joško Gvardiol,hr CRO,DF,20,"€ 21,538 (£ 18,530, $23,551)","€ 1,120,000 (£ 963,548, $1,224,676)"
5110,Amadou Haidara,ml MLI,MF,24,"€ 19,231 (£ 16,544, $21,028)","€ 1,000,000 (£ 860,310, $1,093,461)"
5111,Janis Blaswich,de GER,GK,31,"€ 7,115 (£ 6,121, $7,780)","€ 370,000 (£ 318,315, $404,580)"
5112,Hugo Novoa,es ESP,MF,19,"€ 5,192 (£ 4,467, $5,678)","€ 270,000 (£ 232,284, $295,234)"


In [71]:
def extract_usd_amount(wage_text: pd.Series) -> pd.Series:
    """Pull the US-dollar figure out of multi-currency wage strings.

    Each value is expected to look like
    "€ 1,384,615 (£ 1,191,199, $1,514,022)"; the digits following the "$"
    are returned as integers (1514022 for the example).

    Parameters
    ----------
    wage_text : pd.Series
        Wage strings. A Series that is already numeric is returned unchanged,
        which keeps the calling cell safe to re-run.

    Returns
    -------
    pd.Series
        Integer dollar amounts, aligned with the input index. A fractional
        amount, if one ever appears, is truncated toward zero.

    Raises
    ------
    ValueError
        If any value contains no "$<number>" part.
    """
    if pd.api.types.is_numeric_dtype(wage_text):
        return wage_text

    usd = (
        wage_text.str.replace(",", "", regex=False)  # drop thousands separators
        .str.extract(r"\$([\d.]+)", expand=False)  # expand=False -> a Series
    )

    unparsed = usd.isna()
    if unparsed.any():
        raise ValueError(f"{int(unparsed.sum())} wage value(s) contain no '$' amount")

    return pd.to_numeric(usd).astype(int)


# Keep only the USD figure of "Weekly Wages", as an integer
wages_df["Weekly Wages"] = extract_usd_amount(wages_df["Weekly Wages"])

# Keep only the USD figure of "Annual Wages", as an integer
wages_df["Annual Wages"] = extract_usd_amount(wages_df["Annual Wages"])

wages_df.head()



Unnamed: 0,Player,Nationality,Position,Age,Weekly Wages,Annual Wages
0,Kylian Mbappé,fr FRA,FW,23,1514022,78729163
1,Lionel Messi,ar ARG,"FW,MF",35,1338228,69587833
2,Neymar,br BRA,"MF,FW",30,1185143,61627439
3,Keylor Navas,cr CRC,GK,35,344020,17889015
4,Achraf Hakimi,ma MAR,"DF,MF",23,305959,15909852


In [73]:
# Values look like "fr FRA": a lowercase prefix followed by the upper-case
# country code. Keep the last whitespace-separated token instead of slicing
# off a fixed three characters: the prefix length is not guaranteed (a
# three-letter prefix would leave a leading space), and this form is safe to
# re-run because "FRA" stays "FRA".
wages_df["Nationality"] = wages_df["Nationality"].str.split().str[-1]

In [75]:
# Write the cleaned wages table to disk without the index column
wages_df.to_csv(path_or_buf="player_wages_clean.csv", index=False)

In [76]:
# Display the cleaned frame
wages_df

Unnamed: 0,Player,Nationality,Position,Age,Weekly Wages,Annual Wages
0,Kylian Mbappé,FRA,FW,23,1514022,78729163
1,Lionel Messi,ARG,"FW,MF",35,1338228,69587833
2,Neymar,BRA,"MF,FW",30,1185143,61627439
3,Keylor Navas,CRC,GK,35,344020,17889015
4,Achraf Hakimi,MAR,"DF,MF",23,305959,15909852
...,...,...,...,...,...,...
5109,Joško Gvardiol,CRO,DF,20,23551,1224676
5110,Amadou Haidara,MLI,MF,24,21028,1093461
5111,Janis Blaswich,GER,GK,31,7780,404580
5112,Hugo Novoa,ESP,MF,19,5678,295234
