# Introduction
After collecting the transfer data from transfermarkt.ch, we can also get individual player stats from the same website. To do so, we need to scrape the website again:

In [None]:
#import packages
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

# Web Scraping preparation
Before we can scrape the data for every player, we need to make some preparations:

In [None]:
# Load the transfer table that was saved as a pickle; its index and its "ID"
# column supply the player names and ids used for scraping.
# NOTE(review): unpickling can execute arbitrary code, so only load a
# "Transfers.pkl" file that comes from a trusted source.
file="Transfers.pkl"
transfers=pd.read_pickle(file)

In [None]:
# Players we need data for: names come from the index of the transfer table,
# ids from its "ID" column (both are used to build the page URLs)
players = list(transfers.index)
ids = list(transfers["ID"])

In [None]:
# Adjust each player name so it can be used in the URL: apostrophes and dots
# are dropped, spaces become hyphens
players_html = [
    name.replace("'", "").replace(".", "").replace(" ", "-")
    for name in players
]
players_html

In [None]:
# Empty lists that will hold the scraped HTML elements; the scraping loop
# appends one entry per player to each of them
stats, total, heights, current_team = [], [], [], []

# Web Scraping
Now that we have made the necessary preparations, we can scrape the website:

In [None]:
# Browser-like User-Agent header, sent so that transfermarkt treats us as a
# web browser and not as a scraper
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36",
}

In [None]:
# Fetch the detailed performance page of every player and keep the raw HTML
# elements needed later. players_html and ids are parallel lists (both were
# derived from the transfer table), so they are walked in lockstep.
for url_name, player_id in zip(players_html, ids):
    # address of this player's page
    page = (
        "https://www.transfermarkt.co.uk/" + url_name
        + "/leistungsdatendetails/spieler/" + player_id
        + "/saison/2018/verein/0/liga/0/wettbewerb//pos/0/trainer_id/0/plus/1"
    )
    # download the page, sending the browser-like headers
    response = requests.get(page, headers=headers)
    # parse the downloaded content as HTML
    soup = BeautifulSoup(response.content, "html.parser")
    # collect the matching tags of this page, one list entry per player
    stats.append(soup.find_all("td", {"class": "zentriert"}))
    total.append(soup.find_all("td", {"class": "rechts"}))
    heights.append(soup.find_all("span", {"class": "dataValue"}))
    current_team.append(soup.find_all("a", {"class": "vereinprofil_tooltip"}))

# Get data from HTML code
Now that we have the HTML code, we can extract from it the data we need for the analysis.

In [None]:
# Get the current team of every player: the "alt" text of the image inside
# the second "vereinprofil_tooltip" link found on the player's page.
# NaN is stored when that link is missing (IndexError), when it contains no
# <img> tag (find() returns None -> TypeError) or when the image has no
# "alt" attribute (KeyError).
team_now = []
for player in current_team:
    try:
        team_now.append(player[1].find("img")["alt"])
    except (IndexError, TypeError, KeyError):
        team_now.append(np.nan)

In [None]:
def str_conv(lst):
    """Convert the scraped values in ``lst`` to numbers.

    A value equal to "-" becomes 0, anything ``float()`` accepts becomes that
    float, and a value ``float()`` rejects with ValueError becomes NaN.
    Returns a new list of the same length, in the same order.
    """
    def _as_number(value):
        # "-" is mapped to zero
        if value == "-":
            return 0
        try:
            return float(value)
        except ValueError:
            return np.nan

    return [_as_number(value) for value in lst]

In [None]:
def list_transform(webdata,index):
    """Extract element ``index`` of every player's scraped elements as numbers.

    Parameters
    ----------
    webdata : list
        One entry per player, each holding the elements found on that
        player's page (empty when nothing was found). Elements expose their
        content through ``.text``. Passing the module-level ``total`` or
        ``heights`` list selects the matching clean-up rule below.
    index : int
        Position of the wanted element within each player's entry.

    Returns
    -------
    list
        One value per player, converted by ``str_conv``. Players whose entry
        is empty, or has no element at ``index``, yield NaN.
    """
    # Which clean-up applies depends only on which list was handed in, so
    # decide once, by identity, instead of deep-comparing the lists for
    # every player.
    is_total = webdata is total
    is_height = webdata is heights

    lst = []
    for cells in webdata:
        if not cells:  # nothing scraped for this player --> np.nan
            lst.append(np.nan)
            continue
        try:
            text = cells[index].text
        except IndexError:
            # fewer elements than expected: treat like missing data
            lst.append(np.nan)
            continue
        if is_total:
            # remove "'" and "." from the numbers
            text = text.replace("'", "").replace(".", "")
        elif is_height:
            # remove "," and the trailing " m" from the height
            text = text.replace(",", "").replace(" m", "")
        lst.append(text)
    return str_conv(lst)

In [None]:
# Element 0 of each player's "zentriert" cells; becomes the "Games in Squad" column
squad=list_transform(stats,0)

In [None]:
# Element 1 of each player's "zentriert" cells; becomes the "Games Played" column
games_played=list_transform(stats,1)

In [None]:
# Element 2 of each player's "zentriert" cells; becomes the "Points per game" column
PPG=list_transform(stats,2)

In [None]:
# Element 3 of each player's "zentriert" cells; becomes the "Goals" column
goals=list_transform(stats,3)


In [None]:
# Element 4 of each player's "zentriert" cells; becomes the "Assists" column
assists=list_transform(stats,4)

In [None]:
# Element 5 of each player's "zentriert" cells; becomes the "Own goals" column
own_goals=list_transform(stats,5)

In [None]:
# Element 6 of each player's "zentriert" cells; becomes the "Games subbed on" column
sub_on=list_transform(stats,6)

In [None]:
# Element 7 of each player's "zentriert" cells; becomes the "Games subbed off" column
sub_off=list_transform(stats,7)

In [None]:
# Element 8 of each player's "zentriert" cells; becomes the "Yellow Cards" column
yellow_cards=list_transform(stats,8)

In [None]:
# Element 9 of each player's "zentriert" cells; becomes the "Red Cards" column
red_cards=list_transform(stats,9)

In [None]:
# Element 10 of each player's "zentriert" cells; becomes the "Two Yellow Cards" column
two_yellows=list_transform(stats,10)

In [None]:
# Element 11 of each player's "zentriert" cells; becomes the "Penalty Goals" column
penalty_goals=list_transform(stats,11)

In [None]:
# Element 2 of each player's "rechts" cells, with "'" and "." stripped by
# list_transform; becomes the "Minutes Field" column
minutes_field=list_transform(total,2)

In [None]:
# Element 1 of each player's "rechts" cells, with "'" and "." stripped by
# list_transform; becomes the "Minutes GK" column
minutes_GK=list_transform(total,1)

In [None]:
# Element 3 of each player's "dataValue" spans, with "," and " m" stripped by
# list_transform; becomes the "Height (cm)" column
height=list_transform(heights,3)

In [None]:
# Get the position of every player from element 4 of the "dataValue" spans:
# the first 9 characters of its text are skipped and all spaces are removed.
positions = []
for spans in heights:
    try:
        positions.append(spans[4].text[9:].replace(" ", ""))
    except IndexError:
        # Nothing (or too little) was scraped for this player. Record NaN,
        # as list_transform does for empty entries, so the row is removed
        # by the later dropna instead of aborting the whole run here.
        positions.append(np.nan)

# Data Handling
Now that we have all the data we need in columns, we can create the player stats dataframe and perform the necessary data transformations.

In [None]:
# Build the dataframe with one row per player and one column per extracted
# list, then remove every row in which some value is missing
df = pd.DataFrame(
    {
        "Player": players,
        "ID": ids,
        "Position": positions,
        "Current Team (2020/21)": team_now,
        "Height (cm)": height,
        "Games in Squad": squad,
        "Games Played": games_played,
        "Points per game": PPG,
        "Goals": goals,
        "Assists": assists,
        "Own goals": own_goals,
        "Games subbed on": sub_on,
        "Games subbed off": sub_off,
        "Yellow Cards": yellow_cards,
        "Red Cards": red_cards,
        "Two Yellow Cards": two_yellows,
        "Penalty Goals": penalty_goals,
        "Minutes Field": minutes_field,
        "Minutes GK": minutes_GK,
    }
)
df.dropna(inplace=True)

In [None]:
# Use the player name as the row index; the "Player" column is moved into
# the index and no longer appears as a regular column
df = df.set_index("Player")
df

In [None]:
# Encode the position as a flag (goalkeeper --> 1, anything else --> 0) and
# rename the column so that it states this encoding
df["Position"] = (df["Position"] == "Goalkeeper").astype(int)
df = df.rename(columns={"Position": "Position (GK: 1, Other: 0)"})
df

In [None]:
# Save the finished player-stats dataframe as a pickle file ("stats_tm.pkl")
df.to_pickle("stats_tm.pkl")