This notebook scrapes player in-game statistics from fbref.com and merges them with player valuations. The data covers 10 professional soccer leagues around the world.

In [1]:
# libraries
import io
import random
import time

import fuzzywuzzy as fuzz
import numpy as np
import pandas as pd
import requests

# Seasons to scrape, spelled the way they are inserted into the fbref URL.
years_special = ['2022-2023']
# fbref competition ids; entry i pairs with leagues[i] and league_names[i].
numbers = [11,20,9,12,30,13,40,37,23,32]
# Stat-page slugs; one table is fetched per (league, stat, season).
stats = ['passing','defense','keepers','playingtime']
# URL suffix for each competition, in the same order as `numbers`.
leagues = ['-Serie-A-Stats','-Bundesliga-Stats',
           '-Premier-League-Stats','-La-Liga-Stats','-Russian-Premier-League-Stats',
           '-Ligue-1-Stats','-Scottish-Premiership-Stats','-Belgian-Pro-League-Stats','-Eredivisie-Stats',
           '-Primeira-Liga-Stats']

# Label written to the 'Based' column, in the same order as `numbers`.
league_names = ['Serie A','Bundesliga','Premier League','La Liga', 
               'Premier Liga', 'Ligue 1', 'Scottish Premiership',
               'Jupiler Pro League', 'Eredivisie', 'Liga Nos']

# One list of per-league DataFrames for each stat type.
keepers = []
defense = []
passing = []
playingtime = []

# Route each stat slug to its accumulator list.
tables_by_stat = {'passing': passing, 'defense': defense, 'keepers': keepers}

# The three league lists are walked in lockstep; zip() would silently
# truncate to the shortest one, so fail loudly if they ever drift apart.
assert len(numbers) == len(leagues) == len(league_names), \
    'numbers, leagues and league_names must have the same length'

# Seconds to wait for the server before giving up on a request.
request_timeout_s = 30

for comp_id, league_slug, league_name in zip(numbers, leagues, league_names):
    for t in stats:
        for z in years_special:
            url = 'https://fbref.com/en/comps/' + str(comp_id) + '/' + z + '/' + t + '/' + z + league_slug
            page = requests.get(url, timeout=request_timeout_s)
            # Stop on HTTP errors instead of trying to parse an error page.
            page.raise_for_status()
            # Remove HTML comment delimiters so commented-out markup is parsed too.
            html = page.text.replace('<!--', '').replace('-->', '')
            # header=1 takes column names from the second header row; [2] picks
            # the third parsed table.
            # NOTE(review): assumes the player table is always at index 2 - confirm.
            df = pd.read_html(io.StringIO(html), header=1)[2]
            df['Based'] = league_name
            # Any slug without its own list falls through to playingtime.
            tables_by_stat.get(t, playingtime).append(df)
            # Pause between requests to avoid hammering the site.
            time.sleep(3)
            print('Imported ' + t + ' data for ' + league_name)

Imported passing data for Serie A
Imported defense data for Serie A
Imported keepers data for Serie A
Imported playingtime data for Serie A
Imported passing data for Bundesliga
Imported defense data for Bundesliga
Imported keepers data for Bundesliga
Imported playingtime data for Bundesliga
Imported passing data for Premier League
Imported defense data for Premier League
Imported keepers data for Premier League
Imported playingtime data for Premier League
Imported passing data for La Liga
Imported defense data for La Liga
Imported keepers data for La Liga
Imported playingtime data for La Liga
Imported passing data for Premier Liga
Imported defense data for Premier Liga
Imported keepers data for Premier Liga
Imported playingtime data for Premier Liga
Imported passing data for Ligue 1
Imported defense data for Ligue 1
Imported keepers data for Ligue 1
Imported playingtime data for Ligue 1
Imported passing data for Scottish Premiership
Imported defense data for Scottish Premiership
Import

In [2]:
# Columns present in all four fbref tables; used as the join key below.
merge_keys = ['Player','Squad','Born','Based','Age','90s']


def _clean_passing(df):
    """Return a passing table with disambiguated completion-% names and the raw counts dropped.

    Age is cut to its first two characters, as in the other three tables,
    because Age is one of the merge keys and must be formatted identically.
    """
    df = df.assign(Age=df['Age'].str[:2])
    df = df.rename(columns={'Cmp':'Total Cmp', 'Att': 'Total Att', 'Cmp%':'Total Cmp%','Cmp.1':'Short Cmp',
                            'Att.1':'Short Att','Cmp%.1':'Short Cmp%', 'Cmp.2':'Medium Cmp', 'Att.2':'Medium Att',
                            'Cmp%.2':'Medium Cmp%', 'Cmp.3':'Long Cmp', 'Att.3':'Long Att', 'Cmp%.3':'Long Cmp%'})
    return df.drop(columns=['Rk','Nation','Pos','Matches','Short Cmp','Medium Cmp','Long Cmp',
                            'Short Att','Medium Att','Long Att','Total Cmp','Total Att'])


def _clean_defense(df):
    """Return a defense table with a spelled-out Position column and clearer stat names."""
    position_names = {'MF': 'Midfielder', 'DF': 'Defender', 'FW': 'Forward', 'GK': 'Goalkeeper'}
    df = df.assign(
        Age=df['Age'].str[:2],
        # Only the first two characters of 'Pos' are used for the position label.
        Position=df['Pos'].str[:2].replace(position_names),
    )
    df = df.drop(columns=['Rk','Nation','Pos','Matches'])
    return df.rename(columns={'Sh':'Blocks Sh','Pass':'Blocks Pass','Tkl.1':'drTkl'})


def _clean_playingtime(df):
    """Return a playing-time table without the minutes/appearance columns."""
    df = df.assign(Age=df['Age'].str[:2])
    df = df.drop(columns=['Rk','Nation','Pos','Matches','MP','Min','Mn/MP','Min%','Starts',
                          'Mn/Start','Compl','Subs','Mn/Sub','unSub'])
    return df.rename(columns={'On-Off.1':'xG_On-Off'})


def _clean_keepers(df):
    """Return a goalkeeping table without penalty, appearance and W/D/L columns."""
    df = df.assign(Age=df['Age'].str[:2])
    return df.drop(columns=['Rk','Nation','Pos','Matches','PKatt','PKA','PKsv','PKm','Save%.1',
                            'MP','Starts','Min','W','D','L'])


# Replace every scraped frame with its cleaned version (lists are updated in place).
passing[:] = [_clean_passing(df) for df in passing]
defense[:] = [_clean_defense(df) for df in defense]
playingtime[:] = [_clean_playingtime(df) for df in playingtime]
keepers[:] = [_clean_keepers(df) for df in keepers]

# Stack the per-league frames once, after all cleaning is finished.
df_gk = pd.concat(keepers)
df_def = pd.concat(defense)
df_pass = pd.concat(passing)
df_pt = pd.concat(playingtime)

# Inner-join keepers -> defense -> passing -> playing time on the shared keys,
# so only players present in the goalkeeping tables remain.
final = df_gk
for stat_table in (df_def, df_pass, df_pt):
    final = pd.merge(final, stat_table, on=merge_keys)

# Keep only the last two characters of the birth year.
final['Born'] = final['Born'].astype(str).str[-2:]
# player_code = lower-cased name with spaces replaced by hyphens + two-digit birth year.
final['player_code'] = final['Player'].str.lower().str.replace(' ', '-', regex=False) + final['Born']

# Character-by-character transliteration applied to player_code.
# NOTE(review): some characters (e.g. 'ț', 'ą') are not in this table, so a few
# codes keep non-ASCII letters. The table is left unchanged because the CSVs
# merged later presumably use the same keying - confirm before extending it.
replacements = {
    'à': 'a', 'á': 'a', 'â': 'a', 'ä': 'a', 'ǎ': 'a', 'æ': 'a', 'ã': 'a', 'å': 'a', 'ā': 'a',
    'è': 'e', 'é': 'e', 'ê': 'e', 'ë': 'e', 'ě': 'e', 'ẽ': 'e', 'ē': 'e', 'ė': 'e', 'ę': 'e',
    'ì': 'i', 'í': 'i', 'î': 'i', 'ï': 'i', 'ǐ': 'i', 'ĩ': 'i', 'ī': 'i', 'ı': 'i', 'į': 'i',
    'ò': 'o', 'ó': 'o', 'ô': 'o', 'ö': 'o', 'ǒ': 'o', 'œ': 'o', 'ø': 'o', 'õ': 'o', 'ō': 'o',
    'ù': 'u', 'ú': 'u', 'û': 'u', 'ü': 'u', 'ǔ': 'u', 'ũ': 'u', 'ū': 'u', 'ű': 'u', 'ů': 'u',
    'ğ':'g','ġ':'g','Ğ':'G','Ġ':'G','ç':'c','ć':'c','č':'c','ċ':'c','ł':'l','ļ':'l','ľ':'l',
    'ß':'s','ş':'s','ș':'s','ś':'s','š':'s','ķ':'k','ñ':'n','ń':'n','ņ':'n','ň':'n','ŵ':'w',
    'ź':'z','ž':'z','ż':'z','þ':'b','đ':'d',"'":'',"’":'','ý':'y','i̇':'i','ă':'a','ř':'r'
}

# regex=False: every key is a literal character sequence, never a pattern.
for key in replacements:
    final['player_code'] = final['player_code'].str.replace(key, replacements[key], regex=False)
final

Unnamed: 0,Player,Squad,Age,Born,90s,GA,GA90,SoTA,Saves,Save%,...,onGA,+/-,+/-90,On-Off,onxG,onxGA,xG+/-,xG+/-90,xG_On-Off,player_code
0,Emil Audero,Sampdoria,25,97,25.0,39,1.56,121,82,71.9,...,39,-28,-1.12,+0.34,21.4,37.7,-16.3,-0.65,+0.57,emil-audero97
1,Francesco Bardi,Bologna,30,92,1.0,0,0.00,4,4,100.0,...,0,+3,+3.00,+2.97,0.7,0.8,-0.1,-0.14,-0.01,francesco-bardi92
2,Marco Carnesecchi,Cremonese,22,00,27.0,47,1.74,153,107,70.6,...,47,-22,-0.81,+0.19,28.1,44.3,-16.2,-0.60,+0.42,marco-carnesecchi00
3,Michele Cerofolini,Fiorentina,23,99,5.0,3,0.60,11,8,81.8,...,3,+10,+2.00,+2.00,8.5,4.1,+4.4,+0.88,+0.66,michele-cerofolini99
4,Andrea Consigli,Sassuolo,35,87,35.0,55,1.57,117,64,55.6,...,55,-10,-0.29,+1.05,48.4,44.0,+4.4,+0.13,+0.96,andrea-consigli87
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386,Bruno Varela,Vitória,27,94,27.0,25,0.93,97,72,75.3,...,25,+3,+0.11,+1.25,28.4,31.4,-3.0,-0.11,+0.33,bruno-varela94
387,Igor Vekič,Paços,24,98,9.1,17,1.86,34,17,52.9,...,17,-10,-1.10,-0.05,9.8,11.9,-2.2,-0.24,+0.61,igor-vekic98
388,Odisseas Vlachodimos,Benfica,28,94,34.0,20,0.59,72,53,75.0,...,20,+62,+1.83,+1.83,87.1,22.9,+64.2,+1.89,+4.57,odisseas-vlachodimos94
389,Ivan Zlobin,Famalicão,25,97,1.0,2,2.00,5,3,60.0,...,2,0,0.00,+0.24,1.5,1.6,-0.1,-0.13,+0.18,ivan-zlobin97


In [3]:
# Show every row whose (player_code, Born) pair occurs more than once.
dup_code_mask = final.duplicated(subset=['player_code', 'Born'], keep=False)
final.loc[dup_code_mask]

Unnamed: 0,Player,Squad,Age,Born,90s,GA,GA90,SoTA,Saves,Save%,...,onGA,+/-,+/-90,On-Off,onxG,onxGA,xG+/-,xG+/-90,xG_On-Off,player_code
11,Pierluigi Gollini,Napoli,27,95,4.0,4,1.0,16,12,75.0,...,4,1,0.25,-1.16,5.9,4.6,1.3,0.32,-0.61,pierluigi-gollini95
12,Pierluigi Gollini,Fiorentina,27,95,3.0,2,0.67,8,6,75.0,...,2,1,0.33,0.08,3.3,3.3,0.0,-0.01,-0.35,pierluigi-gollini95
30,Ionuț Radu,Cremonese,25,97,9.0,19,2.11,64,45,75.0,...,19,-12,-1.33,-0.61,8.6,19.1,-10.5,-1.17,-0.59,ionuț-radu97
70,Jonas Omlin,M'Gladbach,28,94,14.3,18,1.26,56,38,71.4,...,18,2,0.14,0.39,21.5,21.1,0.4,0.03,0.11,jonas-omlin94
78,Yann Sommer,M'Gladbach,33,88,10.0,14,1.4,60,47,80.0,...,14,4,0.4,0.69,17.5,12.8,4.7,0.47,0.72,yann-sommer88
79,Yann Sommer,Bayern Munich,33,88,19.0,25,1.32,55,30,67.3,...,25,18,0.95,-1.45,39.7,22.4,17.3,0.91,-0.46,yann-sommer88
144,Benjamin Lecomte,Espanyol,31,91,10.0,14,1.4,40,27,65.0,...,14,-4,-0.4,0.06,11.2,11.2,0.0,0.0,0.41,benjamin-lecomte91
153,Fernando Pacheco,Espanyol,30,92,16.0,33,2.06,88,54,67.0,...,33,-10,-0.62,-0.31,23.1,31.4,-8.3,-0.52,-0.37,fernando-pacheco92
154,Fernando Pacheco,Almería,30,92,1.0,2,2.0,5,3,60.0,...,2,-1,-1.0,-0.59,1.0,1.0,0.0,0.0,0.46,fernando-pacheco92
160,Gerónimo Rulli,Villarreal,30,92,14.0,10,0.71,56,46,82.1,...,10,5,0.36,-0.23,17.1,14.1,3.1,0.22,-0.14,geronimo-rulli92


In [4]:
# Player valuations; 'Born' is derived as the last two characters of date_of_birth
# so it can be matched against the two-character 'Born' key in `final`.
pv = pd.read_csv('transfermarkt_data.csv')
pv['Born'] = pv['date_of_birth'].str[-2:].astype(str)

# Columns removed from the joined frame.
unused_columns = [
    'current_club_domestic_competition_id', 'first_name',
    'last_name', 'player_club_domestic_competition_id',
    'club_id', 'last_season', 'current_club_name', 'club_code', 'date',
    'current_club_id', 'player_id', 'Unnamed: 0', 'name',
    'date_of_birth',
]

# Default (inner) join: rows without a match on all four keys are dropped.
merged = final.merge(pv, on=['player_code', 'Squad', 'Born', 'Based']).drop(columns=unused_columns)
merged

Unnamed: 0,Player,Squad,Age,Born,90s,GA,GA90,SoTA,Saves,Save%,...,xG+/-,xG+/-90,xG_On-Off,player_code,country_of_birth,sub_position,foot,height_in_cm,contract_expiration_date,market_value_in_eur
0,Emil Audero,Sampdoria,25,97,25.0,39,1.56,121,82,71.9,...,-16.3,-0.65,+0.57,emil-audero97,Indonesia,Goalkeeper,right,192.0,6/30/26 0:00,6000000
1,Francesco Bardi,Bologna,30,92,1.0,0,0.00,4,4,100.0,...,-0.1,-0.14,-0.01,francesco-bardi92,Italy,Goalkeeper,right,188.0,6/30/23 0:00,500000
2,Marco Carnesecchi,Cremonese,22,00,27.0,47,1.74,153,107,70.6,...,-16.2,-0.60,+0.42,marco-carnesecchi00,Italy,Goalkeeper,right,191.0,6/30/23 0:00,13000000
3,Michele Cerofolini,Fiorentina,23,99,5.0,3,0.60,11,8,81.8,...,+4.4,+0.88,+0.66,michele-cerofolini99,Italy,Goalkeeper,right,188.0,6/30/24 0:00,300000
4,Andrea Consigli,Sassuolo,35,87,35.0,55,1.57,117,64,55.6,...,+4.4,+0.13,+0.96,andrea-consigli87,Italy,Goalkeeper,right,189.0,6/30/24 0:00,1500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,João Valido,Arouca,22,00,0.0,0,0.00,0,0,,...,0.0,0.00,+0.32,joao-valido00,Portugal,Goalkeeper,right,185.0,6/30/25 0:00,100000
353,Bruno Varela,Vitória,27,94,27.0,25,0.93,97,72,75.3,...,-3.0,-0.11,+0.33,bruno-varela94,Portugal,Goalkeeper,right,191.0,6/30/24 0:00,4000000
354,Igor Vekič,Paços,24,98,9.1,17,1.86,34,17,52.9,...,-2.2,-0.24,+0.61,igor-vekic98,,Goalkeeper,right,192.0,6/30/23 0:00,300000
355,Odisseas Vlachodimos,Benfica,28,94,34.0,20,0.59,72,53,75.0,...,+64.2,+1.89,+4.57,odisseas-vlachodimos94,Germany,Goalkeeper,right,191.0,6/30/27 0:00,16000000


In [5]:
# Show every row whose player_code still occurs more than once after the merge.
dup_merged_mask = merged.duplicated(subset=['player_code'], keep=False)
merged.loc[dup_merged_mask]

Unnamed: 0,Player,Squad,Age,Born,90s,GA,GA90,SoTA,Saves,Save%,...,xG+/-,xG+/-90,xG_On-Off,player_code,country_of_birth,sub_position,foot,height_in_cm,contract_expiration_date,market_value_in_eur


In [6]:
# Save the stats + valuation table to disk (the index is written as well, pandas' default).
gk_2023_csv = 'gk_data_2023.csv'
merged.to_csv(gk_2023_csv)

In [7]:
# Display the column labels of the merged frame.
merged.columns

Index(['Player', 'Squad', 'Age', 'Born', '90s', 'GA', 'GA90', 'SoTA', 'Saves',
       'Save%', 'CS', 'CS%', 'Based', 'Tkl', 'TklW', 'Def 3rd', 'Mid 3rd',
       'Att 3rd', 'drTkl', 'Att', 'Tkl%', 'Lost', 'Blocks', 'Blocks Sh',
       'Blocks Pass', 'Int', 'Tkl+Int', 'Clr', 'Err', 'Position', 'Total Cmp%',
       'TotDist', 'PrgDist', 'Short Cmp%', 'Medium Cmp%', 'Long Cmp%', 'Ast',
       'xAG', 'xA', 'A-xAG', 'KP', '1/3', 'PPA', 'CrsPA', 'PrgP', 'PPM', 'onG',
       'onGA', '+/-', '+/-90', 'On-Off', 'onxG', 'onxGA', 'xG+/-', 'xG+/-90',
       'xG_On-Off', 'player_code', 'country_of_birth', 'sub_position', 'foot',
       'height_in_cm', 'contract_expiration_date', 'market_value_in_eur'],
      dtype='object')

In [8]:
# Transfer records, joined to the merged stats on player_code and Squad.
tr = pd.read_csv('2022_transfers.csv')

# Columns removed from the joined frame.
unneeded_columns = ['Unnamed: 0', 'age', 'contract_expiration_date', 'player_name', 'season']

# Default (inner) join: only players with a matching transfer row are kept.
bleh = merged.merge(tr, on=['player_code', 'Squad']).drop(columns=unneeded_columns)
bleh

Unnamed: 0,Player,Squad,Age,Born,90s,GA,GA90,SoTA,Saves,Save%,...,xG+/-90,xG_On-Off,player_code,country_of_birth,sub_position,foot,height_in_cm,market_value_in_eur,fee,transfer_period
0,Alessio Cragno,Monza,28,94,1.0,3,3.00,5,2,40.0,...,-1.05,-0.93,alessio-cragno94,Italy,Goalkeeper,right,184.0,4000000,400000.0,Summer
1,Michele Di Gregorio,Monza,25,97,37.0,49,1.32,167,119,72.5,...,-0.12,+0.93,michele-di-gregorio97,Italy,Goalkeeper,right,187.0,7000000,4000000.0,Summer
2,Bartłomiej Drągowski,Spezia,24,97,32.7,54,1.65,164,112,69.5,...,-0.56,-0.65,bartlomiej-drągowski97,Poland,Goalkeeper,right,191.0,4500000,2000000.0,Summer
3,Wladimiro Falcone,Lecce,27,95,38.0,46,1.21,156,112,73.7,...,-0.16,,wladimiro-falcone95,Italy,Goalkeeper,right,195.0,5000000,0.0,Summer
4,Federico Marchetti,Spezia,39,83,0.7,2,2.73,3,1,33.3,...,+0.38,+0.87,federico-marchetti83,Italy,Goalkeeper,left,188.0,50000,0.0,Winter
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,Stanislav Kritsyuk,Gil Vicente FC,31,90,10.3,15,1.45,55,40,74.5,...,-0.46,-0.26,stanislav-kritsyuk90,UdSSR,Goalkeeper,right,192.0,1500000,0.0,Summer
98,Giorgi Makaridze,Marítimo,32,90,6.0,11,1.83,20,9,55.0,...,-0.03,+0.42,giorgi-makaridze90,UdSSR,Goalkeeper,right,194.0,700000,0.0,Winter
99,Samuel Soares,Benfica,20,02,0.0,0,0.00,1,1,100.0,...,-2.68,-4.57,samuel-soares02,Portugal,Goalkeeper,right,190.0,2000000,0.0,Summer
100,Matouš Trmal,Marítimo,23,98,6.0,5,0.83,26,21,84.6,...,-0.11,+0.32,matous-trmal98,Czech Republic,Goalkeeper,right,192.0,700000,50000.0,Summer


In [9]:
# Squad-level comparison figures, attached to every player row of that squad.
squ = pd.read_csv('squad_comparisons.csv')
oop = bleh.merge(squ, on=['Squad'])

# Cast the two columns used in the Target calculation to float.
oop = oop.astype({'+/-90': float, '90s': float})
oop

Unnamed: 0,Player,Squad,Age,Born,90s,GA,GA90,SoTA,Saves,Save%,...,sq_Poss,sq_Gls,sq_Ast,sq_Gls/90,sq_Ast/90,opp_sq_Poss,opp_sq_Gls,opp_sq_Ast,opp_sq_Gls/90,opp_sq_Ast/90
0,Alessio Cragno,Monza,28,94,1.0,3,3.00,5,2,40.0,...,-0.7,-12.0,-3.0,-0.32,-0.07,0.9,15.0,11.0,0.39,0.29
1,Michele Di Gregorio,Monza,25,97,37.0,49,1.32,167,119,72.5,...,-0.7,-12.0,-3.0,-0.32,-0.07,0.9,15.0,11.0,0.39,0.29
2,Bartłomiej Drągowski,Spezia,24,97,32.7,54,1.65,164,112,69.5,...,4.7,-9.0,-7.0,-0.24,-0.19,-4.7,-9.0,-3.0,-0.24,-0.08
3,Federico Marchetti,Spezia,39,83,0.7,2,2.73,3,1,33.3,...,4.7,-9.0,-7.0,-0.24,-0.19,-4.7,-9.0,-3.0,-0.24,-0.08
4,Wladimiro Falcone,Lecce,27,95,38.0,46,1.21,156,112,73.7,...,-11.2,-27.0,-17.0,-0.71,-0.45,11.4,13.0,13.0,0.34,0.34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,Ignacio De Arruabarrena,Arouca,25,97,30.0,32,1.07,134,99,79.1,...,-2.0,6.0,5.0,0.18,0.15,1.7,-16.0,-16.0,-0.47,-0.47
97,João Valido,Arouca,22,00,0.0,0,0.00,0,0,,...,-2.0,6.0,5.0,0.18,0.15,1.7,-16.0,-16.0,-0.47,-0.47
98,Franco Israel,Sporting CP,22,00,3.0,3,1.00,4,1,25.0,...,3.3,-5.0,6.0,-0.15,0.18,-3.0,10.0,9.0,0.29,0.27
99,Stanislav Kritsyuk,Gil Vicente FC,31,90,10.3,15,1.45,55,40,74.5,...,-0.7,-16.0,-13.0,-0.47,-0.38,0.6,1.0,11.0,0.03,0.32


In [10]:
# Target is computed only for goalkeepers; every other row gets NaN.
is_goalkeeper = oop["Position"] == "Goalkeeper"

# (player +/- per 90  minus  opponent-squad goals per 90)  scaled by 90s played.
gk_target = (oop['+/-90'] - oop['opp_sq_Gls/90']) * oop['90s']

# np.select takes parallel lists: the i-th calculation is used where the i-th condition holds.
conditions = [is_goalkeeper]
calculations = [gk_target]

oop['Target'] = np.select(conditions, calculations, default=np.nan)

In [11]:
# Goalkeeper rows ordered from highest to lowest Target.
goalkeeper_rows = oop.loc[oop['Position'] == 'Goalkeeper']
goalkeeper_rows.sort_values(by='Target', ascending=False)

Unnamed: 0,Player,Squad,Age,Born,90s,GA,GA90,SoTA,Saves,Save%,...,sq_Gls,sq_Ast,sq_Gls/90,sq_Ast/90,opp_sq_Poss,opp_sq_Gls,opp_sq_Ast,opp_sq_Gls/90,opp_sq_Ast/90,Target
34,Nick Pope,Newcastle Utd,30,92,36.2,32,0.88,118,85,74.6,...,23.0,19.0,0.60,0.50,-12.5,-29.0,-20.0,-0.76,-0.52,62.626
6,Ivan Provedel,Lazio,28,94,37.9,30,0.79,128,99,77.3,...,-15.0,-11.0,-0.40,-0.29,3.5,-28.0,-19.0,-0.74,-0.50,57.987
70,Brice Samba,Lens,28,94,37.0,28,0.76,125,98,80.0,...,6.0,10.0,0.16,0.26,-4.8,-20.0,-12.0,-0.52,-0.32,56.240
74,Walter Benítez,PSV Eindhoven,29,93,30.0,35,1.17,106,71,68.9,...,3.0,-3.0,0.09,-0.09,2.4,-2.0,-5.0,-0.06,-0.14,44.700
66,Steve Mandanda,Rennes,37,85,33.5,31,0.93,110,79,73.6,...,-15.0,-14.0,-0.39,-0.37,0.9,-1.0,0.0,-0.03,0.00,32.160
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31,Neto,Bournemouth,33,89,26.5,39,1.47,138,100,74.6,...,-36.0,-28.0,-0.62,-0.50,16.9,32.0,26.0,1.01,0.79,-38.690
72,Lennart Moser,Eupen,22,99,30.5,66,2.16,173,108,65.9,...,3.0,1.0,0.09,0.02,2.6,12.0,15.0,0.36,0.44,-42.090
77,Mickey van der Hart,Emmen,28,94,25.0,44,1.76,122,78,65.6,...,-30.0,-28.0,-0.70,-0.68,9.4,40.0,30.0,1.25,0.94,-49.250
21,Alexander Schwolow,Schalke 04,30,92,22.6,58,2.57,138,80,60.9,...,-37.0,-30.0,-1.09,-0.88,6.5,26.0,14.0,0.77,0.41,-51.302


In [12]:
# Squad-level columns are removed again once Target has been computed.
squad_columns = [
    'sq_Poss', 'sq_Gls', 'sq_Ast', 'sq_Gls/90', 'sq_Ast/90',
    'opp_sq_Poss', 'opp_sq_Gls', 'opp_sq_Ast', 'opp_sq_Gls/90', 'opp_sq_Ast/90',
]
gk_merger = oop.drop(columns=squad_columns)

In [13]:
# Full column list of the final goalkeeper table, as a plain Python list.
gk_merger.columns.tolist()

['Player',
 'Squad',
 'Age',
 'Born',
 '90s',
 'GA',
 'GA90',
 'SoTA',
 'Saves',
 'Save%',
 'CS',
 'CS%',
 'Based',
 'Tkl',
 'TklW',
 'Def 3rd',
 'Mid 3rd',
 'Att 3rd',
 'drTkl',
 'Att',
 'Tkl%',
 'Lost',
 'Blocks',
 'Blocks Sh',
 'Blocks Pass',
 'Int',
 'Tkl+Int',
 'Clr',
 'Err',
 'Position',
 'Total Cmp%',
 'TotDist',
 'PrgDist',
 'Short Cmp%',
 'Medium Cmp%',
 'Long Cmp%',
 'Ast',
 'xAG',
 'xA',
 'A-xAG',
 'KP',
 '1/3',
 'PPA',
 'CrsPA',
 'PrgP',
 'PPM',
 'onG',
 'onGA',
 '+/-',
 '+/-90',
 'On-Off',
 'onxG',
 'onxGA',
 'xG+/-',
 'xG+/-90',
 'xG_On-Off',
 'player_code',
 'country_of_birth',
 'sub_position',
 'foot',
 'height_in_cm',
 'market_value_in_eur',
 'fee',
 'transfer_period',
 'Target']

In [14]:
# Save the final goalkeeper table to disk (the index is written as well, pandas' default).
final_gk_csv = 'final_gk_data.csv'
gk_merger.to_csv(final_gk_csv)