In [1]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np

salary2022 = pd.read_csv("mls-salaries-2022.csv")

salary2022.head()

Unnamed: 0,Club,Nickname,Last Name,First Name,Position,2022 Base Salary,2022 Guar. Comp.
0,Atlanta United,,Almada,Thiago,M-F,"$1,650,000.00","$2,332,000.00"
1,Atlanta United,,Alonso,Osvaldo,M,"$84,000.00","$84,000.00"
2,Atlanta United,Mikey,Ambrose,Mikey,D,"$85,444.00","$85,444.00"
3,Atlanta United,,Araujo,Luiz,F-M,"$3,600,000.00","$4,480,333.00"
4,Atlanta United,,Barco,Ezequiel,M,"$2,200,000.00","$2,200,000.00"


In [2]:
# Clean salary amounts and convert them to numerical value
# Clean Base Salay column
salary2022[salary2022.columns[5]] = salary2022[salary2022.columns[5]].replace('[\$,]', '', regex=True)
salary2022[salary2022.columns[5]] = pd.to_numeric(salary2022[salary2022.columns[5]])
# Clean Guaranteeed Comp. column
salary2022[salary2022.columns[6]] = salary2022[salary2022.columns[6]].replace('[\$,]', '', regex=True)
salary2022[salary2022.columns[6]] = pd.to_numeric(salary2022[salary2022.columns[6]])
# Check outcome
print(salary2022[salary2022.columns[5]].head())
print(salary2022[salary2022.columns[6]].head())

# Some last names have "n" in front of them let's correct it.
salary2022['Last Name'] = salary2022['Last Name'].str.replace('^n', '', regex=True)

# Creating full name column
salary2022['name'] = salary2022['First Name'] + ' ' + salary2022['Last Name'].fillna('')
salary2022['name'] = salary2022['name'].str.replace('  ', ' ')

print(salary2022.name.head())

0    1650000.0
1      84000.0
2      85444.0
3    3600000.0
4    2200000.0
Name: 2022 Base Salary, dtype: float64
0    2332000.0
1      84000.0
2      85444.0
3    4480333.0
4    2200000.0
Name: 2022 Guar. Comp., dtype: float64
0     Thiago Almada
1    Osvaldo Alonso
2     Mikey Ambrose
3       Luiz Araujo
4    Ezequiel Barco
Name: name, dtype: object


In [3]:
# Remove diacritical marks from the 'name' column
from unidecode import unidecode

salary2022['name'] = salary2022['name'].apply(unidecode)
salary2022['name'] = salary2022['name'].str.title()

In [4]:
for i in salary2022['name']:
    print(i)

Thiago Almada
Osvaldo Alonso
Mikey Ambrose
Luiz Araujo
Ezequiel Barco
George Campbell
Dylan Castanheira
Erik Centeno
Machop Chol
Ronaldo Cisneros
Jackson Conway
Alex De John
Dom Dwyer
Alan Franco
Justin Garces
Raaol Gudia+-O
Andrew Gutman
Brad Guzan
Ronald Hernandez
Emerson Hyndman
Franco Ibarra
Brooks Lennon
Erik Lopez Samaniego
Josef Martinez
Aiden Mcfadden
Efrain Morales
Marcelino Moreno
Edwin Mosquera
Juan Jose Purata 
Rocco Rios-Novo
Miles Robinson
Matheus Rossetto
Amar Sejdic
Santiago Sosa
Bryce Washington
Caleb Wiley 
Tyler Wolff
Carlos Asensio
Julio Cascante
Washington Corozo 
Moussa Djitte
Sebastian Driussi
Diego Fagundez
Ethan Finlay
Ruben Gabrielsen
Jon Gallagher
Danny Hoesen
Hector Jimenez
Kipp Keller 
Freddy Kleeman
Zan Kolmanic
Damian Las
Nick Lima
Felipe Martins
Danny Pereira
Tomas Pochettino
William Pulisic
Rodney Redes
Emiliano Rigoni
Alexander Ring
Jhohan Romana
Jared Stroud
Brad Stuver
Andrew Tarbell
Maximiliano Urruti
Jhojan Valencia 
Owen Wolff
Jean-Aniel Assi
Zorh

In [5]:
nan = salary2022.loc[salary2022['Last Name'].isna(), 'First Name']
nan

194          Max
322         Nanu
354         Zeca
612    Luquinhas
765       Judson
766    Rodrigues
859     Auro Jr.
Name: First Name, dtype: object

In [6]:
# Some names have nicknames in them (Mohammed "Mo" Adams). 
# Need to erase those so it would be first and last only (Mohammed Adams)
salary2022['name'] = salary2022['name'].str.replace('"\w+" ', '')
for i in salary2022.name:
    print(i)

Thiago Almada
Osvaldo Alonso
Mikey Ambrose
Luiz Araujo
Ezequiel Barco
George Campbell
Dylan Castanheira
Erik Centeno
Machop Chol
Ronaldo Cisneros
Jackson Conway
Alex De John
Dom Dwyer
Alan Franco
Justin Garces
Raaol Gudia+-O
Andrew Gutman
Brad Guzan
Ronald Hernandez
Emerson Hyndman
Franco Ibarra
Brooks Lennon
Erik Lopez Samaniego
Josef Martinez
Aiden Mcfadden
Efrain Morales
Marcelino Moreno
Edwin Mosquera
Juan Jose Purata 
Rocco Rios-Novo
Miles Robinson
Matheus Rossetto
Amar Sejdic
Santiago Sosa
Bryce Washington
Caleb Wiley 
Tyler Wolff
Carlos Asensio
Julio Cascante
Washington Corozo 
Moussa Djitte
Sebastian Driussi
Diego Fagundez
Ethan Finlay
Ruben Gabrielsen
Jon Gallagher
Danny Hoesen
Hector Jimenez
Kipp Keller 
Freddy Kleeman
Zan Kolmanic
Damian Las
Nick Lima
Felipe Martins
Danny Pereira
Tomas Pochettino
William Pulisic
Rodney Redes
Emiliano Rigoni
Alexander Ring
Jhohan Romana
Jared Stroud
Brad Stuver
Andrew Tarbell
Maximiliano Urruti
Jhojan Valencia 
Owen Wolff
Jean-Aniel Assi
Zorh

  salary2022['name'] = salary2022['name'].str.replace('"\w+" ', '')


In [7]:
# Check Club names 
for i in salary2022.Club.unique():
    print(i)
# It appears that some players have "MLS" as a club, which can not be the case since its a League not a club
mls = salary2022[salary2022.Club == 'Major League Soccer']
mls

Atlanta United
Austin FC
CF Montreal
Charlotte FC
Chicago Fire
Colorado Rapids
Columbus Crew
DC United
FC Cincinnati
FC Dallas
Houston Dynamo
Inter Miami
LA Galaxy
LAFC
Major League Soccer
Minnesota United
Nashville SC
New England Revolution
New York City FC
New York Red Bulls
Orlando City SC
Philadelphia Union
Portland Timbers
Real Salt Lake
San Jose Earthquakes
Seattle Sounders FC
Sporting Kansas City
St. Louis SC
Toronto FC
Vancouver Whitecaps


Unnamed: 0,Club,Nickname,Last Name,First Name,Position,2022 Base Salary,2022 Guar. Comp.,name
451,Major League Soccer,,Barber,Grayson,F,84000.0,87750.0,Grayson Barber
452,Major League Soccer,,Corona,Joe,M,300000.0,315000.0,Joe Corona
453,Major League Soccer,,Hundley,Matt,F,110000.0,119000.0,Matt Hundley


In [8]:
# Adjust club name for players with MLS as club
salary2022.loc[salary2022['name'].str.contains('Grayson Barber'), 'Club'] = 'Sporting Kansas City'
salary2022.loc[salary2022['name'].str.contains('Joe Corona'), 'Club'] = 'Houston Dynamo'
salary2022.loc[salary2022['name'].str.contains('Matt Hundley'), 'Club'] = 'Colorado Rapids'

In [9]:
# Players stats df doesnt have stats for Charlotte FC and St. Louis SC. Let's remove it from salary.
salary2022 = salary2022[salary2022['Club'] != 'Charlotte FC']
salary2022 = salary2022[salary2022['Club'] != 'St. Louis SC']
# To make club names consistent let's remove FC CF SC from them
salary2022['Club'] = salary2022['Club'].str.replace("FC ","").str.replace(" FC", "").str.replace(" SC", "").str.replace("CF ", "")
clubs = np.sort(salary2022.Club.unique())

print(clubs)

['Atlanta United' 'Austin' 'Chicago Fire' 'Cincinnati' 'Colorado Rapids'
 'Columbus Crew' 'DC United' 'Dallas' 'Houston Dynamo' 'Inter Miami'
 'LA Galaxy' 'LAFC' 'Minnesota United' 'Montreal' 'Nashville'
 'New England Revolution' 'New York City' 'New York Red Bulls'
 'Orlando City' 'Philadelphia Union' 'Portland Timbers' 'Real Salt Lake'
 'San Jose Earthquakes' 'Seattle Sounders' 'Sporting Kansas City'
 'Toronto' 'Vancouver Whitecaps']


In [10]:
# Abbreviations list
abb = ['ATL','ATX','CHI', 'CIN', 'COL', 'CLB', 'DC', 'DAL', 'HOU', 'MIA', 'LA', 'LAFC', 'MIN', 'MTL', 'NSH', 'NE', 'NYC', 'RBNY', 'ORL', 'PHI', 'POR', 'RSL', 'SJ', 'SEA', 'SKC', 'TOR', 'VAN']
# Creation of dictionary
abb_dic = {}
abb_dic = {clubs[i]: abb[i] for i in range(len(clubs))}
# Creation of abbreaviation column based of created dictionary
salary2022['club_abb'] = salary2022['Club'].map(abb_dic)
salary2022.head()

Unnamed: 0,Club,Nickname,Last Name,First Name,Position,2022 Base Salary,2022 Guar. Comp.,name,club_abb
0,Atlanta United,,Almada,Thiago,M-F,1650000.0,2332000.0,Thiago Almada,ATL
1,Atlanta United,,Alonso,Osvaldo,M,84000.0,84000.0,Osvaldo Alonso,ATL
2,Atlanta United,Mikey,Ambrose,Mikey,D,85444.0,85444.0,Mikey Ambrose,ATL
3,Atlanta United,,Araujo,Luiz,F-M,3600000.0,4480333.0,Luiz Araujo,ATL
4,Atlanta United,,Barco,Ezequiel,M,2200000.0,2200000.0,Ezequiel Barco,ATL


In [11]:
# Import dataframe with players statistics 
stats = pd.read_csv("MLS2021.csv")
stats.head()

Unnamed: 0,name,club_abb,club,2021 guaranteed comp.,position,birth_city,birth_country,"height, cm","weight, kg",foot,...,accurate_goal_kicks,dive_catch,inaccurate_goal_kicks,penalty_faced,pen_goals_conceded,punches,drops,good_claim,saves_per,goals_against_average
0,Valentin Castellanos,NYC,New York City,776000.0,Forward,Mendoza,Argentina,178.0,70.0,Right,...,0,0,0,0,3,0,0,0,0.0,0.95
1,Ola Kamara,DC,DC United,420000.0,Forward,,,185.0,82.0,,...,0,0,0,0,2,0,0,0,0.0,1.43
2,Javier Hernandez,LA,LA Galaxy,6000000.0,Forward,Guadalajara,Mexico,175.0,73.0,Right,...,0,0,0,0,3,0,0,0,0.0,1.4
3,Raul Ruidiaz,SEA,Seattle Sounders,2100000.0,Forward,Lima,Peru,169.0,65.0,Right,...,0,0,0,0,3,0,0,0,0.0,0.92
4,Adam Buksa,NE,New England Revolutio,1106250.0,Forward,Krakow,Poland,191.0,78.0,Both,...,0,0,0,0,2,0,0,0,0.0,1.04


In [12]:
# Check which names from salary2022 are not present in stats
not_in_stats = ~salary2022['name'].isin(stats['name'])

# Print names from salary2022 that are not present in stats
for i in salary2022[not_in_stats]['name']:
    print(i)
print(len(salary2022[not_in_stats]['name']))

Thiago Almada
Erik Centeno
Ronaldo Cisneros
Justin Garces
Raaol Gudia+-O
Aiden Mcfadden
Edwin Mosquera
Juan Jose Purata 
Rocco Rios-Novo
Caleb Wiley 
Carlos Asensio
Washington Corozo 
Ruben Gabrielsen
Kipp Keller 
Freddy Kleeman
Damian Las
Emiliano Rigoni
Jhojan Valencia 
Gabriele Corbo
Kei Kamara
Logan Ketterer
Ismael Kona(C)
Jojea Kwizera
Nathan-Dylan Saliba
Victor Bezerra
Kendall Burks
Rafael Czichos
Jhon Jader Duran
Sergio Oregel
Missael Rodriguez
Xherdan Shaqiri
Jairo Torres 
Lucas Esteves 
Felipe Gutierrez
Anthony Markanich 
Sam Nicholson
Jackson Travis
Max 
Miloa! Degenek
Luis Diaz 
Mohamed Farsi
Cucho Hernandez
James Igbekeme
Jake Morris
Jacen Russell-Rowe
Will Sands
Patrick Schulte 
Yaw Yeboah
Sean Zawadzki
Matai Akinmboni
Christian Benteke
Sofiane Djeffal
Chris Durkin
Kristian Fletcher 
Taxiarchis Fountas
Jeremy Garay
Jackson Hopkins
Theodore Ku-Dipietro
Ravel Morrison
Victor Palsson
Martin Rodriguez
Rafael Romo
Gaoussou Samake
Hayden Sargis
Roman Celentano
Ray Gaddis
Evan Lo

In [13]:
# Adjust salary2022 some names that are differently written comparing to stats df
salary2022.loc[salary2022['name'].str.contains('Kleeman'), 'name'] = 'Freddy Kleemann'
salary2022.loc[salary2022['name'].str.contains('Adalberto'), 'name'] = 'Adalberto Carrasquilla'
salary2022.loc[salary2022['name'].str.contains('Nkosi'), 'name'] = 'Nkosi Tafari'
salary2022.loc[salary2022['name'].str.contains('Assi'), 'name'] = 'Jean-Aniel Assi'
salary2022.loc[salary2022['name'].str.contains('Matko'), 'name'] = 'Matko Miljevic'
salary2022.loc[salary2022['name'].str.contains('Gudmundur'), 'name'] = 'Gudmundur Thorarinsson'
salary2022.loc[salary2022['name'].str.contains('Antonio Carlos'), 'name'] = 'Antonio Carlos'
salary2022.loc[salary2022['name'].str.contains('Ocimar'), 'name'] = 'Junior Urso'
salary2022.loc[salary2022['name'].str.contains('Matheus Alvarenga'), 'name'] = 'Matheus Davo'
salary2022.loc[salary2022['name'].str.contains('Bode Davis'), 'name'] = 'Bode Hidalgo'
salary2022.loc[salary2022['name'].str.contains('Jeizon Ramirez'), 'name'] = 'Jeizon Ramirez'
salary2022.loc[salary2022['name'].str.contains('Javier Lopez'), 'name'] = 'Javier Lopez'
salary2022.loc[salary2022['name'].str.contains('Joao Paulo'), 'name'] = 'Joao Paulo'
salary2022.loc[salary2022['name'].str.contains('Alan Pulido'), 'name'] = 'Alan Pulido'
salary2022.loc[salary2022['name'].str.contains('Caio Alexandre'), 'name'] = 'Caio Alexandre'
salary2022.loc[salary2022['name'].str.contains('Higuain'), 'name'] = 'Gonzalo Higuain'
salary2022.loc[salary2022['name'].str.contains('Blanco'), 'name'] = 'Sebastian Blanco'
salary2022.loc[salary2022['name'].str.contains('Auro'), 'name'] = 'Auro'
salary2022.loc[salary2022['name'].str.contains('Lopez'), 'name'] = 'Erik Lopez Samaniego'
salary2022.loc[salary2022['name'].str.contains('David Taylor'), 'name'] = 'Dj Taylor'

In [14]:
# Check which names from salary2022 are not present in stats
not_in_stats = ~salary2022['name'].isin(stats['name'])

# Print names from salary2022 that are not present in stats
for i in salary2022[not_in_stats]['name']:
    print(i)
print(len(salary2022[not_in_stats]['name']))

Thiago Almada
Erik Centeno
Ronaldo Cisneros
Justin Garces
Raaol Gudia+-O
Aiden Mcfadden
Edwin Mosquera
Juan Jose Purata 
Rocco Rios-Novo
Caleb Wiley 
Carlos Asensio
Washington Corozo 
Ruben Gabrielsen
Kipp Keller 
Damian Las
Emiliano Rigoni
Jhojan Valencia 
Gabriele Corbo
Kei Kamara
Logan Ketterer
Ismael Kona(C)
Jojea Kwizera
Nathan-Dylan Saliba
Victor Bezerra
Kendall Burks
Rafael Czichos
Jhon Jader Duran
Sergio Oregel
Missael Rodriguez
Xherdan Shaqiri
Jairo Torres 
Lucas Esteves 
Felipe Gutierrez
Anthony Markanich 
Sam Nicholson
Jackson Travis
Max 
Miloa! Degenek
Luis Diaz 
Mohamed Farsi
Cucho Hernandez
James Igbekeme
Jake Morris
Jacen Russell-Rowe
Will Sands
Patrick Schulte 
Yaw Yeboah
Sean Zawadzki
Matai Akinmboni
Christian Benteke
Sofiane Djeffal
Chris Durkin
Kristian Fletcher 
Taxiarchis Fountas
Jeremy Garay
Jackson Hopkins
Theodore Ku-Dipietro
Ravel Morrison
Victor Palsson
Martin Rodriguez
Rafael Romo
Gaoussou Samake
Hayden Sargis
Roman Celentano
Ray Gaddis
Evan Louro
Nicholas Ma

In [15]:
# Standardize the values in the name column for both dataframes by removing any leading or 
# trailing white spaces
stats['name'] = stats['name'].str.strip().str.replace('  ', ' ')
salary2022['name'] = salary2022['name'].str.strip().str.replace('  ', ' ')

In [16]:
# Merge the two dataframes on the matching names
stats_1 = pd.merge(stats, salary2022, left_on=['name'], right_on=['name'], how='left')
len(stats_1)

853

In [17]:
# Players from stats that dont have salary report
nan_rows = stats_1[stats_1['2022 Base Salary'].isna()]
for index, row in nan_rows.iterrows():
    print(index, row['name'], row['Last Name'])
    
print(len(nan_rows))

4 Adam Buksa nan
11 Ricardo Pepi nan
14 Javier Lopez nan
22 Luis Nani nan
26 Jesus Medina nan
31 Robert Beric nan
33 Tajon Buchanan nan
42 Luka Stojanovic nan
46 Cecilio Dominguez nan
48 Fabio Gomes nan
49 Adrien Hunou nan
60 Diego Rossi nan
72 Rayan Raveloson nan
76 Chris Wondolowski nan
77 Ignacio Aliseda nan
79 Andre Shinyashiki nan
98 Tyler Pasher nan
99 Yordy Reyna nan
102 Robbie Robinson nan
103 Brian Rodriguez nan
107 Ramon Abila nan
119 Carlos Fierro nan
125 Federico Higuain nan
131 Alvaro Medran nan
132 Haris Medunjanin nan
133 Joao Paulo nan
138 Kevin Paredes nan
146 Yeferson Soteldo nan
147 Silvester Van Der Water nan
156 Oswaldo Alanis nan
159 Yamil Asad nan
160 Eduard Atuesta nan
167 Jhonder Cadiz nan
177 Edison Flores nan
189 Jaylin Lindsey nan
190 Marvin Loria nan
199 Nathan Cardoso nan
205 Andy Rose nan
211 Gudmundur Thorarinsson nan
212 Arnor Traustason nan
213 Diego Valeri nan
215 Anton Walkes nan
220 Fanendo Adi nan
232 George Bello nan
236 Bruno Gaspar nan
238 Frede

In [18]:
stats_1

Unnamed: 0,name,club_abb_x,club,2021 guaranteed comp.,position,birth_city,birth_country,"height, cm","weight, kg",foot,...,saves_per,goals_against_average,Club,Nickname,Last Name,First Name,Position,2022 Base Salary,2022 Guar. Comp.,club_abb_y
0,Valentin Castellanos,NYC,New York City,776000.0,Forward,Mendoza,Argentina,178.0,70.0,Right,...,0.0,0.95,New York City,Taty,Castellanos,Valentin,M,900000.0,1076000.0,NYC
1,Ola Kamara,DC,DC United,420000.0,Forward,,,185.0,82.0,,...,0.0,1.43,DC United,,Kamara,Ola,F,1500000.0,1520000.0,DC
2,Javier Hernandez,LA,LA Galaxy,6000000.0,Forward,Guadalajara,Mexico,175.0,73.0,Right,...,0.0,1.40,LA Galaxy,Chicharito,Hernandez,Javier,F,6000000.0,7443750.0,LA
3,Raul Ruidiaz,SEA,Seattle Sounders,2100000.0,Forward,Lima,Peru,169.0,65.0,Right,...,0.0,0.92,Seattle Sounders,,Ruidiaz,Raul,F,2472000.0,3201120.0,SEA
4,Adam Buksa,NE,New England Revolutio,1106250.0,Forward,Krakow,Poland,191.0,78.0,Both,...,0.0,1.04,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
848,Adrian Zendejas,MIN,Minnesota United,90208.0,Goalkeeper,Chula Vista,USA,196.0,84.0,,...,,,,,,,,,,
849,Kyle Zobeck,DAL,Dallas,85444.0,Goalkeeper,"Coralville, IA",USA,186.0,82.0,,...,,,,,,,,,,
850,Rida Zouhir,MTL,Montreal,71730.0,Midfielder,,Canada,178.0,73.0,,...,,0.00,Montreal,,Zouhir,Rida,M,66724.0,74809.0,MTL
851,Dario Zuparic,POR,Portland Timbers,541667.0,Defender,Zupanja,Croatia,185.0,84.0,Both,...,0.0,1.50,Portland Timbers,,Zuparic,Dario,D,375000.0,579200.0,POR


In [19]:
# Drop players from merged dataframe that don't have 2022 salary report
stats_1.dropna(subset = ['2022 Base Salary'], axis=0, inplace= True, how = 'any')
stats_1.head()

Unnamed: 0,name,club_abb_x,club,2021 guaranteed comp.,position,birth_city,birth_country,"height, cm","weight, kg",foot,...,saves_per,goals_against_average,Club,Nickname,Last Name,First Name,Position,2022 Base Salary,2022 Guar. Comp.,club_abb_y
0,Valentin Castellanos,NYC,New York City,776000.0,Forward,Mendoza,Argentina,178.0,70.0,Right,...,0.0,0.95,New York City,Taty,Castellanos,Valentin,M,900000.0,1076000.0,NYC
1,Ola Kamara,DC,DC United,420000.0,Forward,,,185.0,82.0,,...,0.0,1.43,DC United,,Kamara,Ola,F,1500000.0,1520000.0,DC
2,Javier Hernandez,LA,LA Galaxy,6000000.0,Forward,Guadalajara,Mexico,175.0,73.0,Right,...,0.0,1.4,LA Galaxy,Chicharito,Hernandez,Javier,F,6000000.0,7443750.0,LA
3,Raul Ruidiaz,SEA,Seattle Sounders,2100000.0,Forward,Lima,Peru,169.0,65.0,Right,...,0.0,0.92,Seattle Sounders,,Ruidiaz,Raul,F,2472000.0,3201120.0,SEA
5,Damir Kreilach,RSL,Real Salt Lake,1350000.0,Midfielder,Vukovar,Croatia,186.0,79.0,Right,...,0.0,1.5,Real Salt Lake,,Kreilach,Damir,M,1762572.0,1762572.0,RSL


In [20]:
# Drop some columns that we don't need no more
stats_1.drop(['Club','Nickname','Last Name','First Name', 'Position', '2022 Base Salary', 'club_abb_y'],
    axis=1,
    inplace=True)

# Make all headers lowercase
stats_1.columns = stats_1.columns.str.lower()

# Rename club_abb_x
stats_1 = stats_1.rename(columns={'club_abb_x': 'club_abb'})

# Move 2022 salary column to the beginning 
# Get the list of all column names
cols = stats_1.columns.tolist()
# Identify the last column
last_col = cols[-1:]
# Remove the last column from the list of all columns
cols = cols[:-1]
# Insert the last column in the 5th position
cols = cols[:4] + last_col + cols[4:]
# Reindex the dataframe using the new column order
stats_1 = stats_1.reindex(columns=cols)

# Reset index of stats_1
stats_1 = stats_1.reset_index(drop=True)

In [21]:
stats_1

Unnamed: 0,name,club_abb,club,2021 guaranteed comp.,2022 guar. comp.,position,birth_city,birth_country,"height, cm","weight, kg",...,accurate_goal_kicks,dive_catch,inaccurate_goal_kicks,penalty_faced,pen_goals_conceded,punches,drops,good_claim,saves_per,goals_against_average
0,Valentin Castellanos,NYC,New York City,776000.0,1076000.0,Forward,Mendoza,Argentina,178.0,70.0,...,0,0,0,0,3,0,0,0,0.0,0.95
1,Ola Kamara,DC,DC United,420000.0,1520000.0,Forward,,,185.0,82.0,...,0,0,0,0,2,0,0,0,0.0,1.43
2,Javier Hernandez,LA,LA Galaxy,6000000.0,7443750.0,Forward,Guadalajara,Mexico,175.0,73.0,...,0,0,0,0,3,0,0,0,0.0,1.40
3,Raul Ruidiaz,SEA,Seattle Sounders,2100000.0,3201120.0,Forward,Lima,Peru,169.0,65.0,...,0,0,0,0,3,0,0,0,0.0,0.92
4,Damir Kreilach,RSL,Real Salt Lake,1350000.0,1762572.0,Midfielder,Vukovar,Croatia,186.0,79.0,...,0,0,0,0,3,0,0,0,0.0,1.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
618,Eriq Zavaleta,TOR,Toronto,115503.0,84000.0,Defender,Westfield,USA,186.0,83.0,...,0,0,0,0,3,0,0,0,0.0,2.21
619,Gedion Zelalem,NYC,New York City,81375.0,84000.0,Midfielder,Berlin,Germany,180.0,74.0,...,0,0,0,0,0,0,0,0,,0.00
620,Rida Zouhir,MTL,Montreal,71730.0,74809.0,Midfielder,,Canada,178.0,73.0,...,0,0,0,0,0,0,0,0,,0.00
621,Dario Zuparic,POR,Portland Timbers,541667.0,579200.0,Defender,Zupanja,Croatia,185.0,84.0,...,0,0,0,0,6,0,0,0,0.0,1.50


In [22]:
# Save to csv file
stats_1.to_csv('MLS_pl_2021_stats_w_2022_salaries.csv', index = False)