# U.S. Major League Soccer Salaries Data Analysis

#### Importing pandas and NumPy


In [17]:
import pandas as pd
import numpy as np

#### Reading the data file and displaying the first 10 rows

In [18]:
# Read the salary CSV (path is relative to the working directory) into a
# DataFrame, then preview its first ten rows.
salaries = pd.read_csv(filepath_or_buffer="mls-salaries-2017.csv")
salaries.head(n=10)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
0,ATL,Almiron,Miguel,M,1912500.0,2297000.0
1,ATL,Ambrose,Mikey,D,65625.0,65625.0
2,ATL,Asad,Yamil,M,150000.0,150000.0
3,ATL,Bloom,Mark,D,99225.0,106573.89
4,ATL,Carleton,Andrew,F,65000.0,77400.0
5,ATL,Carmona,Carlos,M,675000.0,725000.0
6,ATL,Garza,Greg,D,150000.0,150000.0
7,ATL,Gonzalez Pirez,Leandro,D,250008.0,285008.0
8,ATL,Goslin,Chris,M,70000.0,74000.0
9,ATL,Gressel,Julian,M,75000.0,93750.0


#### Number of rows and columns in the data

In [19]:
# DataFrame.shape is a (rows, columns) pair: unpack it once, then report each count.
row_count, column_count = salaries.shape
print("Length of Rows: {}".format(row_count))
print("Length of Columns: {}".format(column_count))

Length of Rows: 616
Length of Columns: 6


#### Average base salary of soccer players

In [20]:
# Mean of the base_salary column, rounded to two decimals for display.
mean_base_salary = salaries["base_salary"].mean()
print("Average of salary of soccer players: {}".format(round(mean_base_salary, 2)))

Average of salary of soccer players: 296977.74


#### Player(s) with the highest base salary

In [21]:
# Keep every row whose base_salary equals the column maximum, so ties are all shown.
top_base_salary = salaries["base_salary"].max()
salaries.loc[salaries["base_salary"].eq(top_base_salary)]

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
401,ORL,Kaka,,M,6660000.0,7167500.0


#### Player(s) with the lowest base salary

In [22]:
# Keep every row whose base_salary equals the column minimum, so ties are all shown.
bottom_base_salary = salaries["base_salary"].min()
salaries.loc[salaries["base_salary"].eq(bottom_base_salary)]

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
348,NYCFC,Okoli,Sean,F,52999.92,52999.92
523,SEA,Tolo,Nouhou,D,52999.92,52999.92


#### Positions in MLS

In [23]:
# Print each distinct value of the position column, one per line.
distinct_positions = salaries["position"].unique()
for position in distinct_positions:
    print("Position: ", position)

Position:  M
Position:  D
Position:  F
Position:  GK
Position:  M-F
Position:  F-M
Position:  D-M
Position:  M-D
Position:  M/F
Position:  F/M


#### Number of players in each position

In [24]:
# Tally the rows for each position value and print the resulting Series.
players_per_position = salaries["position"].value_counts()
print(players_per_position)

M      212
D      185
F      109
GK      65
M-F     19
D-M     14
F-M      6
M-D      4
F/M      1
M/F      1
Name: position, dtype: int64


#### Average base salary and guaranteed compensation by position

In [25]:
# Average the two pay columns within each position group.
# The columns are selected explicitly because the frame also holds text
# columns (club, last_name, first_name): pandas >= 2.0 no longer drops
# non-numeric columns from GroupBy.mean() and raises a TypeError instead.
salaries.groupby("position")[["base_salary", "guaranteed_compensation"]].mean()

Unnamed: 0_level_0,base_salary,guaranteed_compensation
position,Unnamed: 1_level_1,Unnamed: 2_level_1
D,166574.093784,179533.184811
D-M,134930.0,154328.927857
F,489588.598349,557437.206514
F-M,335834.0,367920.805
F/M,125000.0,131250.0
GK,146472.515538,158665.155692
M,376106.218632,406781.788396
M-D,219032.25,230282.25
M-F,212975.604211,231104.551579
M/F,550000.0,565000.0


#### Number of players on each team

In [26]:
# Tally the rows for each club value and print the resulting Series.
players_per_club = salaries["club"].value_counts()
print(players_per_club)

VAN      32
PHI      31
ATL      31
CLB      30
ORL      30
DAL      29
SJ       29
NYCFC    28
NYRB     28
HOU      28
MNUFC    27
LA       27
CHI      27
POR      27
DC       27
RSL      27
MTL      27
TOR      27
COL      26
KC       26
SEA      25
NE       23
LAFC      2
Name: club, dtype: int64


#### Minimum, mean, and maximum base salary and guaranteed compensation for each team

In [27]:
# Minimum, mean and maximum of the two pay columns within each club.
# The pay columns are selected explicitly because the mean cannot be taken
# over the text columns (last_name, first_name, position); pandas >= 2.0
# raises on them instead of silently dropping them.
# The aggregations are named by string: passing np.mean to a groupby
# aggregation is deprecated in pandas >= 2.1, and "mean" yields the same
# "mean" column label.
salaries.groupby("club")[["base_salary", "guaranteed_compensation"]].agg(["min", "mean", "max"])

Unnamed: 0_level_0,base_salary,base_salary,base_salary,guaranteed_compensation,guaranteed_compensation,guaranteed_compensation
Unnamed: 0_level_1,min,mean,max,min,mean,max
club,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
ATL,53000.0,259213.420645,1912500.0,53000.0,290572.847097,2297000.0
CHI,53004.0,454541.62963,5400000.0,53004.0,479714.006296,5400000.0
CLB,53004.0,211507.749333,1050000.0,53004.0,224918.166333,1050000.0
COL,53004.0,279028.798077,2000000.0,54075.0,309254.605769,2475000.0
DAL,53000.0,206571.69931,784000.0,53000.0,235371.066897,880890.0
DC,53004.0,178227.238519,500000.0,56754.0,195275.84963,602000.0
HOU,53000.0,175144.642143,650000.0,53004.0,181829.166071,665000.0
KC,53000.0,239378.769231,850000.0,53000.0,253398.106923,850000.0
LA,53004.0,364209.875556,3750000.0,53004.0,451218.374444,5500000.0
LAFC,53004.0,59004.0,65004.0,53004.0,59004.0,65004.0
