In [None]:
import pandas as pd

In [None]:
# List the contents of the data directory that the read_csv cells below load from.
!ls -l ex-data/fifa18

In [None]:
# Load the complete dataset. low_memory=False makes pandas parse the file in a
# single pass instead of chunk by chunk, avoiding mixed-dtype column inference.
complete_data = pd.read_csv(
    'ex-data/fifa18/CompleteDataset.csv',
    low_memory=False,
)
complete_data.head(5)

In [None]:
# Load the player attribute table, parsed in one pass (low_memory=False) so
# column dtypes are inferred from the whole file.
attribute_data = pd.read_csv(
    'ex-data/fifa18/PlayerAttributeData.csv',
    low_memory=False,
)
attribute_data.head(5)

In [None]:
# Load the player personal-data table and preview its first five rows.
personal_data = pd.read_csv(filepath_or_buffer='ex-data/fifa18/PlayerPersonalData.csv')
personal_data.head(n=5)

In [None]:
# Preview the last five rows of the personal-data table.
personal_data.tail(n=5)

In [None]:
# Load the playing-position table and preview its first five rows.
position_data = pd.read_csv(filepath_or_buffer='ex-data/fifa18/PlayerPlayingPositionData.csv')
position_data.head(n=5)

# 1 - Replace Value and Wage with actual numbers

In [None]:
import re
# Pattern for a human-readable amount: `amount` captures an integer or decimal
# number and `unit` captures an optional single trailing word character (the
# magnitude suffix, e.g. 'K' or 'M'). It is meant to be used with .search(), so
# any leading currency symbol is simply skipped. The pattern is a raw string so
# the regex escapes are not treated as (invalid) string escapes, and it no
# longer requires one arbitrary leading character, which used to swallow the
# first digit of inputs that have no currency prefix.
human_to_int_re = re.compile(r'(?P<amount>\d+\.?\d*)(?P<unit>\w?)')

def unit_to_mult(unit):
    """Return the integer multiplier for a magnitude suffix.

    'G' maps to 10**9, 'M' to 10**6 and 'K' to 10**3. Any other value,
    including the empty string, maps to 1. Matching is case-sensitive.
    """
    known_suffixes = (
        ('G', 10 ** 9),
        ('M', 10 ** 6),
        ('K', 10 ** 3),
    )
    for suffix, multiplier in known_suffixes:
        if unit == suffix:
            return multiplier
    return 1

def to_numeric(elem):
    """Convert a human-readable amount string into an integer.

    ``elem`` is searched with ``human_to_int_re``. The captured number is
    scaled by the multiplier ``unit_to_mult`` returns for the captured unit
    suffix, and any fractional remainder is truncated.

    Parameters
    ----------
    elem : str
        Text containing a number, optionally followed by a unit suffix.

    Returns
    -------
    int
        The scaled amount, truncated toward zero.

    Raises
    ------
    ValueError
        If no amount can be found in ``elem``.
    """
    # Function-scope import so this definition does not depend on another cell.
    from decimal import Decimal

    m = human_to_int_re.search(elem)
    if m is None:
        # Fail with a message naming the offending value instead of an
        # AttributeError on None.
        raise ValueError('cannot parse a numeric amount from {!r}'.format(elem))
    mult = unit_to_mult(m.group('unit'))
    # Decimal keeps the parsed literal exact. With float arithmetic a product
    # such as 1.001 * 1000 evaluates to 1000.9999999999999, which int() would
    # truncate to 1000 instead of 1001.
    return int(Decimal(m.group('amount')) * mult)

In [None]:
# Parse each 'Value' string with to_numeric and store the result as 'IntValue'.
personal_data['IntValue'] = personal_data['Value'].map(to_numeric)
personal_data.head(n=5)

In [None]:
# Parse each 'Wage' string with to_numeric and store the result as 'IntWage'.
personal_data['IntWage'] = personal_data['Wage'].map(to_numeric)
personal_data.head(n=5)

# 2 - Check if there are any missing values

In [None]:
# Per column: True if the personal-data table has at least one missing value.
personal_data.isna().any()

In [None]:
# Per column: True if the attribute table has at least one missing value.
attribute_data.isna().any()

In [None]:
# Per column: True if the complete dataset has at least one missing value.
complete_data.isna().any()

In [None]:
# Per column: True if the position table has at least one missing value.
position_data.isna().any()

# 3 - Which teams have the most players?

In [None]:
# Count the rows per club (rows without a club are not grouped) and return the
# club label with the highest count.
personal_data.groupby('Club').size().idxmax()

# 4 - Which team pays the largest total wages?

In [None]:
# Select 'IntWage' before aggregating so only that column is summed per club.
# Summing the whole frame first would also try to "sum" the text columns, which
# is wasted work and can raise a TypeError on recent pandas versions.
# idxmax() then returns the club whose total wage bill is the largest.
personal_data.groupby('Club')['IntWage'].sum().idxmax()

In [None]:
# Five clubs with the largest total wage, in descending order.
(
    personal_data
    .groupby('Club')['IntWage']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)

# 5 - Which team pays the largest average wage?

In [None]:
# Five clubs with the largest mean wage per player, in descending order.
(
    personal_data
    .groupby('Club')['IntWage']
    .mean()
    .sort_values(ascending=False)
    .head(5)
)

# 6 - How many players have exactly one preferred position?

In [None]:
# Split each whitespace-separated 'Preferred Positions' string into a list.
# The vectorised .str accessor leaves missing values as NaN instead of raising,
# which the element-wise apply(str.split) would do.
position_data['PP List'] = position_data['Preferred Positions'].str.split()
# Number of players whose list holds exactly one position. Left as the cell's
# last expression so the notebook displays it without print().
int(position_data['PP List'].str.len().eq(1).sum())

In [None]:
# Rows for the players whose 'PP List' contains exactly one position.
position_data.loc[position_data['PP List'].map(len) == 1]

# 7 - How many players have at least three preferred positions?

In [None]:
# Number of players whose 'PP List' contains three or more positions.
int((position_data['PP List'].map(len) >= 3).sum())