### Import Libraries

In [5]:
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings("ignore")

data = pd.read_csv('dataset.csv')

## Data read

# Data Cleaning

### Create a copy of DataFrame

In [9]:
fifa = data.copy()

In [10]:
fifa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18979 entries, 0 to 18978
Data columns (total 77 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   ID                18979 non-null  int64 
 1   Name              18979 non-null  object
 2   LongName          18979 non-null  object
 3   photoUrl          18979 non-null  object
 4   playerUrl         18979 non-null  object
 5   Nationality       18979 non-null  object
 6   Age               18979 non-null  int64 
 7   ↓OVA              18979 non-null  int64 
 8   POT               18979 non-null  int64 
 9   Club              18979 non-null  object
 10  Contract          18979 non-null  object
 11  Positions         18979 non-null  object
 12  Height            18979 non-null  object
 13  Weight            18979 non-null  object
 14  Preferred Foot    18979 non-null  object
 15  BOV               18979 non-null  int64 
 16  Best Position     18979 non-null  object
 17  Joined      

### 1. ID

Unique identifier for each player.

### 2. Name

Player's name

### 3. LongName

Player's full name

### 4. photoURL

URL to player's photo

### 5. playerURL

URL to player's profile

### 6. Nationality

Player's nationality

### 7. Age

Player's age in years

### 8. OVA - Overall Rating (0-100)

It is a rating system used to represent a player's overall abilities and skills in the game.

### 9. POT - Potential Rating(0-100)

Represent a player's potential to develop and improve their abilities and skills over time.

### 10. Club

Current Club of the player

In [13]:
fifa['Club'].dtype

dtype('O')

In [15]:
fifa['Club'].unique()

array(['\n\n\n\nFC Barcelona', '\n\n\n\nJuventus',
       '\n\n\n\nAtlético Madrid', '\n\n\n\nManchester City',
       '\n\n\n\nParis Saint-Germain', '\n\n\n\nFC Bayern München',
       '\n\n\n\nLiverpool', '\n\n\n\nReal Madrid', '\n\n\n\nChelsea',
       '\n\n\n\nTottenham Hotspur', '\n\n\n\nInter', '\n\n\n\nNapoli',
       '\n\n\n\nBorussia Dortmund', '\n\n\n\nManchester United',
       '\n\n\n\nArsenal', '\n\n\n\nLazio', '\n\n\n\nLeicester City',
       '\n\n\n\nBorussia Mönchengladbach', '\n\n\n\nReal Sociedad',
       '\n\n\n\nAtalanta', '\n\n\n\nOlympique Lyonnais', '\n\n\n\nMilan',
       '\n\n\n\nVillarreal CF', '\n\n\n\nRB Leipzig', '\n\n\n\nCagliari',
       '\n\n\n\nAjax', '\n\n\n\nSL Benfica', '\n\n\n\nAS Monaco',
       '\n\n\n\nWolverhampton Wanderers', '\n\n\n\nEverton',
       '\n\n\n\nFiorentina', '\n\n\n\nFC Porto', '\n\n\n\nRC Celta',
       '\n\n\n\nTorino', '\n\n\n\nSevilla FC', '\n\n\n\nGrêmio',
       '\n\n\n\nReal Betis', '\n\n\n\nRoma', '\n\n\n\nNewcastle Unite

In [17]:
fifa['Club'] = fifa['Club'].str.strip()

In [22]:
fifa['Club'].unique()

array(['FC Barcelona', 'Juventus', 'Atlético Madrid', 'Manchester City',
       'Paris Saint-Germain', 'FC Bayern München', 'Liverpool',
       'Real Madrid', 'Chelsea', 'Tottenham Hotspur', 'Inter', 'Napoli',
       'Borussia Dortmund', 'Manchester United', 'Arsenal', 'Lazio',
       'Leicester City', 'Borussia Mönchengladbach', 'Real Sociedad',
       'Atalanta', 'Olympique Lyonnais', 'Milan', 'Villarreal CF',
       'RB Leipzig', 'Cagliari', 'Ajax', 'SL Benfica', 'AS Monaco',
       'Wolverhampton Wanderers', 'Everton', 'Fiorentina', 'FC Porto',
       'RC Celta', 'Torino', 'Sevilla FC', 'Grêmio', 'Real Betis', 'Roma',
       'Newcastle United', 'Eintracht Frankfurt', 'Valencia CF',
       'Medipol Başakşehir FK', 'Inter Miami', 'Bayer 04 Leverkusen',
       'Levante UD', 'Crystal Palace', 'Athletic Club de Bilbao',
       'Shanghai SIPG FC', 'VfL Wolfsburg',
       'Guangzhou Evergrande Taobao FC', 'Al Shabab',
       'Olympique de Marseille', 'Los Angeles FC',
       'Beijing Sino

### 11. Contract column

In [25]:
fifa['Contract'].dtype

dtype('O')

In [28]:
fifa['Contract'].unique()

array(['2004 ~ 2021', '2018 ~ 2022', '2014 ~ 2023', '2015 ~ 2023',
       '2017 ~ 2022', '2017 ~ 2023', '2018 ~ 2024', '2014 ~ 2022',
       '2018 ~ 2023', '2016 ~ 2023', '2013 ~ 2023', '2011 ~ 2023',
       '2009 ~ 2022', '2005 ~ 2021', '2011 ~ 2021', '2015 ~ 2022',
       '2017 ~ 2024', '2010 ~ 2024', '2012 ~ 2021', '2019 ~ 2024',
       '2015 ~ 2024', '2017 ~ 2025', '2020 ~ 2025', '2019 ~ 2023',
       '2008 ~ 2023', '2015 ~ 2021', '2020 ~ 2022', '2012 ~ 2022',
       '2016 ~ 2025', '2013 ~ 2022', '2011 ~ 2022', '2012 ~ 2024',
       '2016 ~ 2021', '2012 ~ 2023', '2008 ~ 2022', '2019 ~ 2022',
       '2017 ~ 2021', '2013 ~ 2024', '2020 ~ 2024', '2010 ~ 2022',
       '2020 ~ 2021', '2011 ~ 2024', '2020 ~ 2023', '2014 ~ 2024',
       '2013 ~ 2026', '2016 ~ 2022', '2010 ~ 2021', '2013 ~ 2021',
       '2019 ~ 2025', '2018 ~ 2025', '2016 ~ 2024', '2018 ~ 2021',
       '2009 ~ 2024', '2007 ~ 2022', 'Jun 30, 2021 On Loan',
       '2009 ~ 2021', '2019 ~ 2021', '2019 ~ 2026', 'Free', '2012 ~ 

In [32]:
for index, row in fifa.iterrows():
    if 'On Loan' in row['Contract'] or 'Free' in row['Contract']:
        print(row['Contract'])

Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Free
Free
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Free
Free
Free
Free
Free
Free
Free
Free
Free
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Free
Free
Free
Free
Jun 30, 2021 On Loan
Dec 31, 2020 On Loan
Jun 30, 2021 On Loan
Free
Jun 30, 2021 On Loan
Free
Free
Free
Free
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Free
Jun 30, 2021 On Loan
Free
Jun 30, 2021 On Loan
Jan 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Free
Dec 31, 2020 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Free
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Free
Jun 30, 2021 On Loan
Free
Free
Free

Jun 30, 2021 On Loan
May 31, 2021 On Loan
Dec 31, 2021 On Loan
Free
Jun 30, 2021 On Loan
May 31, 2021 On Loan
Free
Jun 23, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 23, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2022 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 23, 2021 On Loan
Jun 30, 2021 On Loan
Free
Dec 31, 2020 On Loan
Jun 30, 2021 On Loan
Free
Jun 30, 2021 On Loan
Dec 31, 2020 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Free
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jan 3, 2021 On Loan
Jan 31, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Free
Jun 30, 2021 On Loan
Dec 31, 2020 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Jun 30, 2021 On Loan
Free
Jun 30, 2021 On Loan


In [38]:
def extract_contract_info(contract):
    if contract == 'Free' or 'On Loan' in contract:
        start_date = np.nan
        end_date = np.nan
        contract_length = 0
    else: 
        start_date, end_date = contract.split(' ~ ')
        start_year = int(start_date[:4])
        end_year = int(end_date[:4])
        contract_length = end_year - start_year
    return start_date, end_date, contract_length

In [45]:
# Apply fn to contract
new_cols = ['ContractStart', 'ContractEnd', 'ContractLength']
new_contract_data = fifa['Contract'].apply(lambda x: pd.Series(extract_contract_info(x)))

for i in range(len(new_cols)):
    fifa.insert(loc=fifa.columns.get_loc('Contract') + 1 + i, column=new_cols[i], value=new_contract_data[i])

ValueError: cannot insert ContractStart, already exists

In [50]:
fifa.ContractStart.isna().value_counts()

False    17729
True      1250
Name: ContractStart, dtype: int64

In [52]:
fifa.loc[:3, 'Contract':'ContractLength']

Unnamed: 0,Contract,ContractStart,ContractEnd,ContractLength
0,2004 ~ 2021,2004,2021,17.0
1,2018 ~ 2022,2018,2022,4.0
2,2014 ~ 2023,2014,2023,9.0
3,2015 ~ 2023,2015,2023,8.0


In [102]:
# Contract categories
def categorize_contract_status(contract):
    if contract == 'Free':
        return 'Free'
    elif contract == 'Loan':
        return 'On Loan'
    else:
        return 'Contract'
# Add this column
fifa.insert(loc=fifa.columns.get_loc('ContractLength') + 1, column='ContractStatus', value=fifa['Contract'].apply(categorize_contract_status))

In [103]:
fifa['ContractStatus'].unique()

array(['Contract', 'Free'], dtype=object)

### 12. Positions

Player's preferred position on the field

In [189]:
fifa['Positions'].sample(50)

9403         CM, CDM
15676             CB
10997             ST
10415        RWB, RB
9159          LM, LW
10556         ST, RM
10326             CB
4062              GK
12896             CB
6745              GK
890      ST, CAM, CF
8128     RWB, RB, CB
11821             ST
2288              RW
18651             GK
10678         ST, LM
5831              CB
15716             ST
16969             CM
6080     RW, CAM, LW
16147             CB
4968      LW, RW, ST
7540              ST
14156             CB
5928              LB
8770         CAM, CF
11713         RM, RW
17470        CAM, CM
17895             ST
11857         LB, CB
297               ST
14717         RW, LW
16032             GK
11059        CM, CAM
9965          RM, LM
5482              ST
2500              CB
16932             CM
14416             CB
12189         RB, LB
10408         CM, CB
14094         RM, RW
4021              ST
12799         RM, RW
15978             GK
1161     LB, LWB, CB
3255          LM, RM
17275        

### 13. Height

In [68]:
fifa['Height'].dtype

dtype('O')

In [69]:
fifa['Height'].unique()

array(['170cm', '187cm', '188cm', '181cm', '175cm', '184cm', '191cm',
       '178cm', '193cm', '185cm', '199cm', '173cm', '168cm', '176cm',
       '177cm', '183cm', '180cm', '189cm', '179cm', '195cm', '172cm',
       '182cm', '186cm', '192cm', '165cm', '194cm', '167cm', '196cm',
       '163cm', '190cm', '174cm', '169cm', '171cm', '197cm', '200cm',
       '166cm', '6\'2"', '164cm', '198cm', '6\'3"', '6\'5"', '5\'11"',
       '6\'4"', '6\'1"', '6\'0"', '5\'10"', '5\'9"', '5\'6"', '5\'7"',
       '5\'4"', '201cm', '158cm', '162cm', '161cm', '160cm', '203cm',
       '157cm', '156cm', '202cm', '159cm', '206cm', '155cm'], dtype=object)

In [74]:
# Convert heights
def convert_heights(height):
    if "cm" in height:
        return int(height.strip('cm'))
    else:
        feet, inches = height.split("'")
        total_inches = int(feet) * 12 + int(inches.strip('"'))
        return round(total_inches * 2.54)

In [76]:
# Apply the fn to height col
fifa['Height'] = fifa['Height'].apply(convert_heights)
fifa['Height'].unique()

array([170, 187, 188, 181, 175, 184, 191, 178, 193, 185, 199, 173, 168,
       176, 177, 183, 180, 189, 179, 195, 172, 182, 186, 192, 165, 194,
       167, 196, 163, 190, 174, 169, 171, 197, 200, 166, 164, 198, 201,
       158, 162, 161, 160, 203, 157, 156, 202, 159, 206, 155], dtype=int64)

### 14. Weight

In [78]:
fifa['Weight'].dtype

dtype('O')

In [80]:
fifa['Weight'].unique()

array(['72kg', '83kg', '87kg', '70kg', '68kg', '80kg', '71kg', '91kg',
       '73kg', '85kg', '92kg', '69kg', '84kg', '96kg', '81kg', '82kg',
       '75kg', '86kg', '89kg', '74kg', '76kg', '64kg', '78kg', '90kg',
       '66kg', '60kg', '94kg', '79kg', '67kg', '65kg', '59kg', '61kg',
       '93kg', '88kg', '97kg', '77kg', '62kg', '63kg', '95kg', '100kg',
       '58kg', '183lbs', '179lbs', '172lbs', '196lbs', '176lbs', '185lbs',
       '170lbs', '203lbs', '168lbs', '161lbs', '146lbs', '130lbs',
       '190lbs', '174lbs', '148lbs', '165lbs', '159lbs', '192lbs',
       '181lbs', '139lbs', '154lbs', '157lbs', '163lbs', '98kg', '103kg',
       '99kg', '102kg', '56kg', '101kg', '57kg', '55kg', '104kg', '107kg',
       '110kg', '53kg', '50kg', '54kg', '52kg'], dtype=object)

In [85]:
def convert_weights(weight):
    if 'kg' in weight:
        return int(weight.strip('kg'))
    else: 
        pounds = int(weight.strip('lbs'))
        return round(pounds / 2.205)

In [86]:
fifa['Weight'] = fifa['Weight'].apply(convert_weights)
fifa['Weight'].unique()

array([ 72,  83,  87,  70,  68,  80,  71,  91,  73,  85,  92,  69,  84,
        96,  81,  82,  75,  86,  89,  74,  76,  64,  78,  90,  66,  60,
        94,  79,  67,  65,  59,  61,  93,  88,  97,  77,  62,  63,  95,
       100,  58,  98, 103,  99, 102,  56, 101,  57,  55, 104, 107, 110,
        53,  50,  54,  52], dtype=int64)

### 15. Preferred Foot

Player's preferred foot - Left or Right

### 16. BOV

Best Overall Rating of the Player in a specific position

In [188]:
fifa['BOV'].sample(10)

10967    64
5521     69
11649    64
11758    64
3045     73
11249    65
8186     67
17992    56
5326     70
9207     66
Name: BOV, dtype: int64

### 17. Best Position

Player's best position on the field

In [186]:
fifa['Best Position'].sample(10)

16989     LB
1512     CDM
8946      CB
2160      GK
16455    CAM
7861      LW
17663     ST
11280     LB
1706      RM
9032      ST
Name: Best Position, dtype: object

### 18. Joined

Date when the player joined the current club

In [185]:
fifa['Joined'].sample(10)

16966    27-Sep-20
7701     04-Jul-17
10884    19-Aug-20
5106     29-Oct-19
13947    24-Sep-20
13716    13-Aug-19
12343    11-Jul-19
11431    25-Sep-20
16371    01-Jul-20
5076     27-Aug-19
Name: Joined, dtype: object

### 19. Loan Date End

Date when the player's loan ends (if On Loan)

In [89]:
fifa['Loan Date End'].dtype

dtype('O')

In [91]:
fifa['Loan Date End'].unique()

array([nan, '30-Jun-21', '31-Dec-20', '30-Jan-21', '30-Jun-22',
       '31-May-21', '05-Jul-21', '31-Dec-21', '01-Jul-21', '01-Jan-21',
       '31-Aug-21', '31-Jan-21', '30-Dec-21', '23-Jun-21', '03-Jan-21',
       '27-Nov-21', '17-Jan-21', '30-Jun-23', '31-Jul-21', '22-Nov-20',
       '31-May-22', '30-Dec-20', '04-Jan-21', '30-Nov-20', '01-Aug-21'],
      dtype=object)

In [97]:
on_loan = fifa[fifa['ContractStatus'] == 'Loan']
on_loan

Unnamed: 0,ID,Name,LongName,photoUrl,playerUrl,Nationality,Age,↓OVA,POT,Club,...,A/W,D/W,IR,PAC,SHO,PAS,DRI,DEF,PHY,Hits


### 20. Value

Estimated market value of the player.

In [182]:
fifa['Value'].sample(10)

2722       €3M
1807     €6.5M
1373     €3.7M
13753    €130K
15133    €550K
11786    €925K
9830     €1.5M
5488       €3M
1962     €6.5M
1475     €7.5M
Name: Value, dtype: object

### 21. Wage

Weekly wage of the player.

In [181]:
fifa['Wage'].sample(10)

637      €43K
16014    €500
3192      €2K
7266     €16K
5684      €6K
7167      €1K
13284      €0
18044     €2K
17413    €650
3662     €10K
Name: Wage, dtype: object

### 22. Release Clause

Release clause in the player's contract

In [180]:
fifa['Release Clause'].sample(50)

557      €32.6M
1574       €14M
3906      €3.5M
10584     €344K
7766         €0
18545     €102K
13052        €0
12600        €0
13        €119M
18517     €116K
9987      €1.6M
8940      €1.2M
8894        €1M
10197     €1.8M
18420     €218K
16052     €305K
8438      €1.8M
371      €43.5M
16350      €88K
9998      €725K
15239       €1M
2533      €8.6M
2010      €8.3M
7905      €163K
18460     €275K
7171      €1.3M
17845     €245K
14969     €297K
14150     €620K
10661     €1.1M
14739     €471K
804      €24.9M
1947     €17.6M
17269        €0
1646      €9.4M
12942     €866K
5271        €2M
17972     €356K
11731     €1.2M
11878     €890K
14958     €1.1M
5724      €2.1M
16104     €605K
13977     €525K
10555     €1.2M
14427     €634K
7010      €1.7M
18802     €131K
5024        €5M
15364     €305K
Name: Release Clause, dtype: object

### 23. Attacking

Composite rating of the player's attacking ability.

### 24. Crossing

Player's ability to make accurate crosses.

### 25. Finishing

Player's ability to score goals

### 26. Heading Accuracy

Player's ability to win aerial duels.

### 27. Short Passing

Player's ability to make short passes accurately.

### 28. Volleys

Player's ability to take volleys

### 29. Skill

Composite rating of the player's technical abilities.

### 30. Dribbling

Player's ability to dribble past defenders.

### 31. Curve

Player's ability to curve the ball.

### 32. FK Accuracy

Player's ability to take free kicks.

### 33. Long Passing

Player's ability to make long passes accurately.

### 34. Ball Control

Player's ability to control the ball.

In [176]:
fifa['Ball Control'].unique()

array([96, 92, 30, 95, 88, 89, 90, 77, 79, 23, 46, 83, 80, 85, 94, 40, 84,
       16, 74, 91, 87, 82, 78, 19, 61, 22, 34, 38, 81, 25, 86, 76, 69, 28,
       93, 75, 35, 60, 63, 73, 18, 71, 15, 21, 72, 14, 65, 20, 24, 27, 70,
       33, 17, 62, 64,  9, 68, 67, 32, 26, 66, 52, 11, 57, 58, 29, 12, 37,
       10, 36, 13, 31, 55, 59, 39, 54, 56, 48, 44, 51, 50, 47, 49, 53,  5,
       42,  8, 45, 43, 41,  7], dtype=int64)

### 35. Movement

Composite rating of the player's movement abilities.

In [177]:
fifa['Movement'].unique()

array([451, 431, 307, 398, 453, 407, 460, 268, 458, 254, 354, 343, 284,
       286, 388, 378, 424, 464, 420, 399, 437, 322, 367, 272, 328, 448,
       332, 425, 435, 391, 434, 400, 331, 349, 429, 416, 312, 326, 418,
       419, 417, 386, 321, 409, 374, 304, 403, 351, 401, 365, 414, 292,
       323, 299, 433, 350, 348, 413, 320, 281, 427, 353, 364, 410, 428,
       316, 381, 442, 375, 288, 395, 385, 251, 319, 444, 383, 298, 411,
       412, 415, 393, 397, 443, 423, 387, 422, 327, 390, 362, 352, 406,
       277, 361, 421, 396, 384, 450, 338, 363, 359, 287, 297, 430, 382,
       377, 380, 438, 449, 257, 371, 339, 341, 404, 345, 394, 295, 246,
       265, 258, 366, 294, 314, 266, 405, 218, 337, 267, 220, 376, 309,
       283, 426, 347, 244, 240, 291, 340, 250, 305, 290, 317, 334, 355,
       333, 389, 330, 318, 441, 402, 344, 335, 219, 264, 408, 274, 373,
       379, 256, 229, 392, 372, 360, 262, 346, 278, 248, 368, 279, 269,
       336, 342, 236, 370, 243, 315, 249, 227, 329, 239, 369, 22

### 36. Acceleration

Playr's ability to accelerate quickly.

### 37. Sprint Speed

Player's top speed

### 38. Agility

Player's agility

### 39. Reactions

Player's reaction time

### 40. Balance

Player's balance

### 41. Power

Composite rating of player's physical abilities

### 42. Shot Power

Player's ability to shoot with power

### 43. Jumping

Player's ability to jump high

### 44. Stamina

Player's stamina.

### 45. Strength

Player's physical strength.

### 46. Long Shots

Player's ability to score from long range

### 47. Mentality

Composite rating of the player's mental abilities.

### 48. Aggression

Player's aggression on the field.

### 49. Interceptions

Player's ability to intercept passes.

### 50. Positioning

Player's ability to position themselves well on the field.

### 51. Vision

Players ability to see and execute passes.

### 52. Penalties

Player's ability to score penalties.

### 53. Composure

Player's composure under pressure.

### 54. Defending

Composite rating of the player's defensive abilities.

### 55. Marking

Player's ability to mark opposing players.

In [175]:
fifa['Marking'].unique()

array([32, 28, 27, 68, 35, 38, 15, 34, 25, 93, 42, 84, 20, 17, 47, 85, 30,
       89, 82, 29, 56, 91, 72, 59, 79, 49, 83, 86, 50, 60, 94, 41, 57, 78,
       63, 88, 90,  9, 58, 74, 39, 92, 45, 36, 44, 87, 70, 76, 53, 80, 67,
       77, 12, 48, 55, 75, 81, 11, 64, 69, 14, 24, 52, 65, 19, 31, 13, 10,
       66, 71, 54, 46, 22, 40, 18, 51, 37, 43, 61, 26, 73, 21,  7, 33, 62,
       16, 23,  8,  6,  5,  4,  3], dtype=int64)

### 56. Standing Tackle

Player's ability to make standing tackles.

### 57. Sliding Tackle

Player's ability to make sliding tackles.

### 58. Goalkeeping

Composite rating of the player's goalkeeping abilities. Only for goalkeepers

### 59. GK Diving

Goalkeeper's ability to dive to make saves.

### 60. GK Handling

Goalkeeper's ability to handle the ball

### 61. GK Kicking

Goalkeeper's ability to kick the ball.

### 62. GK Positioning

Goalkeeper's ability to position themselves well.

### 63. GK Reflexes

Goalkeeper's reflexes

In [174]:
fifa['GK Reflexes'].unique()

array([ 8, 11, 90, 13, 10, 14, 89,  6, 12, 88,  7,  9, 15,  5,  3, 37, 85,
       86,  4, 16, 82, 83, 84, 87, 78, 80, 20, 18, 79, 81, 19, 77, 17,  2,
       74, 71, 76, 73, 75, 72, 69, 46, 66, 51, 70, 34, 67, 23, 68, 45, 65,
       21, 59, 54, 47, 61, 64, 63, 62, 60, 58, 56, 57, 55, 53, 50, 52, 49,
       48, 44], dtype=int64)

### 64. Total Stats

Total number of stats in the game

In [173]:
fifa['Total Stats'].unique()

array([2231, 2221, 1413, ...,  757,  747,  956], dtype=int64)

### 65. Base Stats

Number of stats used to calculate the player's overall rating.

In [172]:
fifa['Base Stats'].unique()

array([466, 464, 489, 485, 451, 457, 470, 490, 484, 455, 469, 463, 468,
       497, 442, 439, 473, 452, 498, 449, 477, 401, 446, 447, 465, 430,
       461, 422, 476, 460, 453, 467, 471, 399, 424, 441, 459, 438, 437,
       454, 428, 445, 431, 474, 421, 435, 448, 475, 403, 444, 443, 419,
       405, 420, 423, 396, 388, 482, 478, 385, 394, 480, 433, 450, 462,
       456, 436, 434, 429, 400, 440, 425, 410, 458, 398, 413, 373, 406,
       408, 472, 426, 407, 432, 427, 415, 481, 417, 372, 380, 418, 383,
       414, 409, 412, 411, 386, 362, 402, 390, 404, 391, 416, 375, 389,
       361, 397, 366, 392, 393, 382, 368, 387, 352, 376, 384, 378, 379,
       341, 354, 369, 395, 357, 381, 377, 344, 360, 370, 338, 333, 367,
       363, 349, 355, 345, 358, 348, 374, 351, 343, 342, 353, 321, 350,
       365, 364, 371, 327, 331, 359, 347, 356, 339, 319, 317, 335, 346,
       329, 315, 324, 322, 325, 332, 336, 337, 330, 316, 313, 306, 307,
       328, 310, 340, 308, 318, 334, 301, 289, 302, 320, 323, 32

### 66. W/F

Player's weak foot rating (out of 5)

In [105]:
fifa['W/F'].dtype

dtype('O')

In [108]:
fifa['W/F'].unique()

array(['4 ★', '3 ★', '5 ★', '2 ★', '1 ★'], dtype=object)

In [122]:
fifa['W/F'] = fifa['W/F'].str.replace('★', '')
fifa['W/F'] = fifa['W/F'].astype(int)
fifa['W/F'].unique()

array([4, 3, 5, 2, 1])

### 67. SM

Player's skill moves rating (out of 5)

In [170]:
fifa['SM'] = fifa['SM'].str.replace('★', '')
fifa['SM'] = fifa['SM'].astype(int)
fifa['SM'].unique()

array([4, 5, 1, 2, 3])

### 68. A/W

Player's attacking work rate (low, medium or high)

In [169]:
fifa['A/W'].unique()

array(['Medium', 'High', 'Low'], dtype=object)

### 69. D/W

Player's defensive work rate (low, medium or high)

In [168]:
fifa['D/W'].unique()

array(['Low', 'Medium', 'High'], dtype=object)

### 70. IR

Player's injury resistance rating (out of 3)

In [131]:
fifa['IR'] = fifa['IR'].str.replace('★', '')
fifa['IR'] = fifa['IR'].astype(int)
fifa['IR'].unique()

array([5, 3, 4, 2, 1])

### 71. PAC

Player's pace attribute rating(0-100).

It measures the player's speed on the pitch, including their acceleration and sprinting ability. 

A higher PAC value indicates that the player is faster.

In [133]:
fifa['PAC'].unique()

array([85, 89, 87, 76, 91, 78, 93, 86, 96, 88, 94, 65, 84, 74, 71, 77, 68,
       75, 54, 79, 83, 80, 81, 82, 63, 67, 90, 66, 42, 73, 70, 64, 57, 58,
       69, 72, 50, 59, 92, 60, 62, 55, 52, 56, 61, 53, 45, 37, 95, 43, 44,
       46, 48, 49, 47, 34, 39, 40, 51, 41, 36, 32, 33, 30, 31, 38, 35, 28,
       29, 25], dtype=int64)

### 72. SHO

Player's shooting attribute rating.(0-100)

In [134]:
fifa['SHO'].unique()

array([92, 93, 86, 85, 91, 88, 60, 73, 89, 87, 70, 90, 81, 66, 72, 82, 28,
       74, 77, 62, 50, 83, 69, 80, 46, 76, 54, 49, 61, 58, 79, 68, 59, 41,
       45, 64, 78, 55, 75, 65, 63, 48, 42, 56, 51, 30, 47, 84, 40, 57, 25,
       71, 37, 43, 53, 67, 38, 52, 39, 35, 36, 44, 32, 34, 33, 31, 27, 22,
       29, 26, 23, 18, 24, 20, 16, 21, 19, 17], dtype=int64)

### 73. PAS

Player's passing attribute rating.(0-100)

In [136]:
fifa['PAS'].unique()

array([91, 81, 78, 93, 86, 85, 88, 71, 80, 76, 74, 77, 79, 84, 73, 55, 83,
       87, 72, 75, 58, 89, 82, 68, 67, 64, 66, 59, 69, 90, 65, 53, 63, 62,
       70, 56, 42, 54, 61, 57, 60, 48, 52, 47, 46, 44, 45, 50, 51, 49, 43,
       36, 38, 40, 41, 35, 39, 34, 33, 37, 30, 32, 29, 31, 26, 28, 25, 27],
      dtype=int64)

### 74. DRI

Player's dribbling attribute rating.(0-100)

In [137]:
fifa['DRI'].unique()

array([95, 89, 90, 88, 94, 85, 91, 71, 72, 86, 73, 81, 84, 92, 80, 68, 77,
       87, 60, 83, 78, 64, 67, 79, 69, 66, 65, 70, 82, 75, 61, 74, 54, 76,
       49, 63, 59, 62, 56, 55, 50, 57, 58, 52, 53, 51, 48, 47, 46, 39, 44,
       43, 36, 40, 45, 41, 37, 34, 35, 42, 32, 38, 31, 33, 30, 29, 28, 25,
       27], dtype=int64)

### 75. DEF

Player's defending attribute rating.

In [138]:
fifa['DEF'].unique()

array([38, 35, 52, 64, 36, 43, 45, 51, 39, 91, 44, 86, 48, 57, 40, 88, 33,
       81, 63, 47, 53, 89, 71, 37, 80, 68, 85, 61, 90, 83, 49, 56, 58, 82,
       87, 79, 66, 55, 78, 32, 50, 76, 77, 70, 75, 41, 29, 73, 65, 59, 84,
       54, 72, 46, 42, 69, 34, 31, 30, 74, 24, 62, 25, 20, 26, 60, 27, 23,
       28, 67, 22, 19, 18, 21, 17, 15, 16, 12], dtype=int64)

### 76. PHY

Player's physical attribute rating.

In [140]:
fifa['PHY'].unique()

array([65, 77, 90, 78, 59, 82, 75, 91, 76, 88, 86, 85, 73, 67, 79, 63, 83,
       89, 66, 69, 72, 64, 71, 81, 87, 68, 84, 80, 55, 70, 44, 62, 51, 57,
       60, 58, 56, 74, 52, 61, 53, 45, 50, 54, 47, 48, 49, 42, 37, 40, 39,
       43, 38, 46, 41, 34, 35, 36, 31, 32, 33, 29, 28], dtype=int64)

### 77. Hits

Number of times the player has been searched for in the FIFA database.

In [142]:
fifa['Hits'].unique()

array(['771', '562', '150', '207', '595', '248', '246', '120', '1.6K',
       '130', '321', '189', '175', '96', '118', '216', '212', '154',
       '205', '202', '339', '408', '103', '332', '86', '173', '161',
       '396', '1.1K', '433', '242', '206', '177', '1.5K', '198', '459',
       '117', '119', '209', '84', '187', '165', '203', '65', '336', '126',
       '313', '124', '145', '538', '182', '101', '45', '377', '99', '194',
       '403', '414', '593', '374', '245', '3.2K', '266', '299', '309',
       '215', '265', '211', '112', '337', '70', '159', '688', '116', '63',
       '144', '123', '71', '224', '113', '168', '61', '89', '137', '278',
       '75', '148', '176', '197', '264', '214', '247', '402', '440',
       '1.7K', '2.3K', '171', '320', '657', '87', '259', '200', '255',
       '253', '196', '60', '97', '85', '169', '256', '132', '239', '166',
       '121', '109', '32', '46', '122', '48', '527', '199', '282', '51',
       '1.9K', '642', '155', '323', '288', '497', '509', '79',

In [166]:
fifa['Hits'].fillna(0, inplace=True)
def convert_hits_column(hits):
    if 'K' in str(hits):
        return int(hits.strip('K')) * 1000

    else: 
        return int(hits)
    
# fifa['Hits'] = fifa['Hits'].apply(convert_hits_column)

# fifa['Hits'].astype('int')
# fifa['Hits'].unique()

### Look at dtypes of all columns

In [190]:
fifa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18979 entries, 0 to 18978
Data columns (total 81 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   ID                18979 non-null  int64  
 1   Name              18979 non-null  object 
 2   LongName          18979 non-null  object 
 3   photoUrl          18979 non-null  object 
 4   playerUrl         18979 non-null  object 
 5   Nationality       18979 non-null  object 
 6   Age               18979 non-null  int64  
 7   ↓OVA              18979 non-null  int64  
 8   POT               18979 non-null  int64  
 9   Club              18979 non-null  object 
 10  Contract          18979 non-null  object 
 11  ContractStart     17729 non-null  object 
 12  ContractEnd       17729 non-null  object 
 13  ContractLength    18979 non-null  float64
 14  ContractStatus    18979 non-null  object 
 15  Positions         18979 non-null  object 
 16  Height            18979 non-null  int64 