## Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Read the csv and convert to a dataframe using pandas

In [2]:
df = pd.read_csv("data.csv")

## Have a look at the data(first 5 rows to get a better idea of columns and data)

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,...,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Release Clause
0,0,158023,L. Messi,31,https://cdn.sofifa.org/players/4/19/158023.png,Argentina,https://cdn.sofifa.org/flags/52.png,94,94,FC Barcelona,...,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0,€226.5M
1,1,20801,Cristiano Ronaldo,33,https://cdn.sofifa.org/players/4/19/20801.png,Portugal,https://cdn.sofifa.org/flags/38.png,94,94,Juventus,...,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0,€127.1M
2,2,190871,Neymar Jr,26,https://cdn.sofifa.org/players/4/19/190871.png,Brazil,https://cdn.sofifa.org/flags/54.png,92,93,Paris Saint-Germain,...,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0,€228.1M
3,3,193080,De Gea,27,https://cdn.sofifa.org/players/4/19/193080.png,Spain,https://cdn.sofifa.org/flags/45.png,91,93,Manchester United,...,68.0,15.0,21.0,13.0,90.0,85.0,87.0,88.0,94.0,€138.6M
4,4,192985,K. De Bruyne,27,https://cdn.sofifa.org/players/4/19/192985.png,Belgium,https://cdn.sofifa.org/flags/7.png,91,92,Manchester City,...,88.0,68.0,58.0,51.0,15.0,13.0,5.0,10.0,13.0,€196.4M


### This gives us an idea about the data, look for any missing values, statistical data, unique values etc.

In [4]:
# Here we include "all" so that even the categorical values are included. Next we take a look at just the one player and check 
# which of the columns don't provide necessary insight to our final visualization.
df.describe(include="all")

Unnamed: 0.1,Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,...,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Release Clause
count,18207.0,18207.0,18207,18207.0,18207,18207,18207,18207.0,18207.0,17966,...,18159.0,18159.0,18159.0,18159.0,18159.0,18159.0,18159.0,18159.0,18159.0,16643
unique,,,17194,,18207,164,164,,,651,...,,,,,,,,,,1244
top,,,J. Rodríguez,,https://cdn.sofifa.org/players/4/19/177657.png,England,https://cdn.sofifa.org/flags/14.png,,,Manchester City,...,,,,,,,,,,€1.1M
freq,,,11,,1,1662,1662,,,33,...,,,,,,,,,,557
mean,9103.0,214298.338606,,25.122206,,,,66.238699,71.307299,,...,58.648274,47.281623,47.697836,45.661435,16.616223,16.391596,16.232061,16.388898,16.710887,
std,5256.052511,29965.244204,,4.669943,,,,6.90893,6.136496,,...,11.436133,19.904397,21.664004,21.289135,17.695349,16.9069,16.502864,17.034669,17.955119,
min,0.0,16.0,,16.0,,,,46.0,48.0,,...,3.0,3.0,2.0,3.0,1.0,1.0,1.0,1.0,1.0,
25%,4551.5,200315.5,,21.0,,,,62.0,67.0,,...,51.0,30.0,27.0,24.0,8.0,8.0,8.0,8.0,8.0,
50%,9103.0,221759.0,,25.0,,,,66.0,71.0,,...,60.0,53.0,55.0,52.0,11.0,11.0,11.0,11.0,11.0,
75%,13654.5,236529.5,,28.0,,,,71.0,75.0,,...,67.0,64.0,66.0,64.0,14.0,14.0,14.0,14.0,14.0,


In [5]:
# Here we find that the Total Count is 18207, but as the number of columns are too many we will remove the unnecessary ones.
df.columns

Index(['Unnamed: 0', 'ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag',
       'Overall', 'Potential', 'Club', 'Club Logo', 'Value', 'Wage', 'Special',
       'Preferred Foot', 'International Reputation', 'Weak Foot',
       'Skill Moves', 'Work Rate', 'Body Type', 'Real Face', 'Position',
       'Jersey Number', 'Joined', 'Loaned From', 'Contract Valid Until',
       'Height', 'Weight', 'LS', 'ST', 'RS', 'LW', 'LF', 'CF', 'RF', 'RW',
       'LAM', 'CAM', 'RAM', 'LM', 'LCM', 'CM', 'RCM', 'RM', 'LWB', 'LDM',
       'CDM', 'RDM', 'RWB', 'LB', 'LCB', 'CB', 'RCB', 'RB', 'Crossing',
       'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingT

### Here we enumerate it so that it gets easier to delete the colums

In [6]:
for i,j in enumerate(df):
    print(i,j)

0 Unnamed: 0
1 ID
2 Name
3 Age
4 Photo
5 Nationality
6 Flag
7 Overall
8 Potential
9 Club
10 Club Logo
11 Value
12 Wage
13 Special
14 Preferred Foot
15 International Reputation
16 Weak Foot
17 Skill Moves
18 Work Rate
19 Body Type
20 Real Face
21 Position
22 Jersey Number
23 Joined
24 Loaned From
25 Contract Valid Until
26 Height
27 Weight
28 LS
29 ST
30 RS
31 LW
32 LF
33 CF
34 RF
35 RW
36 LAM
37 CAM
38 RAM
39 LM
40 LCM
41 CM
42 RCM
43 RM
44 LWB
45 LDM
46 CDM
47 RDM
48 RWB
49 LB
50 LCB
51 CB
52 RCB
53 RB
54 Crossing
55 Finishing
56 HeadingAccuracy
57 ShortPassing
58 Volleys
59 Dribbling
60 Curve
61 FKAccuracy
62 LongPassing
63 BallControl
64 Acceleration
65 SprintSpeed
66 Agility
67 Reactions
68 Balance
69 ShotPower
70 Jumping
71 Stamina
72 Strength
73 LongShots
74 Aggression
75 Interceptions
76 Positioning
77 Vision
78 Penalties
79 Composure
80 Marking
81 StandingTackle
82 SlidingTackle
83 GKDiving
84 GKHandling
85 GKKicking
86 GKPositioning
87 GKReflexes
88 Release Clause


In [7]:
cols = [0,4,6,10,22,23]
df.drop(df.columns[cols],axis=1,inplace=True)

In [8]:
df.drop(df.columns[28:54],axis=1,inplace=True)

In [9]:
df.head()

Unnamed: 0,ID,Name,Age,Nationality,Overall,Potential,Club,Value,Wage,Special,...,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Release Clause
0,158023,L. Messi,31,Argentina,94,94,FC Barcelona,€110.5M,€565K,2202,...,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0,€226.5M
1,20801,Cristiano Ronaldo,33,Portugal,94,94,Juventus,€77M,€405K,2228,...,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0,€127.1M
2,190871,Neymar Jr,26,Brazil,92,93,Paris Saint-Germain,€118.5M,€290K,2143,...,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0,€228.1M
3,193080,De Gea,27,Spain,91,93,Manchester United,€72M,€260K,1471,...,68.0,15.0,21.0,13.0,90.0,85.0,87.0,88.0,94.0,€138.6M
4,192985,K. De Bruyne,27,Belgium,91,92,Manchester City,€102M,€355K,2281,...,88.0,68.0,58.0,51.0,15.0,13.0,5.0,10.0,13.0,€196.4M


In [10]:
df.columns

Index(['ID', 'Name', 'Age', 'Nationality', 'Overall', 'Potential', 'Club',
       'Value', 'Wage', 'Special', 'Preferred Foot',
       'International Reputation', 'Weak Foot', 'Skill Moves', 'Work Rate',
       'Body Type', 'Real Face', 'Position', 'Loaned From',
       'Contract Valid Until', 'Height', 'Weight', 'LS', 'ST', 'RS', 'LW',
       'LF', 'CF', 'Curve', 'FKAccuracy', 'LongPassing', 'BallControl',
       'Acceleration', 'SprintSpeed', 'Agility', 'Reactions', 'Balance',
       'ShotPower', 'Jumping', 'Stamina', 'Strength', 'LongShots',
       'Aggression', 'Interceptions', 'Positioning', 'Vision', 'Penalties',
       'Composure', 'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving',
       'GKHandling', 'GKKicking', 'GKPositioning', 'GKReflexes',
       'Release Clause'],
      dtype='object')

In [11]:
for i,j in enumerate(df):
    print(i,j)

0 ID
1 Name
2 Age
3 Nationality
4 Overall
5 Potential
6 Club
7 Value
8 Wage
9 Special
10 Preferred Foot
11 International Reputation
12 Weak Foot
13 Skill Moves
14 Work Rate
15 Body Type
16 Real Face
17 Position
18 Loaned From
19 Contract Valid Until
20 Height
21 Weight
22 LS
23 ST
24 RS
25 LW
26 LF
27 CF
28 Curve
29 FKAccuracy
30 LongPassing
31 BallControl
32 Acceleration
33 SprintSpeed
34 Agility
35 Reactions
36 Balance
37 ShotPower
38 Jumping
39 Stamina
40 Strength
41 LongShots
42 Aggression
43 Interceptions
44 Positioning
45 Vision
46 Penalties
47 Composure
48 Marking
49 StandingTackle
50 SlidingTackle
51 GKDiving
52 GKHandling
53 GKKicking
54 GKPositioning
55 GKReflexes
56 Release Clause


In [12]:
df.drop(df.columns[22:28],axis=1,inplace=True)


In [13]:
df.head()

Unnamed: 0,ID,Name,Age,Nationality,Overall,Potential,Club,Value,Wage,Special,...,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Release Clause
0,158023,L. Messi,31,Argentina,94,94,FC Barcelona,€110.5M,€565K,2202,...,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0,€226.5M
1,20801,Cristiano Ronaldo,33,Portugal,94,94,Juventus,€77M,€405K,2228,...,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0,€127.1M
2,190871,Neymar Jr,26,Brazil,92,93,Paris Saint-Germain,€118.5M,€290K,2143,...,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0,€228.1M
3,193080,De Gea,27,Spain,91,93,Manchester United,€72M,€260K,1471,...,68.0,15.0,21.0,13.0,90.0,85.0,87.0,88.0,94.0,€138.6M
4,192985,K. De Bruyne,27,Belgium,91,92,Manchester City,€102M,€355K,2281,...,88.0,68.0,58.0,51.0,15.0,13.0,5.0,10.0,13.0,€196.4M


In [14]:
for i,j in enumerate(df):
    print(i,j)

0 ID
1 Name
2 Age
3 Nationality
4 Overall
5 Potential
6 Club
7 Value
8 Wage
9 Special
10 Preferred Foot
11 International Reputation
12 Weak Foot
13 Skill Moves
14 Work Rate
15 Body Type
16 Real Face
17 Position
18 Loaned From
19 Contract Valid Until
20 Height
21 Weight
22 Curve
23 FKAccuracy
24 LongPassing
25 BallControl
26 Acceleration
27 SprintSpeed
28 Agility
29 Reactions
30 Balance
31 ShotPower
32 Jumping
33 Stamina
34 Strength
35 LongShots
36 Aggression
37 Interceptions
38 Positioning
39 Vision
40 Penalties
41 Composure
42 Marking
43 StandingTackle
44 SlidingTackle
45 GKDiving
46 GKHandling
47 GKKicking
48 GKPositioning
49 GKReflexes
50 Release Clause


In [15]:
df.isnull().sum()

ID                              0
Name                            0
Age                             0
Nationality                     0
Overall                         0
Potential                       0
Club                          241
Value                           0
Wage                            0
Special                         0
Preferred Foot                 48
International Reputation       48
Weak Foot                      48
Skill Moves                    48
Work Rate                      48
Body Type                      48
Real Face                      48
Position                       60
Loaned From                 16943
Contract Valid Until          289
Height                         48
Weight                         48
Curve                          48
FKAccuracy                     48
LongPassing                    48
BallControl                    48
Acceleration                   48
SprintSpeed                    48
Agility                        48
Reactions     

In [16]:
df.drop(["Loaned From"],axis=1,inplace=True)
df.drop(["Contract Valid Until"],axis=1,inplace=True)
df.drop(["Release Clause"],axis=1,inplace=True)

In [17]:
df.to_csv("newfifa19.csv")

In [18]:
df1= pd.read_csv("newfifa19.csv")

In [19]:
df1.head()

Unnamed: 0.1,Unnamed: 0,ID,Name,Age,Nationality,Overall,Potential,Club,Value,Wage,...,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes
0,0,158023,L. Messi,31,Argentina,94,94,FC Barcelona,€110.5M,€565K,...,75.0,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0
1,1,20801,Cristiano Ronaldo,33,Portugal,94,94,Juventus,€77M,€405K,...,85.0,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0
2,2,190871,Neymar Jr,26,Brazil,92,93,Paris Saint-Germain,€118.5M,€290K,...,81.0,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0
3,3,193080,De Gea,27,Spain,91,93,Manchester United,€72M,€260K,...,40.0,68.0,15.0,21.0,13.0,90.0,85.0,87.0,88.0,94.0
4,4,192985,K. De Bruyne,27,Belgium,91,92,Manchester City,€102M,€355K,...,79.0,88.0,68.0,58.0,51.0,15.0,13.0,5.0,10.0,13.0
