In [1]:
# import library
import pandas as pd
from collections import Counter

# ML library
from sklearn import svm, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

# evaluation metrics
from sklearn.metrics import classification_report, accuracy_score

import warnings
warnings.filterwarnings('ignore')

In [3]:
data = pd.read_csv('fifa19.csv')

In [4]:
data.head()

Unnamed: 0.1,Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,...,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Release Clause
0,0,158023,L. Messi,31,https://cdn.sofifa.org/players/4/19/158023.png,Argentina,https://cdn.sofifa.org/flags/52.png,94,94,FC Barcelona,...,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0,€226.5M
1,1,20801,Cristiano Ronaldo,33,https://cdn.sofifa.org/players/4/19/20801.png,Portugal,https://cdn.sofifa.org/flags/38.png,94,94,Juventus,...,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0,€127.1M
2,2,190871,Neymar Jr,26,https://cdn.sofifa.org/players/4/19/190871.png,Brazil,https://cdn.sofifa.org/flags/54.png,92,93,Paris Saint-Germain,...,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0,€228.1M
3,3,193080,De Gea,27,https://cdn.sofifa.org/players/4/19/193080.png,Spain,https://cdn.sofifa.org/flags/45.png,91,93,Manchester United,...,68.0,15.0,21.0,13.0,90.0,85.0,87.0,88.0,94.0,€138.6M
4,4,192985,K. De Bruyne,27,https://cdn.sofifa.org/players/4/19/192985.png,Belgium,https://cdn.sofifa.org/flags/7.png,91,92,Manchester City,...,88.0,68.0,58.0,51.0,15.0,13.0,5.0,10.0,13.0,€196.4M


In [5]:
data.isnull().sum()

Unnamed: 0          0
ID                  0
Name                0
Age                 0
Photo               0
                 ... 
GKHandling          1
GKKicking           1
GKPositioning       1
GKReflexes          1
Release Clause    152
Length: 89, dtype: int64

In [5]:
data.count()

Unnamed: 0        6248
ID                6248
Name              6248
Age               6248
Photo             6248
                  ... 
GKHandling        6247
GKKicking         6247
GKPositioning     6247
GKReflexes        6247
Release Clause    5656
Length: 89, dtype: int64

In [6]:
data.columns

Index(['Unnamed: 0', 'ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag',
       'Overall', 'Potential', 'Club', 'Club Logo', 'Value', 'Wage', 'Special',
       'Preferred Foot', 'International Reputation', 'Weak Foot',
       'Skill Moves', 'Work Rate', 'Body Type', 'Real Face', 'Position',
       'Jersey Number', 'Joined', 'Loaned From', 'Contract Valid Until',
       'Height', 'Weight', 'LS', 'ST', 'RS', 'LW', 'LF', 'CF', 'RF', 'RW',
       'LAM', 'CAM', 'RAM', 'LM', 'LCM', 'CM', 'RCM', 'RM', 'LWB', 'LDM',
       'CDM', 'RDM', 'RWB', 'LB', 'LCB', 'CB', 'RCB', 'RB', 'Crossing',
       'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingT

In [6]:
# dapatkan variable yang dibutuhkan
data_2 = data[['Position', 'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
       'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
       'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
       'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
       'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
       'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
       'GKKicking', 'GKPositioning', 'GKReflexes']]

In [8]:
data_2.head()

Unnamed: 0,Position,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,...,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes
0,RF,95.0,70.0,90.0,86.0,97.0,93.0,94.0,87.0,96.0,...,75.0,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0
1,ST,94.0,89.0,81.0,87.0,88.0,81.0,76.0,77.0,94.0,...,85.0,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0
2,LW,87.0,62.0,84.0,84.0,96.0,88.0,87.0,78.0,95.0,...,81.0,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0
3,GK,13.0,21.0,50.0,13.0,18.0,21.0,19.0,51.0,42.0,...,40.0,68.0,15.0,21.0,13.0,90.0,85.0,87.0,88.0,94.0
4,RCM,82.0,55.0,92.0,82.0,86.0,85.0,83.0,91.0,91.0,...,79.0,88.0,68.0,58.0,51.0,15.0,13.0,5.0,10.0,13.0


In [7]:
# posisi pemain
forward_player = ["ST", "LW", "RW", "LF", "RF", "RS", "LS", "CF"]
mid_player = ['LAM', 'CAM', 'RAM', 'LM', 'LCM', 'CM', 'RCM', 'RM', 'LWB', 'LDM','CDM', 'RDM', 'RAM', 'RM','LM']
defender_player = ['RWB', 'LB', 'LCB', 'CB', 'RCB', 'RB', 'LWB',]

# labeling posisi
data_2.loc[data_2["Position"] == "GK", "Position"] = 0
data_2.loc[data_2["Position"].isin(defender_player), "Position"] = 1
data_2.loc[data_2["Position"].isin(mid_player), "Position"] = 2
data_2.loc[data_2["Position"].isin(forward_player), "Position"] = 3

data_2 = data_2.dropna()
data_2["Position"] = data_2["Position"].astype("int64")
data_2['Position'].value_counts()

Position
2    875
1    608
3    406
0    199
Name: count, dtype: int64

In [8]:
X = data_2.drop(columns='Position')
y = data_2['Position']

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)

In [9]:
train_counter = Counter(y_train)
test_counter = Counter(y_test)

print("Distribusi kelas di set pelatihan:", train_counter)
print("Distribusi kelas di set pengujian:", test_counter)

Distribusi kelas di set pelatihan: Counter({2: 709, 1: 478, 3: 322, 0: 161})
Distribusi kelas di set pengujian: Counter({2: 166, 1: 130, 3: 84, 0: 38})


In [10]:
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression

In [11]:
clf = OneVsOneClassifier(LinearSVC()).fit(X_train,y_train)

In [13]:
prediction = clf.predict(X_test)
print("Accuracy: ", accuracy_score(y_test, prediction))
print(classification_report(y_test, prediction))

Accuracy:  0.8086124401913876
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       0.93      0.92      0.93       130
           2       0.91      0.58      0.71       166
           3       0.58      1.00      0.73        84

    accuracy                           0.81       418
   macro avg       0.85      0.88      0.84       418
weighted avg       0.86      0.81      0.81       418



In [14]:
clf = OneVsRestClassifier(LogisticRegression()).fit(X_train,y_train)

In [15]:
prediction = clf.predict(X_test)
print("Accuracy: ", accuracy_score(y_test, prediction))
print(classification_report(y_test, prediction))

Accuracy:  0.8947368421052632
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       0.95      0.93      0.94       130
           2       0.87      0.87      0.87       166
           3       0.83      0.85      0.84        84

    accuracy                           0.89       418
   macro avg       0.91      0.91      0.91       418
weighted avg       0.90      0.89      0.89       418



In [16]:
clf = OneVsOneClassifier(LogisticRegression()).fit(X_train,y_train)

In [17]:
prediction = clf.predict(X_test)
print("Accuracy: ", accuracy_score(y_test, prediction))
print(classification_report(y_test, prediction))

Accuracy:  0.8995215311004785
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       0.95      0.94      0.94       130
           2       0.87      0.87      0.87       166
           3       0.84      0.85      0.84        84

    accuracy                           0.90       418
   macro avg       0.91      0.91      0.91       418
weighted avg       0.90      0.90      0.90       418



In [18]:
clf = OneVsRestClassifier(LinearSVC()).fit(X_train,y_train)

In [19]:
prediction = clf.predict(X_test)
print("Accuracy: ", accuracy_score(y_test, prediction))
print(classification_report(y_test, prediction))

Accuracy:  0.8277511961722488
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        38
           1       0.97      0.80      0.88       130
           2       0.71      0.96      0.82       166
           3       0.92      0.54      0.68        84

    accuracy                           0.83       418
   macro avg       0.90      0.82      0.84       418
weighted avg       0.86      0.83      0.82       418

