In [2]:
import os
#import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset , DataLoader
#from utils.helper import fn_plot_torch_hist, fn_plot_confusion_matrix

In [3]:
inpDir = 'D:\DNN\input' # location where input data is stored
outDir = '../output' # location to store outputs
subDir = 'D:\DNN\images' # location of the images
modelDir = '../models'
altName = '?????'

RANDOM_STATE = 24
torch.manual_seed(RANDOM_STATE) # Set Random Seed for reproducible  results

EPOCHS = 2001 # number of epochs
ALPHA = 0.001 # learning rate
TEST_SIZE = 0.2
BATCH_SIZE = 256

# parameters for Matplotlib
params = {'legend.fontsize': 'large',
          'figure.figsize': (15, 6),
          'axes.labelsize': 'medium',
          'axes.titlesize':'large',
          'xtick.labelsize':'medium',
          'ytick.labelsize':'medium'
         }

plt.rcParams.update(params)

CMAP = plt.cm.coolwarm
plt.style.use('seaborn-v0_8-darkgrid') # plt.style.use('ggplot')

In [4]:
data_df = pd.read_csv(os.path.join(inpDir, 'fifa_2019.csv'))
data_df.shape


(18207, 89)

In [5]:
data_df = data_df[data_df["Position"].notnull()]
data_df.head()

Unnamed: 0.1,Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,...,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Release Clause
0,0,158023,L. Messi,31,https://cdn.sofifa.org/players/4/19/158023.png,Argentina,https://cdn.sofifa.org/flags/52.png,94,94,FC Barcelona,...,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0,€226.5M
1,1,20801,Cristiano Ronaldo,33,https://cdn.sofifa.org/players/4/19/20801.png,Portugal,https://cdn.sofifa.org/flags/38.png,94,94,Juventus,...,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0,€127.1M
2,2,190871,Neymar Jr,26,https://cdn.sofifa.org/players/4/19/190871.png,Brazil,https://cdn.sofifa.org/flags/54.png,92,93,Paris Saint-Germain,...,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0,€228.1M
3,3,193080,De Gea,27,https://cdn.sofifa.org/players/4/19/193080.png,Spain,https://cdn.sofifa.org/flags/45.png,91,93,Manchester United,...,68.0,15.0,21.0,13.0,90.0,85.0,87.0,88.0,94.0,€138.6M
4,4,192985,K. De Bruyne,27,https://cdn.sofifa.org/players/4/19/192985.png,Belgium,https://cdn.sofifa.org/flags/7.png,91,92,Manchester City,...,88.0,68.0,58.0,51.0,15.0,13.0,5.0,10.0,13.0,€196.4M


In [6]:
# Following columns appear to be relevant for our analysis
rel_cols = ["Position", 'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
            'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
            'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
            'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
            'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
            'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
            'GKKicking', 'GKPositioning', 'GKReflexes']

In [7]:
goalkeeper = 'GK'
forward = ['ST', 'LW', 'RW', 'LF', 'RF', 'RS','LS', 'CF']
midfielder = ['CM','RCM','LCM', 'CDM','RDM','LDM', 'CAM', 'LAM', 'RAM', 'RM', 'LM']
defender = ['CB', 'RCB', 'LCB', 'LWB', 'RWB', 'LB', 'RB']

In [8]:
#Assign labels to goalkeepers
data_df.loc[data_df["Position"] == "GK", "Position"] = 0

#Defenders
data_df.loc[data_df["Position"].isin(defender), "Position"] = 1

#Midfielders
data_df.loc[data_df["Position"].isin(midfielder), "Position"] = 2

#Forward
data_df.loc[data_df["Position"].isin(forward), "Position"] = 3

# Convert Column "Position" to numeric so that Pandas does not complain
data_df['Position'] = pd.to_numeric(data_df['Position'], downcast="integer")

In [9]:
data_df = data_df[rel_cols]
data_df.head()

Unnamed: 0,Position,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,...,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes
0,3,95.0,70.0,90.0,86.0,97.0,93.0,94.0,87.0,96.0,...,75.0,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0
1,3,94.0,89.0,81.0,87.0,88.0,81.0,76.0,77.0,94.0,...,85.0,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0
2,3,87.0,62.0,84.0,84.0,96.0,88.0,87.0,78.0,95.0,...,81.0,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0
3,0,13.0,21.0,50.0,13.0,18.0,21.0,19.0,51.0,42.0,...,40.0,68.0,15.0,21.0,13.0,90.0,85.0,87.0,88.0,94.0
4,2,82.0,55.0,92.0,82.0,86.0,85.0,83.0,91.0,91.0,...,79.0,88.0,68.0,58.0,51.0,15.0,13.0,5.0,10.0,13.0


In [10]:
class_labels = {0: 'GK', 1: 'Defender', 2: 'Mid Fielder', 3 : 'Forward'}

In [11]:
y = data_df["Position"].to_numpy()

X = data_df.drop("Position", axis = 1)

X.shape, y.shape

((18147, 33), (18147,))

In [12]:
#  Split the data in training and test sets to measure performance of the model.
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=TEST_SIZE, 
                                                    stratify=y, 
                                                    random_state=RANDOM_STATE )

print (X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(14517, 33) (14517,) (3630, 33) (3630,)


In [13]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)

X_test = sc.transform(X_test)

In [14]:
class FifaDataset(Dataset):
    def __init__(self , X,y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx] , self.y[idx]
        

In [15]:
train_dataset = FifaDataset(X_train , y_train)

train_loader = DataLoader(dataset= train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

for batch_idx ,(data , target) in enumerate(train_loader):
    print(f"Batch {batch_idx + 1}: " , end="")
    print("Data:", data.shape , end ='')
    print("target:" ,target.shape)

Batch 1: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 2: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 3: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 4: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 5: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 6: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 7: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 8: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 9: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 10: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 11: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 12: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 13: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 14: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 15: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 16: Data: torch.Size([256, 33])target: torch.Size([256])
B

In [16]:
test_dataset = FifaDataset(X_test , y_test)

test_loader = DataLoader(dataset= test_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

for batch_idx ,(data , target) in enumerate(test_loader):
    print(f"Batch {batch_idx + 1}: " , end="")
    print("Data:", data.shape , end ='')
    print("target:" ,target.shape)

Batch 1: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 2: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 3: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 4: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 5: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 6: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 7: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 8: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 9: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 10: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 11: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 12: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 13: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 14: Data: torch.Size([256, 33])target: torch.Size([256])
Batch 15: Data: torch.Size([46, 33])target: torch.Size([46])


In [17]:
class Model(nn.Module):
    def __init__(self , input_dim):
        super (Model , self).__init__()
        self.layer1 = nn.Linear( input_dim , 18)
        self.activ1 = nn.SiLU()
        self.layer2 = nn.Linear( 18 , 4)

    def forward(self, x):
        x = self.layer1(x)
        x = self.activ1(x)
        x = self.layer2(x)
        return x

In [18]:
torch.cuda.is_available()

False

In [19]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [20]:
input_dim = X_train.shape[1]

model = nn.Sequential(
    nn.Linear( input_dim, 18),
    nn.Tanh(),
    nn.Linear( 18, 4),).to(device=device)

In [21]:
print(model)

Sequential(
  (0): Linear(in_features=33, out_features=18, bias=True)
  (1): Tanh()
  (2): Linear(in_features=18, out_features=4, bias=True)
)


In [22]:
print (f'Num Parameters: {len(list(model.parameters()))}')
print (f'Layer 1: Weights : {list(model.parameters())[0].shape}')
print (f'Layer 1: Bias    : {list(model.parameters())[1].shape}')
print (f'Layer 2: Weights : {list(model.parameters())[2].shape}')
print (f'Layer 2: Bias    : {list(model.parameters())[3].shape}')

Num Parameters: 4
Layer 1: Weights : torch.Size([18, 33])
Layer 1: Bias    : torch.Size([18])
Layer 2: Weights : torch.Size([4, 18])
Layer 2: Bias    : torch.Size([4])


In [23]:
train_X = torch.tensor(X_train, dtype=torch.float32, device=device)   #train_X = Torch
train_y = torch.tensor(y_train, dtype=torch.int64, device=device)     #X_train = Numpy arrary

test_X = torch.tensor(X_test, dtype=torch.float32, device=device)
test_y = torch.tensor(y_test, dtype=torch.int64, device=device)


In [24]:
predict_prob = model(train_X[:1]) # make predictions
predict_prob.detach().cpu().numpy()   #For Local Machine remove .cpu()

array([[0.3238876 , 0.37457144, 0.10015664, 0.2754107 ]], dtype=float32)

In [25]:
# Loss Function
loss_fn = nn.CrossEntropyLoss()

# Optimizers
optimizer = torch.optim.Adam(model.parameters(), lr = ALPHA)

# Iterations
for epoch in range (EPOCHS):
    
    model.train() # Set the model in training mode
    
    predict_prob = model(train_X) # make predictions
    
    for batch_idx,(train_X , train_y) in enumerate(train_loader):
        train_X , train_y = train_X.to(device) , train_y.to(device)
        predict_prob = model (train_X)
        curr_loss = loss_fn(predict_prob, train_y) # calculate loss

    ###---------------
    ### Back prop step
    ###---------------
    optimizer.zero_grad() 
    curr_loss.backward()
    optimizer.step()