In [None]:
from pathlib import Path
from matplotlib.font_manager import fontManager
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
import seaborn as sns
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix,ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
rng=np.random.default_rng(seed=42)
import torch
import torch.nn as nn
from torch.utils.data import DataLoader,Dataset
# from utils.helper import fn_plot_torch_hist,fn_plot_confusion_matrix
import tensorflow as tf

from sklearn.preprocessing import StandardScaler


In [None]:
###----------------------
### Some basic parameters
###----------------------

# Directory layout, expressed relative to the notebook's working directory
inpDir = Path('..') / '..' / 'input'
outDir = Path('..') / 'output'
modelDir = Path('..') / 'models'
subDir = 'fifa_2019'

RANDOM_STATE = 24 # for initialization ----- REMEMBER: to remove at the time of promotion to production
np.random.seed(RANDOM_STATE)
rng = np.random.default_rng(seed = RANDOM_STATE) # Set Random Seed for reproducible  results
# Seed PyTorch as well: weight initialisation and DataLoader shuffling draw from
# torch's own global generator, which the NumPy seeds above do not affect.
torch.manual_seed(RANDOM_STATE)

EPOCHS = 100 # number of epochs
BATCH_SIZE = 32
ALPHA = 0.001 # learning rate
TEST_SIZE = 0.2 # NOTE(review): not referenced by the cells visible here - confirm it is still needed
TRAIN_SIZE=454*BATCH_SIZE # absolute number of training rows: 454 full batches

# parameters for Matplotlib
params = {'legend.fontsize': 'x-large',
          'figure.figsize': (15, 6),
          'axes.labelsize': 'x-large',
          'axes.titlesize':'x-large',
          'xtick.labelsize':'x-large',
          'ytick.labelsize':'x-large'
         }

plt.rcParams.update(params)

CMAP = plt.cm.coolwarm
plt.style.use('seaborn-v0_8-darkgrid') # plt.style.use('ggplot')

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

In [None]:
# Create the output folder and the per-dataset model folder when they are missing
modelSubDir = modelDir/ subDir
for _required_dir in (outDir, modelSubDir):
    _required_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# NOTE(review): hard-coded absolute Windows path; inpDir / subDir from the
# parameter cell are not used here - confirm where the CSV should live.
fifa_csv_path = r"D:\dnn_input\fifa_2019.csv"
data_df = pd.read_csv(fifa_csv_path)
data_df.shape

In [None]:
# Rows without a position cannot be labelled, so drop them
data_df = data_df.dropna(subset=["Position"])
data_df.head()

In [None]:
# Column names, dtypes and non-null counts after dropping rows with no position
data_df.info()

In [None]:
# Summary statistics, transposed so that each described column becomes one row
data_df.describe().T

In [None]:
# Show only the columns that still contain missing values, with their counts
null_counts = data_df.isna().sum()
null_counts[null_counts > 0]

In [None]:
# Columns kept for the analysis: "Position" is the target that gets encoded into
# class ids further down; the remaining 33 entries are the attribute columns fed
# to the model as features.
rel_cols = ["Position", 'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
            'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
            'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
            'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
            'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
            'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
            'GKKicking', 'GKPositioning', 'GKReflexes']

In [None]:
# Detailed position codes grouped into the four coarse roles used as class labels.
# Note that goalkeeper is a single string while the other three groups are lists.
goalkeeper = 'GK'
forward = ['ST', 'LW', 'RW', 'LF', 'RF', 'RS','LS', 'CF']
midfielder = ['CM','RCM','LCM', 'CDM','RDM','LDM', 'CAM', 'LAM', 'RAM', 'RM', 'LM']
defender = ['CB', 'RCB', 'LCB', 'LWB', 'RWB', 'LB', 'RB']

In [None]:
# Keep only the target and the selected attribute columns, dropping rows with no
# position. The explicit .copy() makes data_df an independent frame, so the
# in-place label encoding in the following cells does not write into a slice of
# the raw data (which is what triggers pandas' SettingWithCopyWarning).
data_df = data_df.loc[data_df['Position'].notnull(), rel_cols].copy()
data_df.head()

In [None]:
# Dtypes and non-null counts of the reduced frame (target + selected attributes)
data_df.info()

In [None]:
# Summary statistics of the reduced frame, one row per described column
data_df.describe().T

In [None]:
# List the distinct values found in every column
for col, column_values in data_df.items():
    print(f'Col: {col} -{column_values.unique()}')

In [None]:
# Missing-value count for every remaining column
data_df.isnull().sum()

In [None]:
# Collapse the detailed position codes into four class ids:
# 0 = goalkeeper, 1 = defender, 2 = midfielder, 3 = forward
position_to_class = {goalkeeper: 0}
position_to_class.update(dict.fromkeys(defender, 1))
position_to_class.update(dict.fromkeys(midfielder, 2))
position_to_class.update(dict.fromkeys(forward, 3))

# A single lookup pass builds a new column instead of four in-place writes of
# integers into a string column of a filtered frame. Values that are not in the
# mapping (including ids 0-3 that are already encoded when the cell is re-run)
# pass through unchanged, so the cell stays idempotent.
data_df = data_df.assign(
    Position=data_df['Position'].map(lambda pos: position_to_class.get(pos, pos))
)

In [None]:
# Convert column "Position" to a numeric dtype now that it only holds class ids;
# downcast="integer" asks pandas for the smallest integer dtype that fits the values.
data_df['Position'] = pd.to_numeric(data_df['Position'], downcast="integer")

In [None]:
# Human-readable name for each integer class id
class_labels = {
    0: 'Goal Keeper',
    1: 'Defender',
    2: 'Mid-Fielder',
    3: 'Forward',
}

# Separate the predictors from the target column
features = data_df.drop(columns='Position')
labels = data_df['Position']
labels.shape, features.shape

In [None]:
# Stratified split on the class id. TRAIN_SIZE is an integer, so it is the
# absolute number of training rows; everything left over becomes the test set.
train_df, test_df = train_test_split(
    data_df,
    train_size=TRAIN_SIZE,
    stratify=data_df["Position"],
    random_state=RANDOM_STATE,
)
train_df.shape, test_df.shape

## Custom DataSet

In [None]:
class GlobalScaler:
    """Singleton holder for one shared ``StandardScaler``.

    Use ``GlobalScaler.get_instance()`` to obtain the holder; its ``scaler``
    attribute is the single ``StandardScaler`` object shared by all users.
    """
    _instance=None  # the one live instance, or None until it is first created

    def __init__(self):
        """Create the singleton and its scaler.

        Raises:
            Exception: if an instance has already been created.
        """
        if GlobalScaler._instance is not None:
            raise Exception("GlobalScaler is Singleton Class")
        self.scaler=StandardScaler()
        # Register the new object so that a directly constructed instance is the
        # one get_instance() hands out, rather than a second independent scaler.
        GlobalScaler._instance=self

    @classmethod
    def get_instance(cls):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance=GlobalScaler()
        return cls._instance


In [None]:
class FifaDS(Dataset):
    """Torch ``Dataset`` over a dataframe holding feature columns and one label column.

    Features are standardised with the ``StandardScaler`` owned by ``GlobalScaler``,
    which is shared by every ``FifaDS`` instance. Constructing a dataset with
    ``is_train=True`` (re-)fits that shared scaler on the given dataframe;
    ``is_train=False`` only applies the statistics that were fitted earlier.
    """
    globalScaler=GlobalScaler.get_instance()  # shared scaler holder (class attribute)

    def __init__(self,dataframe,device=device,is_train=True,label_col=None):
        """Scale the features and store them together with the labels.

        Args:
            dataframe: frame containing ``label_col`` plus the feature columns.
            device: torch device on which ``__getitem__`` creates its tensors.
            is_train: fit the shared scaler on this data when True, otherwise
                only transform with the already fitted scaler.
            label_col: name of the target column (required).

        Raises:
            ValueError: if ``label_col`` is not given.
        """
        if label_col is None:
            # Fail early with a clear message instead of an opaque ``KeyError: None``.
            raise ValueError("label_col must name the target column of the dataframe")

        self.df=dataframe
        self.device=device
        self.is_train=is_train
        self.scaler=self.globalScaler.scaler
        self.label_col=label_col

        self.labels=self.df[label_col].to_numpy()
        X=self.df.drop(label_col,axis=1)
        if self.is_train:
            self.features=self.scaler.fit_transform(X)
        else:
            self.features=self.scaler.transform(X)

    def __len__(self):
        """Return the number of samples (rows of the scaled feature matrix)."""
        return len(self.features)

    def __getitem__(self,index):
        """Return ``(feature, label)`` for ``index`` as tensors on ``self.device``.

        The feature tensor is float32 and the label tensor is int64.
        """
        feature=self.features[index]
        label=self.labels[index]

        feature = torch.tensor(feature,dtype=torch.float32,device=self.device)
        label = torch.tensor(label,dtype=torch.int64,device=self.device)
        return feature,label

In [None]:
# The training set fits the shared scaler. The test set must only reuse those
# statistics (is_train=False): fitting again on the test rows would leak
# test-set information and scale the two splits with different means/variances.
train_ds = FifaDS(train_df,is_train=True,label_col="Position")
test_ds = FifaDS(test_df,is_train=False,label_col="Position")

In [None]:
train_loader = DataLoader(train_ds,batch_size=BATCH_SIZE,shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps it deterministic
test_loader = DataLoader(test_ds,batch_size=BATCH_SIZE,shuffle=False)

# Sanity check: preview the first few batches only, instead of printing every
# batch of the test set into the notebook output.
PREVIEW_BATCHES = 3
for count,data in enumerate(test_loader):
    if count >= PREVIEW_BATCHES:
        break
    feast, lbs =data
    print(f'count:{count} featues:{feast.shape}  labels:{lbs}')

## Model Creation

In [None]:
class Model(nn.Module):
    """Fully connected classifier: input -> 66 -> 33 -> 16 -> 8 -> 4.

    Every hidden layer is followed by a ReLU. The last layer has no activation,
    so ``forward`` returns the raw 4-way scores.
    """

    def __init__(self,input_dim):
        """Build the five linear layers for inputs with ``input_dim`` features."""
        super().__init__()
        # Width of the input followed by the width of each layer's output
        widths = (input_dim, 66, 33, 16, 8, 4)

        self.layer1 = nn.Linear(widths[0], widths[1])
        self.activ1 = nn.ReLU()

        self.layer2 = nn.Linear(widths[1], widths[2])
        self.activ2 = nn.ReLU()

        self.layer3 = nn.Linear(widths[2], widths[3])
        self.activ3 = nn.ReLU()

        self.layer4 = nn.Linear(widths[3], widths[4])
        self.activ4 = nn.ReLU()

        self.layer5 = nn.Linear(widths[4], widths[5])

    def forward(self,x):
        """Pass ``x`` through the four hidden stages and the output layer."""
        hidden_stages = (
            (self.layer1, self.activ1),
            (self.layer2, self.activ2),
            (self.layer3, self.activ3),
            (self.layer4, self.activ4),
        )
        for linear, activation in hidden_stages:
            x = activation(linear(x))
        return self.layer5(x)

# Size the input layer from the prepared feature matrix rather than a hard-coded
# 33, so the model keeps matching the data if the feature list changes.
model =Model(input_dim=train_ds.features.shape[1]).to(device)

In [None]:
from torchinfo import summary


# Layer and parameter overview of the model, produced by torchinfo's summary()
summary(model)

In [None]:
# Plain-text listing of the model's registered sub-modules
print(model)

In [None]:
from cProfile import label


def fn_run_epoch(model, loader, loss_fn, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch. Without one it is put in eval mode and run with gradient
    tracking disabled. Both metrics are weighted by batch size, so a smaller
    final batch contributes in proportion to its number of samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    total_loss, n_correct, n_samples = 0.0, 0, 0
    with torch.set_grad_enabled(is_training):
        # Local batch names keep notebook-level variables such as `labels` intact
        for batch_inputs, batch_labels in loader:
            outputs = model(batch_inputs)
            batch_loss = loss_fn(outputs, batch_labels)

            if is_training:
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()

            batch_size = batch_inputs.size(0)
            total_loss += batch_loss.item() * batch_size
            # Count correct predictions on-device instead of going through sklearn
            n_correct += (torch.argmax(outputs, dim=1) == batch_labels).sum().item()
            n_samples += batch_size

    return total_loss / n_samples, n_correct / n_samples


loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=ALPHA)

# Per-epoch history: train loss, test loss, train accuracy, test accuracy, epoch index
loss,tloss,acc,tacc,n_epoch=[],[],[],[],[]

for epoch in range(EPOCHS):
    train_loss, train_acc = fn_run_epoch(model, train_loader, loss_fn, optimizer)
    # No optimizer -> evaluation only (no gradient tracking, no weight updates)
    test_loss, test_acc = fn_run_epoch(model, test_loader, loss_fn)

    loss.append(train_loss)
    acc.append(train_acc)
    tloss.append(test_loss)
    tacc.append(test_acc)
    n_epoch.append(epoch)

    if epoch%10==0:
        print(f"Epoch:{epoch}|Loss:{train_loss:.4f}-Test_loss{test_loss:.4f} | Acc:{train_acc:.4f}-test_acc{test_acc:.4f}")





In [None]:
# Sanity check: each history list should hold exactly one entry per epoch
len(loss),len(tloss),len(acc),len(tacc),len(n_epoch)

In [None]:
# Gather the per-epoch training history into one frame (one row per epoch)
history_columns = {
    'loss': loss,
    "val_loss": tloss,
    "accuracy": acc,
    'val_accuracy': tacc,
    'epoch': n_epoch,
}
result_df=pd.DataFrame(history_columns)

In [None]:
# Left panel: accuracy curves; right panel: loss curves (train vs. test per epoch)
fig,ax = plt.subplots(1,2,figsize=(15,8))

# (axis, title, y-axis label, result_df columns to draw)
panels = (
    (ax[0], 'Accuracy', 'Accuracy', ('accuracy', 'val_accuracy')),
    (ax[1], 'Loss', 'loss', ('loss', 'val_loss')),
)
for axis, title, y_label, curves in panels:
    for curve in curves:
        axis.plot(result_df.epoch,result_df[curve],label=curve)
    axis.set_title(title)
    axis.set_xlabel('Epochs')
    axis.set_ylabel(y_label)
    axis.legend()

plt.show()

In [None]:
import tensorflow as tf