In [1]:
import numpy as np
import time
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the Drive helper and mount
from google.colab import drive

# This will prompt for authorization.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
X = pd.read_csv("/content/drive/MyDrive/Colab ML Data/football_prediction_raw_X.csv").set_index("ID")
y = pd.read_csv("/content/drive/MyDrive/Colab ML Data/football_prediction_y.csv").set_index("ID")
y = pd.get_dummies(y['0'])
y

Unnamed: 0_level_0,AWAY_WINS,DRAW,HOME_WINS
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,True,False,False
1,False,True,False
2,True,False,False
3,False,False,True
4,False,True,False
...,...,...,...
12298,True,False,False
12299,True,False,False
12300,True,False,False
12301,False,False,True


In [4]:
print(X.shape)

(12303, 268)


In [26]:
# Model Architecture
class FootballModelFullyConnected(nn.Module):
    def __init__(self, x_dim, dH):
        super(FootballModelFullyConnected, self).__init__()

        self.dropout = nn.Dropout(0.2)
        self.fc1 = nn.Linear(x_dim, dH)  # 3 is the dimensions of the input layer
        self.fc2 = nn.Linear(dH, dH)
        self.fc3 = nn.Linear(dH, dH)
        self.fc4 = nn.Linear(dH, dH)
        self.fc5 = nn.Linear(dH, dH)
        self.fc6 = nn.Linear(dH, dH)
        self.fc7 = nn.Linear(dH, 3)
      #  self.softmax = nn.Softmax(3)


    def forward(self, x):  # forward methods just defines the architecture

        x = F.relu(self.fc1( x ))
        x = F.relu(self.fc2( x ))  # F represents the functional library
        x = F.relu(self.fc3( x ))
        x = self.dropout( x )
        x = F.relu(self.fc4( x ))
     #   x = F.relu(self.fc5( x ))
      #  x = F.relu(self.fc6( x ))
       # x = self.dropout( x )
        # Final fully connected layer
        x = self.fc7( x )

        return x

In [27]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

In [28]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent', add_indicator=False)),
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=0.9))
])
pipeline.fit(x_train)
X_train_tilde = pipeline.transform(x_train)
X_test_tilde = pipeline.transform(x_test)

In [29]:
# PyTorch works only with arrays
x_train = np.array(X_train_tilde)
x_test = np.array(X_test_tilde)
y_train = np.array(y_train)
y_test = np.array(y_test)

input_size = x_train.shape[1]
hidden_size = 64
num_layers = 2
output_size = 1

In [30]:
# Initializing model
H = 120

model1 = FootballModelFullyConnected(x_train.shape[1], hidden_size)

criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(model1.parameters(),lr=0.01)

batch_size = 100 # mini-batch
num_epochs = 100
L_Y_train = len(y_train)

l1_reg = 0.0001
l2_reg = 0.0001

In [31]:
X_train_tensor = torch.FloatTensor(x_train)
y_train_tensor = torch.FloatTensor(y_train)

# Define loss function and optimizer
optimizer = optim.Adam(model1.parameters(), lr=0.001)

model1.train()
model1.cuda()

epoch_accuracies = [] # Will keep track of loss function values

# Training
for epoch in range(num_epochs):
  train_accuracy = []

  for i in range(0,L_Y_train,batch_size):
    x_train_batch = X_train_tensor[i:i+batch_size,:]
    y_train_batch = y_train_tensor[i:i+batch_size]

    data, target = Variable(x_train_batch).cuda(), Variable(y_train_batch).cuda()

    optimizer.zero_grad() # reset the grad at beginning of each minibatch

    output = model1(data)

    loss = criterion(output, target)

    # Apply L1 regularization
    l1_norm = sum(p.abs().sum() for p in model1.parameters())
    loss += l1_reg * l1_norm

    # Apply L2 regularization

    l2_norm = sum(p.pow(2).sum() for p in model1.parameters())
    loss += l2_reg * l2_norm

    loss.backward()

    optimizer.step()
    prediction = output.data.max(1)[1] #Label Prediction
    target_pick = target.data.max(1)[1]
    accuracy = (float(prediction.eq(target_pick.data).sum())/float(batch_size))*100.0 #Computing the training accuracy
    train_accuracy.append(accuracy)
  accuracy_epoch = np.mean(train_accuracy)
  epoch_accuracies.append(accuracy_epoch)
  print('\nIn epoch ', epoch,' the accuracy of the training set =', accuracy_epoch)



In epoch  0  the accuracy of the training set = 45.323232323232325

In epoch  1  the accuracy of the training set = 49.666666666666664

In epoch  2  the accuracy of the training set = 50.676767676767675

In epoch  3  the accuracy of the training set = 51.56565656565657

In epoch  4  the accuracy of the training set = 52.41414141414141

In epoch  5  the accuracy of the training set = 53.515151515151516

In epoch  6  the accuracy of the training set = 54.75757575757576

In epoch  7  the accuracy of the training set = 56.04040404040404

In epoch  8  the accuracy of the training set = 57.484848484848484

In epoch  9  the accuracy of the training set = 58.888888888888886

In epoch  10  the accuracy of the training set = 60.484848484848484

In epoch  11  the accuracy of the training set = 62.686868686868685

In epoch  12  the accuracy of the training set = 64.35353535353535

In epoch  13  the accuracy of the training set = 65.06060606060606

In epoch  14  the accuracy of the training set = 

KeyboardInterrupt: 

In [32]:
model1.eval()

x_test_tensor = torch.FloatTensor(x_test)
y_test_tensor = torch.FloatTensor(y_test)

data, target = Variable(x_test_tensor).cuda(), Variable(y_test_tensor).cuda()
output = model1(data)
prediction = output.data.max(1)[1]
target_pick = target.data.max(1)[1]
accuracy = (float(prediction.eq(target_pick.data).sum()))/float(len(x_test))

print('Test Accuracy Fully Connected 4 layers:',accuracy)

Test Accuracy Fully Connected 4 layers: 0.4124339699309224
