In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('../data/preprocessed_data.csv')

In [3]:
df.columns[2]

'no_of_rounds'

In [4]:
def convert_winner_to_int(X):
    if X == 'Red':
        return 1
    else:
        return 0

In [5]:
y = df.pop('Winner').apply(convert_winner_to_int)


In [6]:
df.dtypes.value_counts()

float64    118
int64       42
dtype: int64

In [7]:
y.value_counts()

1    4436
0    2250
Name: Winner, dtype: int64

In [8]:
n_samples, n_features = df.shape

In [9]:
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.2,
                                                   random_state=124)

In [10]:
float_col = X_train.select_dtypes(include=[float]).columns

In [11]:
sc = StandardScaler()
X_train[float_col] = sc.fit_transform(X_train[float_col])
X_test[float_col] = sc.fit_transform(X_test[float_col])

In [12]:
X_train = torch.tensor(X_train.values.astype(np.float32))
X_test = torch.tensor(X_test.values.astype(np.float32))
y_train = torch.tensor(y_train.values.astype(np.float32))
y_test = torch.tensor(y_test.values.astype(np.float32))

In [13]:
y_train = y_train.view(y_train.shape[0], 1)
y_test = y_test.view(y_test.shape[0], 1)

# Model

In [14]:
class LogisticRegression(nn.Module):
    
    def __init__(self, n_input_features):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(n_input_features, 1)
        
    def forward(self, x):
        y_predicted = torch.sigmoid(self.linear(x))
        return y_predicted

In [15]:
model = LogisticRegression(n_features)

# Loss and Optimizer

In [16]:
learning_rate = 0.01
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training Loop

In [17]:
num_epochs = 100
for epoch in range(num_epochs):
    # forward pass and loss
    y_predicted = model(X_train)
    loss = criterion(y_predicted, y_train)
    
    # backward pass
    loss.backward()
    
    # updates
    optimizer.step()
    
    # zero gradients
    optimizer.zero_grad()
    
    if (epoch+1) % 10 == 0:
        print(f'epoch: {epoch+1}, loss = {loss.item():.4f}')

epoch: 10, loss = 0.6849
epoch: 20, loss = 0.6590
epoch: 30, loss = 0.6452
epoch: 40, loss = 0.6367
epoch: 50, loss = 0.6307
epoch: 60, loss = 0.6263
epoch: 70, loss = 0.6227
epoch: 80, loss = 0.6199
epoch: 90, loss = 0.6174
epoch: 100, loss = 0.6153


# Results

In [18]:
with torch.no_grad():
    y_predicted = model(X_test)
    y_predicted_cls = y_predicted.round()
    acc = y_predicted_cls.eq(y_test).sum() / float(y_test.shape[0])
    print(f'accuracy = {acc:.4f}')

accuracy = 0.6674


In [19]:
baseline = y_test.sum() / y_test.shape[0]
print(f'baseline = {baseline:.4f}')

baseline = 0.6712


# Prediction

In [20]:
pred_df = pd.read_csv('../data/prediction_data.csv')

In [21]:
pred_df.drop(columns=['index', 'Winner'], inplace=True)

In [22]:
pred_df.columns.value_counts()

title_bout                1
no_of_rounds              1
R_avg_DISTANCE_att        1
R_avg_DISTANCE_landed     1
R_avg_opp_DISTANCE_att    1
                         ..
B_current_lose_streak     1
B_longest_win_streak      1
B_wins                    1
B_losses                  1
R_Stance_Switch           1
Length: 160, dtype: int64

In [23]:
float_col.value_counts()

B_avg_KD                   1
R_avg_opp_TOTAL_STR_att    1
R_avg_opp_BODY_att         1
R_avg_BODY_landed          1
R_avg_BODY_att             1
                          ..
B_avg_DISTANCE_landed      1
B_avg_DISTANCE_att         1
B_avg_opp_LEG_landed       1
B_avg_opp_LEG_att          1
R_age                      1
Length: 118, dtype: int64

In [24]:
pred_df[float_col] = sc.fit_transform(pred_df[float_col])
pred_df = torch.tensor(pred_df.values.astype(np.float32))

In [25]:
df.columns

Index(['title_bout', 'no_of_rounds', 'B_avg_KD', 'B_avg_opp_KD',
       'B_avg_SIG_STR_pct', 'B_avg_opp_SIG_STR_pct', 'B_avg_TD_pct',
       'B_avg_opp_TD_pct', 'B_avg_SUB_ATT', 'B_avg_opp_SUB_ATT',
       ...
       'B_Stance_Open Stance', 'B_Stance_Orthodox', 'B_Stance_Sideways',
       'B_Stance_Southpaw', 'B_Stance_Switch', 'R_Stance_Open Stance',
       'R_Stance_Orthodox', 'R_Stance_Sideways', 'R_Stance_Southpaw',
       'R_Stance_Switch'],
      dtype='object', length=160)

In [26]:
pred_df

tensor([[ 0.0000,  3.0000, -0.4326,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  3.0000, -0.4326,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  3.0000, -0.4326,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.0000,  3.0000, -0.4326,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  3.0000,  3.1061,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  5.0000,  0.0555,  ...,  0.0000,  0.0000,  0.0000]])

In [27]:
with torch.no_grad():
    y_predicted = model(pred_df)
    y_predicted_cls = y_predicted.round()

In [28]:
y_predicted_cls

tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.]])