In [None]:
# IMPORTS
import gzip
import torch
import torch.nn as nn 
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

## Data Processing

Each row of our dataset describes one match, with one set of columns for the winner and another for the loser. At prediction time, however, we must not leak who the winner is to the model. To avoid this, we relabel the two players as Player A and Player B and add a label that is 1 when Player A won and 0 when Player A lost. This way, the feature columns themselves no longer reveal who won.

We created a regular pairing and flipped pairing (Player A is winner vs. Player A is loser) to make the model symmetric and double our dataset.

In [None]:
# Load the match data into a DataFrame. The later cells index `df` by column
# name (e.g. df["winner.trophies"]), so it must be a DataFrame rather than a
# list of per-line records.
# NOTE(review): assumes the file is a gzip-compressed CSV with a header row
# containing the winner.* / loser.* columns, as its name suggests -- confirm.
#
# Only the first 20,000 rows are read to keep iteration fast; pass nrows=None
# to load the full dataset.
# TODO: what's a good number?
df = pd.read_csv(
    "cse158-assignment2-master.csv.gz",
    compression="gzip",
    nrows=20000,
)

In [None]:
# Per-player stat names. Each one is looked up twice in the raw data, once with
# a "winner." prefix and once with a "loser." prefix.
stat_columns = [
    "trophies",
    "isinclan",
    "cards",
    "troops",
    "structures",
    "spells",
    "commons",
    "rares",
    "epics",
    "legendaries",
    "elixir.average",
]

In [None]:
# Orientation 1: Player A takes the winner's stats, Player B the loser's.
rows_A_winner = {}
for stat in stat_columns:
    rows_A_winner.update({
        f"A.{stat}": df[f"winner.{stat}"],
        f"B.{stat}": df[f"loser.{stat}"],
    })

# Every row in this orientation is labelled 1.0 (Player A won).
y_A_winner = pd.Series(np.full(len(df), 1.0), name="label")

df_A = pd.DataFrame(rows_A_winner).assign(label=y_A_winner)

In [None]:
# Orientation 2 (flipped): Player B takes the winner's stats and Player A the
# loser's. The mapping must be the mirror image of the "A is winner" frame;
# otherwise both frames carry identical features with opposite labels and the
# model has nothing to learn from.
rows_B_winner = {}
for stat in stat_columns:
    rows_B_winner[f"A.{stat}"] = df[f"loser.{stat}"]
    rows_B_winner[f"B.{stat}"] = df[f"winner.{stat}"]
y_B_winner = pd.Series(np.zeros(len(df)), name="label")   # A lost = 0

df_B = pd.DataFrame(rows_B_winner)
df_B["label"] = y_B_winner

In [None]:
# Stack both orientations row-wise and renumber the index 0..N-1, so each
# source row of `df` contributes two rows here.
df_pairs = pd.concat([df_A, df_B], axis=0, ignore_index=True)

## Building Feature Vector

We used difference features, like delta_trophies (A.trophies - B.trophies), to improve prediction accuracy for competitive game matchups.

In [None]:
# One "delta_<stat>" column per stat: Player A's value minus Player B's.
diff_features = {
    f"delta_{stat}": df_pairs[f"A.{stat}"] - df_pairs[f"B.{stat}"]
    for stat in stat_columns
}
# are absolute features like total cards needed?

df_diff = pd.DataFrame(diff_features)

In [None]:
# Feature Matrix
#
# X holds the difference features and y the 0/1 label for the same rows.
# (This is a top-level notebook cell, not a function body, so there is no
# `return` here -- a bare `return` at module level is a SyntaxError.)

X = df_diff
y = df_pairs["label"]

# Sanity check: features and labels must describe the same rows.
assert len(X) == len(y)

## Train/Test Split

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# repeatable across runs.
# NOTE(review): df_pairs contains two rows per source match (one per
# orientation), and this row-level split can place them on opposite sides of
# the train/test boundary -- consider splitting by match instead.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

## Building Neural Network

We build the network using PyTorch.

In [None]:
class ClashRoyaleNetwork(nn.Module):
    """Feed-forward binary classifier that outputs a value in (0, 1).

    Two hidden stages (64 and 32 units), each made of Linear -> ReLU ->
    BatchNorm1d -> Dropout(0.2), followed by a single-unit Linear layer and a
    Sigmoid.

    Args:
        input_dim: number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()

        # Consecutive pairs of widths give each hidden stage's (in, out) sizes.
        widths = (input_dim, 64, 32)
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers += [
                nn.Linear(fan_in, fan_out),
                nn.ReLU(),
                nn.BatchNorm1d(fan_out),
                nn.Dropout(0.2),
            ]

        # Output head: one unit squashed through a sigmoid.
        layers += [nn.Linear(widths[-1], 1), nn.Sigmoid()]

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the layer stack; for (N, input_dim) input the result is (N, 1)."""
        scores = self.net(x)
        return scores

In [None]:
# Seed PyTorch's RNG before building the model so that weight initialisation
# and dropout masks are repeatable from run to run (the train/test split is
# already seeded with the same value).
torch.manual_seed(42)

# The input width is the number of feature columns in the training split.
model = ClashRoyaleNetwork(input_dim=X_train.shape[1])
# The network ends in a Sigmoid, so plain binary cross-entropy on
# probabilities is the matching loss.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Convert the training split to float32 tensors once, before the loop.
# Going through np.asarray accepts pandas objects as well as NumPy arrays;
# torch.tensor cannot infer a shape from a DataFrame/Series directly.
X_train_t = torch.tensor(np.asarray(X_train, dtype=np.float32))
y_train_t = torch.tensor(np.asarray(y_train, dtype=np.float32))

# Full-batch training: each epoch is one gradient step over the whole
# training set.
for epoch in range(25):
    model.train()  # training mode: dropout active, batch norm uses batch stats
    optimizer.zero_grad()

    # The model's last Linear layer has one output unit, giving shape (N, 1);
    # drop only that trailing axis so predictions line up with the (N,) labels.
    predictions = model(X_train_t).squeeze(1)
    loss = criterion(predictions, y_train_t)

    loss.backward()
    optimizer.step()

    if epoch % 5 == 0:
        print(f"epoch {epoch} | loss = {loss.item():.4f}")

## Evaluation

In [None]:
# Score the held-out split.
model.eval()  # inference mode: dropout off, batch norm uses running stats

# np.asarray accepts pandas objects as well as NumPy arrays; torch.tensor
# cannot infer a shape from a DataFrame directly.
X_test_t = torch.tensor(np.asarray(X_test, dtype=np.float32))

with torch.no_grad():
    # Shape (N, 1) -> (N,): drop only the trailing output axis.
    probabilities = model(X_test_t).squeeze(1)

# Threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions, and cast
# both sides to int so labels and predictions share a dtype.
predictions = (probabilities > 0.5).numpy().astype(int)
y_true = np.asarray(y_test).astype(int)

accuracy = accuracy_score(y_true, predictions)
print("Test accuracy:", accuracy)