# Constructing the Deep Learning Models

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.utils.data import DataLoader, Dataset

The following cells import the training and validation data.

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


xTrain = torch.load("C:/Users/coliv/summerProjects/Summer-Repository/xTrain.pt")
xVal = torch.load("C:/Users/coliv/summerProjects/Summer-Repository/xVal.pt")


yTrain = torch.load("C:/Users/coliv/summerProjects/Summer-Repository/yTrain.pt")
yVal = torch.load("C:/Users/coliv/summerProjects/Summer-Repository/yVal.pt")


zTrain = torch.load("C:/Users/coliv/summerProjects/Summer-Repository/zTrain.pt")
zVal = torch.load("C:/Users/coliv/summerProjects/Summer-Repository/zVal.pt")


lengthsTrain = torch.load("C:/Users/coliv/summerProjects/Summer-Repository/lengthsTrain.pt")
lengthsVal = torch.load("C:/Users/coliv/summerProjects/Summer-Repository/lengthsVal.pt")




In [3]:
xTrain.shape

torch.Size([28000, 99, 216])

In [3]:
distTrain = torch.load("C:/Users/coliv/summerProjects/Summer-Repository/distributionTrain.pt")
distVal = torch.load("C:/Users/coliv/summerProjects/Summer-Repository/distributionVal.pt")

The following model predicts whether a Player will switch out their Pokemon or engage in battle. 

In [4]:
class BattleSequenceDataset(Dataset) :
    def __init__(self, sequences, targets, lengths) :
        self.sequences = sequences
        self.targets = targets
        self.lengths = lengths

    def __len__(self) :
        return len(self.sequences)
    
    def __getitem__(self, idx) :
        return self.sequences[idx], self.targets[idx], self.lengths[idx]


class ActionClassifier(nn.Module) :
    def __init__(self, input_dim, hidden_dim, output_dim, dropout = 0.0) :
        super(ActionClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first = True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, lengths) :
        packed_input = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
        packed_output, _ = self.lstm(packed_input)
        lstm_out, _ = pad_packed_sequence(packed_output, batch_first=True)
        lstm_out = self.dropout(lstm_out)  # Apply dropout to the output of the LSTM
        output = self.fc(lstm_out)  # Pass through the fully connected layer
        return output    

def trainModel(model, train_loader, val_loader, device, epochs=10):
    criterion = nn.CrossEntropyLoss(ignore_index=-1)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(epochs):
        model.train()
        totalLoss = 0
        for inputs, labels, lengths in train_loader:
            inputs = inputs.to(device)  # Ensure inputs are on GPU
            lengths = lengths.cpu()  # Ensure lengths are on CPU
            optimizer.zero_grad()
            outputs = model(inputs, lengths)  # Model expects GPU inputs and CPU lengths
            outputsReshaped = outputs.view(-1, outputs.size(-1))
            labelsReshaped = labels[:, :outputs.shape[1]].to(device).view(-1).long()  # Directly slice and move to GPU
            loss = criterion(outputsReshaped, labelsReshaped)
            totalLoss += loss.item()
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, labels, lengths in val_loader:
                inputs = inputs.to(device)  # Ensure inputs are on GPU
                lengths = lengths.cpu()  # Ensure lengths are on CPU
                val_outputs = model(inputs, lengths)
                valReshaped = val_outputs.view(-1, val_outputs.size(-1))
                valLabelsReshaped = labels[:, :val_outputs.shape[1]].to(device).view(-1).long()  # Directly slice and move to GPU
                val_loss += criterion(valReshaped, valLabelsReshaped).item()

        print(f'Epoch [{epoch + 1}/{epochs}], Validation Loss: {val_loss / len(val_loader):.4f}, Training Loss: {totalLoss / len(train_loader):.4f}')

In [None]:
"""
class ActionClassifier(nn.Module) :
    def __init__(self, input_dim, hidden_dim, output_dim) :
        super(ActionClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first = True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, lengths) :
        packed_input = pack_padded_sequence(x, lengths, batch_first = True, enforce_sorted = False)
        packed_output, _ = self.lstm(packed_input)
        lstm_out, _ = pad_packed_sequence(packed_output, batch_first = True)
        output = self.fc(lstm_out)
        return output
"""

In [None]:
"""
class ActionClassifier(nn.Module) :
    def __init__(self, input_dim, hidden_dim, output_dim) :
        super(ActionClassifier,  self).__init__()
        self.lstm1 = nn.LSTM(input_dim, hidden_dim, batch_first = True)
        self.lstm2 = nn.LSTM(hidden_dim, hidden_dim, batch_first = True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, lengths) :
        packed_input = pack_padded_sequence(x, lengths, batch_first = True, enforce_sorted = False)
        packed_output, _ = self.lstm1(packed_input)
        lstm_out, _ = pad_packed_sequence(packed_output, batch_first = True)
        packed_output, _ = self.lstm2(pack_padded_sequence(lstm_out, lengths, batch_first = True, enforce_sorted = False))
        lstm_out, _ = pad_packed_sequence(packed_output, batch_first = True)
        output = self.fc(lstm_out)
        return output
"""

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_data = BattleSequenceDataset(xTrain, yTrain, lengthsTrain)
train_loader = DataLoader(train_data, batch_size = 32, shuffle = True)
val_data = BattleSequenceDataset(xVal, yVal, lengthsVal)
val_loader = DataLoader(val_data, batch_size = 32)

input_dim = xTrain.shape[2]
hidden_dim = 128 # Maybe you should experiment with the sizes. Think about possible implications of rank limitation when going to a lower dimension vector space. #128
output_dim = 2
weight_dropout = 0.2
model = ActionClassifier(input_dim, hidden_dim, output_dim, weight_dropout)
model = model.to(device)

trainModel(model, train_loader, val_loader, device, 30)

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [17]:
testLoader = DataLoader(val_data, batch_size = 1)

with torch.no_grad() :
     for inputs, labels, lengths, in testLoader :
               print(inputs.shape)
               print(lengths.shape)
               val_outputs = model(inputs.to(device), lengths)
               print(val_outputs.shape)
               outputs = val_outputs.view(-1, val_outputs.size(-1))
               true = labels
               break

trueArray = true.view(-1, true.size(-1))


prob_matrix = torch.softmax(outputs, 1)
indices = np.argsort(prob_matrix.cpu().numpy(), axis = 1)
indices = indices[:, ::-1][:, :1]

results = []
for i, j in zip(trueArray, indices) :
    if i[0] == -1 :
        continue
    if int(i[0]) == j :
        results.append(1)
    else :
        results.append(0)

arr = np.array(results)

np.mean(arr)
        


torch.Size([1, 99, 216])
torch.Size([1])
torch.Size([1, 19, 2])


0.8947368421052632

In [26]:
val_outputs[0].shape

torch.Size([19, 2])

The above model is 80.931% accurate.

The following cells construct a model that attempts to predict the exact move a player will select. The following cell determines the average number of classes that are possible for the model to pick from.

In [11]:
df = pd.read_csv("C:/Users/coliv/summerProjects/Summer-Repository/moveset_dictionary.csv").set_index("name").T
movePoolSizes = []
for i in df.columns :
    movePoolSizes.append(sum(df[i]))

movePoolSizes

meanMovepool = np.mean(movePoolSizes)

print(f"The average movepool size in gen3 is {meanMovepool}.")

The average movepool size in gen3 is 64.39378238341969.


In [7]:
class BattleSequenceDataset(Dataset) :
    def __init__(self, sequences, targets, possibility_labels, lengths, distributions) :
        self.sequences = sequences
        self.targets = targets
        self.possibility_labels = possibility_labels
        self.lengths = lengths
        self.distributions = distributions

    def __len__(self) :
        return len(self.sequences)
    
    def __getitem__(self, idx) :
        return self.sequences[idx], self.targets[idx], self.possibility_labels[idx], self.lengths[idx], self.distributions[idx]


class MoveClassifier(nn.Module) :
    def __init__(self, input_dim, hidden_dim, output_dim, dropout = 0.0) :
        super(MoveClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first = True)
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(hidden_dim, output_dim)
        self.fc2 = nn.Linear(output_dim, output_dim) # You added an fc2

    def forward(self, x, lengths) :
        packed_input = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
        packed_output, _ = self.lstm(packed_input)
        lstm_out, _ = pad_packed_sequence(packed_output, batch_first=True)
        lstm_out = self.dropout(lstm_out)
        currStep = self.dropout(self.fc1(lstm_out)) 
        currStep = torch.relu(currStep) # This step is new
        output = self.fc2(currStep) # This step is new
        return output  
    
def trainModel(model, train_loader, val_loader, device, epochs=10):
    criterion = nn.CrossEntropyLoss(ignore_index=-1)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(epochs):
        model.train()
        totalLoss = 0
        for inputs, labels, possibilities, lengths, dists in train_loader:
            inputs = inputs.to(device)
            lengths = lengths.cpu()  # Ensure lengths are on CPU
            optimizer.zero_grad()
            outputs = model(inputs, lengths)  # Model expects GPU inputs and CPU lengths
            outputsReshaped = outputs.view(-1, outputs.size(-1))
            
            possibilitiesReshaped = possibilities[:, :outputs.shape[1], :].to(device).view(-1, outputs.size(-1))
            distsReshaped = dists[:, :outputs.shape[1], :].to(device).view(-1, outputs.size(-1))
            labelsReshaped = labels[:, :outputs.shape[1]].to(device).view(-1).long()  # Directly slice and move to GPU
            distsReshaped = distsReshaped 
            outputsReshaped = torch.mul(outputsReshaped, possibilitiesReshaped)
            #outputsReshaped = outputsReshaped + distsReshaped
            loss = criterion(outputsReshaped, labelsReshaped)
            totalLoss += loss.item()
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, labels, possibilities, lengths, dists in val_loader:
                inputs = inputs.to(device)  # Ensure inputs are on GPU
                lengths = lengths.cpu()  # Ensure lengths are on CPU
                val_outputs = model(inputs, lengths)
                valReshaped = val_outputs.view(-1, val_outputs.size(-1))
                valPossibilitiesReshaped = possibilities[:, :val_outputs.shape[1], :].to(device).view(-1, outputs.size(-1))
                valLabelsReshaped = labels[:, :val_outputs.shape[1]].to(device).view(-1).long()  # Directly slice and move to GPU
                distsReshaped = dists[:, :val_outputs.shape[1], :].to(device).view(-1, outputs.size(-1))
                distsReshaped = distsReshaped 
                valReshaped = torch.mul(valReshaped, valPossibilitiesReshaped)
                #valReshaped = valReshaped + distsReshaped
                val_loss += criterion(valReshaped, valLabelsReshaped).item()

        print(f'Epoch [{epoch + 1}/{epochs}], Validation Loss: {val_loss / len(val_loader):.4f}, Training Loss: {totalLoss / len(train_loader):.4f}')

In [9]:
device = device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_data = BattleSequenceDataset(xTrain, yTrain, zTrain, lengthsTrain, distTrain)
train_loader = DataLoader(train_data, batch_size = 32, shuffle = True)
val_data = BattleSequenceDataset(xVal, yVal, zVal, lengthsVal, distVal)
val_loader = DataLoader(val_data, batch_size = 32)

In [5]:
zTrain.shape

torch.Size([28000, 99, 354])

In [8]:
input_dim = xTrain.shape[2]
hidden_dim = 128 # Maybe you should experiment with the sizes. Think about possible implications of rank limitation when going to a lower dimension vector space. #128
output_dim = 354
weight_dropout = 0.2
model = MoveClassifier(input_dim, hidden_dim, output_dim, weight_dropout)
model = model.to(device)

trainModel(model, train_loader, val_loader, device, 50)

Epoch [1/50], Validation Loss: 2.0181, Training Loss: 2.4100
Epoch [2/50], Validation Loss: 1.8847, Training Loss: 2.0566
Epoch [3/50], Validation Loss: 1.8316, Training Loss: 1.9670
Epoch [4/50], Validation Loss: 1.7871, Training Loss: 1.9099
Epoch [5/50], Validation Loss: 1.7848, Training Loss: 1.8697
Epoch [6/50], Validation Loss: 1.7358, Training Loss: 1.8402
Epoch [7/50], Validation Loss: 1.7143, Training Loss: 1.8137
Epoch [8/50], Validation Loss: 1.7030, Training Loss: 1.7950
Epoch [9/50], Validation Loss: 1.6925, Training Loss: 1.7776
Epoch [10/50], Validation Loss: 1.6811, Training Loss: 1.7613
Epoch [11/50], Validation Loss: 1.6663, Training Loss: 1.7488
Epoch [12/50], Validation Loss: 1.6554, Training Loss: 1.7370
Epoch [13/50], Validation Loss: 1.6568, Training Loss: 1.7278
Epoch [14/50], Validation Loss: 1.6422, Training Loss: 1.7181
Epoch [15/50], Validation Loss: 1.6349, Training Loss: 1.7110
Epoch [16/50], Validation Loss: 1.6376, Training Loss: 1.7030
Epoch [17/50], Va

In [9]:
"""
model = model.cpu()
torch.save(model, "C:/Users/coliv/summerProjects/Summer-Repository/moveClassifier.pt")
"""

In [16]:
model = torch.load("C:/Users/coliv/summerProjects/Summer-Repository/moveClassifier.pt").cpu()
val_data
testLoader = DataLoader(val_data, batch_size = len(val_data))
testLoader

with torch.no_grad() :
     for inputs, labels, possibilities, lengths, dists in testLoader :
               val_outputs = model(inputs, lengths)
               print(val_outputs.shape)
               outputs = val_outputs.view(-1, val_outputs.size(-1))
               possibilities = possibilities[:, :val_outputs.shape[2], :].view(-1, outputs.size(-1))
               distributions = dists[:, :val_outputs.shape[2], :].view(-1, outputs.size(-1))
               distributions = 0.1 * distributions
               outputs = torch.mul(outputs, possibilities)
               outputs = outputs + (2 * distributions)
               true = labels
               break

trueArray = true.view(-1, true.size(-1))

torch.Size([4199, 99, 354])


In [17]:

prob_matrix = torch.softmax(outputs, 1)
indices = np.argsort(prob_matrix.cpu().numpy(), axis = 1)

In [18]:
indices.shape
indices = indices[:, ::-1][:, :3]


In [19]:
results = []
for i, j in zip(trueArray, indices) :
    if i[0] == -1 :
        continue
    if int(i[0]) in j :
        results.append(1)
    else :
        results.append(0)

arr = np.array(results)

np.mean(arr)
        

0.8090160188429404

guessing: 1.4625%

best accuracy: 52%

When choosing top K = 1 : 52.055%

When choosing top K = 2 : 70.305%

When choosing top K = 3 : 80.902%



In [4]:
torch.load("C:/Users/coliv/summerProjects/Summer-Repository/actionClassifier.pt")

AttributeError: Can't get attribute 'ActionClassifier' on <module '__main__'>