In [1]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
import math
import time

In [3]:
# Sanity check: report whether PyTorch can see a CUDA-capable GPU in this environment.
print(torch.cuda.is_available())

True


In [4]:
# dataset that directly loads the file into memory and then retrieves data as needed
# this helps deal with the file read bottlenecks, but the data has to be transformed and then loaded to the gpu
class BowlingDataset(Dataset):
    """In-memory dataset of fixed-size binary game records.

    The whole file is read into memory once at construction time, so item
    access never touches the disk again. Each record is ``RECORD_SIZE`` bytes:

    * the first ``INPUT_BITS`` bits (most-significant bit of each byte first)
      become the input features, one float (0.0 or 1.0) per bit;
    * the last two bytes hold the final score as a little-endian unsigned
      16-bit integer.

    Bytes between the input bits and the score are not used by this dataset.
    Any trailing bytes that do not form a complete record are ignored.

    Only plain ``bytes``/``int`` attributes are stored (no file handle), so the
    dataset can be pickled for multi-process data loading.
    """

    RECORD_SIZE = 27   # bytes per game record on disk
    INPUT_BITS = 120   # number of leading bits exposed as model input

    def __init__(self, fileName):
        """Load every record of ``fileName`` into memory.

        Args:
            fileName: path to the binary file of ``RECORD_SIZE``-byte records.
        """
        # `with` guarantees the handle is released even if the read fails.
        with open(fileName, "rb") as f:
            self.fileData = f.read()
        # Integer division: a partial trailing record does not count.
        self.length = len(self.fileData) // self.RECORD_SIZE

    def __len__(self):
        """Return the number of complete records in the file."""
        return self.length

    def __getitem__(self, idx):
        """Return ``(inputs, score)`` for record ``idx``.

        Args:
            idx: record index; negative values count from the end.

        Returns:
            A float tensor of ``INPUT_BITS`` zeros/ones and a scalar float
            tensor holding the final score.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        if idx < 0:
            idx += self.length
        if not 0 <= idx < self.length:
            raise IndexError(f"record index {idx} out of range for {self.length} records")

        start = idx * self.RECORD_SIZE
        gameData = self.fileData[start:start + self.RECORD_SIZE]

        # Only the bytes that actually contribute input bits are expanded
        # (ceil division keeps this correct if INPUT_BITS is not a multiple of 8).
        inputByteCount = -(-self.INPUT_BITS // 8)
        inputArray = [
            float((byte >> shift) & 1)
            for byte in gameData[:inputByteCount]
            for shift in range(7, -1, -1)  # most-significant bit first
        ][:self.INPUT_BITS]

        # Score is stored low byte first in the final two bytes of the record.
        finalScore = int.from_bytes(gameData[-2:], "little")
        return torch.tensor(inputArray), torch.tensor(float(finalScore))

In [5]:
class TestModel(torch.nn.Module):
    """Fully connected regressor: 120 inputs -> one value in the range (0, 300).

    The network is a stack of Linear layers (120 -> 1024 -> 512 -> 128 -> 64 -> 1)
    with a ReLU between consecutive layers. The final logit is passed through
    a sigmoid and multiplied by 300 (presumably the maximum bowling score --
    confirm against the data).
    """

    def __init__(self):
        super().__init__()
        layer_widths = (120, 1024, 512, 128, 64, 1)
        stack = []
        for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
            stack.append(torch.nn.Linear(fan_in, fan_out))
            stack.append(torch.nn.ReLU())
        stack.pop()  # the last Linear feeds the sigmoid directly, no ReLU after it
        self.relu_stack = torch.nn.Sequential(*stack)

    def forward(self, x):
        """Return the scaled prediction for ``x``.

        The raw logits and the sigmoid output of the most recent call are kept
        on the instance as ``self.logits`` and ``self.output``.
        """
        raw = self.relu_stack(x)
        squashed = torch.sigmoid(raw)
        self.logits = raw
        self.output = squashed
        return squashed * 300

In [None]:
def train_loop(dataloader, modelList, loss_fn, optimizerList):
    """Run one pass over ``dataloader``, training every model on every batch.

    The model at position ``i`` of ``modelList`` is updated by the optimizer at
    position ``i`` of ``optimizerList``. Each batch is moved once to the device
    of the first model's parameters (all models are assumed to share that
    device); a model without parameters falls back to the default CUDA device.

    Args:
        dataloader: sized iterable yielding ``(x, y)`` batches of tensors.
        modelList: non-empty sequence of ``torch.nn.Module`` instances.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar
            tensor that supports ``backward()``.
        optimizerList: sequence of optimizers, one per model, in the same order.

    Raises:
        ValueError: if ``modelList`` is empty or its length differs from
            ``optimizerList``.
    """
    if not modelList or len(modelList) != len(optimizerList):
        raise ValueError(
            "modelList must be non-empty and have exactly one optimizer per model "
            f"(got {len(modelList)} models, {len(optimizerList)} optimizers)"
        )

    totalBatches = len(dataloader)

    # train() switches modules to training mode; this changes the behaviour of
    # layers such as Dropout and BatchNorm (eval() switches it back).
    for model in modelList:
        model.train()

    firstParam = next(modelList[0].parameters(), None)
    device = firstParam.device if firstParam is not None else torch.device("cuda")

    startTime = time.time()
    for batchNum, (x, y) in enumerate(dataloader):
        # move model input and label to the device the models live on
        xTensor = x.to(device)
        yTensor = y.to(device)

        # Pair every model with ITS OWN optimizer so each one is actually
        # updated from the gradients it just produced.
        for model, optimizer in zip(modelList, optimizerList):
            optimizer.zero_grad()

            # prediction + loss
            prediction = model(xTensor).squeeze(1)
            loss = loss_fn(prediction, yTensor)

            # backpropagation + parameter update
            loss.backward()
            optimizer.step()

        if batchNum % 1000 == 0:
            # The reported loss is that of the last model in modelList.
            print(f"loss: {loss.item():.2f}\tbatch num: {batchNum}/{totalBatches}")
            print(f"took {time.time() - startTime} seconds")
            startTime = time.time()