In [1]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
import math
import time

In [2]:
# Environment sanity check: query whether PyTorch reports a usable CUDA device.
# Only the last expression of a cell is displayed, so this result is not shown.
torch.cuda.is_available()
# Construct an empty float tensor through the CUDA tensor type; the recorded
# output of this cell is an empty tensor on device 'cuda:0'.
torch.cuda.FloatTensor()

tensor([], device='cuda:0')

In [3]:
# dataset that directly loads the file into memory and then retrieves data as needed
# this helps deal with the file read bottlenecks, but the data has to be transformed and then loaded to the gpu
class BowlingDataset(Dataset):
    """In-memory dataset of fixed-size binary game records.

    The file is a flat sequence of 27-byte records. For each record:

    * the first 120 bits (bytes 0-14, most significant bit first) become the
      input vector of 0.0/1.0 floats;
    * the last two bytes hold the final score as a little-endian 16-bit
      integer (``record[-1] * 256 + record[-2]``);
    * bytes 15-24 are not used by this dataset.

    The whole file is read once in ``__init__``; no file handle is kept on the
    instance, so the dataset can be copied/pickled (e.g. for DataLoader
    workers). Trailing bytes that do not form a full record are ignored.
    """

    RECORD_SIZE = 27   # bytes per game record
    INPUT_BITS = 120   # number of leading bits used as model input
    SCORE_BYTES = 2    # trailing bytes holding the final score

    def __init__(self, fileName):
        """Load every record of ``fileName`` into memory.

        Args:
            fileName: path to the binary record file.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        # `with` guarantees the handle is closed even if read() fails.
        with open(fileName, "rb") as f:
            self.fileData = f.read()
        # Integer division: exact for any file size (no float rounding).
        self.length = len(self.fileData) // self.RECORD_SIZE

    def __len__(self):
        """Return the number of complete records in the file."""
        return self.length

    def __getitem__(self, idx):
        """Return ``(input, score)`` for record ``idx``.

        Args:
            idx: record index; negative values count from the end.

        Returns:
            A pair of float32 tensors: the 120-element bit vector and the
            scalar final score.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        if idx < 0:
            idx += self.length
        if not 0 <= idx < self.length:
            raise IndexError("BowlingDataset index out of range")

        start = idx * self.RECORD_SIZE
        gameData = self.fileData[start:start + self.RECORD_SIZE]

        # Only the bytes that contribute to the first INPUT_BITS bits are unpacked.
        neededBytes = (self.INPUT_BITS + 7) // 8
        inputArray = [
            float((byte >> shift) & 1)
            for byte in gameData[:neededBytes]
            for shift in range(7, -1, -1)  # MSB first, same order as format(b, "08b")
        ][:self.INPUT_BITS]

        finalScore = int.from_bytes(gameData[-self.SCORE_BYTES:], "little")
        return torch.tensor(inputArray), torch.tensor(float(finalScore))

In [None]:
# Alternative BowlingDataset (disabled): reads each 27-byte record from the
# open file on demand instead of loading the whole file into memory.
# The definition below is wrapped in a bare triple-quoted string, so running
# this cell only evaluates a string literal and defines nothing.
""" This dataset is very memory efficient, but it is heavily limited by storage bandwidth (i think)
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        
    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        self.f.seek(idx * 27)
        gameData = self.f.read(27)
        tempArray = []
        for x in range(25):
            tempArray += [v for v in format(gameData[x], "08b")]
        finalScore = gameData[-1] * 256 + gameData[-2]
        inputArray = [float(v) for v in tempArray][:120]
        output = finalScore
        return torch.tensor(inputArray[:]), torch.tensor(float(output))
"""

In [None]:
# Alternative BowlingDataset (disabled): decodes up to 1,500,000 records up
# front and stores the resulting input/score tensors on the "cuda" device.
# The definition below is wrapped in a bare triple-quoted string, so running
# this cell only evaluates a string literal and defines nothing.
# NOTE(review): inside the disabled code the inner comprehension variable `v`
# shares its name with the outer record-loop variable `v` — verify before
# re-enabling.
""" #this works significantly better but we are hard limited by vram. maybe try to look into memory pinning more
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        if self.length > 1500000:
            self.length = 1500000
        self.f.seek(0)
        # build entire array into memory
        self.inputArray=[]
        self.finalScoreArray=[]
        for v in range(self.length):
            gameData = self.f.read(27)
            tempArray = []
            for x in range(25):
                tempArray += [v for v in format(gameData[x], "08b")]
            self.finalScoreArray.append(gameData[-1] * 256 + gameData[-2])
            self.inputArray.append([float(v) for v in tempArray][:120])
            if not v % 10000:
                print(f"loaded {v} our of {self.length}")
        self.inputTensor = torch.tensor(self.inputArray).to("cuda")
        self.outputTensor = torch.tensor(self.finalScoreArray).to("cuda")
        self.f.close()
    def __len__(self):
        return self.length
    def __getitem__(self, idx):
        return self.inputTensor[idx], self.outputTensor[idx]
"""

In [4]:
class TestModel(torch.nn.Module):
    """Fully connected regressor: 120 inputs -> one value in the range [0, 300].

    The network is a stack of Linear layers (120 -> 1024 -> 512 -> 128 -> 64 -> 1)
    with a ReLU between consecutive Linear layers. The final activation is a
    sigmoid scaled by 300.
    """

    def __init__(self):
        super().__init__()
        widths = [120, 1024, 512, 128, 64, 1]
        layers = []
        last = len(widths) - 2
        for position, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            layers.append(torch.nn.Linear(fan_in, fan_out))
            # No activation after the output layer.
            if position != last:
                layers.append(torch.nn.ReLU())
        self.relu_stack = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Map an input of shape (..., 120) to predictions of shape (..., 1)."""
        squashed = torch.sigmoid(self.relu_stack(x))
        return squashed * 300

In [None]:
#model(trainData[0][0])

tensor(77.)

In [5]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over ``dataloader``.

    Args:
        dataloader: iterable yielding ``(x, y)`` batches; must support ``len()``.
        model: module whose output has shape ``(batch, 1)``; the trailing
            dimension is squeezed before the loss is computed.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        optimizer: optimizer bound to ``model``'s parameters.

    Every 1000 batches the current batch loss and the wall-clock time since
    the previous report are printed.
    """
    totalBatches = len(dataloader)

    # Move batches to wherever the model lives instead of hard-coding CUDA, so
    # the loop also runs on CPU. A parameter-less model keeps the old behaviour.
    firstParam = next(model.parameters(), None)
    device = firstParam.device if firstParam is not None else torch.device("cuda")

    # Training mode: enables train-time behaviour of layers such as dropout
    # and batch normalisation.
    model.train()
    startTime = time.time()
    for batchNum, (x, y) in enumerate(dataloader):
        # non_blocking lets the copy overlap with compute when the batch is pinned.
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        # zero out gradients
        optimizer.zero_grad()

        # prediction + loss; squeeze (batch, 1) -> (batch,) to match the targets
        prediction = model(x).squeeze(1)
        loss = loss_fn(prediction, y)

        # Backpropagation
        loss.backward()
        optimizer.step()

        if batchNum % 1000 == 0:
            print(f"loss: {loss.item():.2f}\tbatch num: {batchNum}/{totalBatches}")
            print(f"took {time.time() - startTime} seconds")
            startTime = time.time()

In [6]:
def test_loop(dataloader, model):
    model.eval() # need to look into what this does
    size = len(dataloader.dataset)
    numBatches = len(dataloader)
    test_loss = 0
    
    with torch.no_grad():
        for x, y in dataloader:
            pred = model(x.cuda())
            test_loss += loss_fn(pred, y.cuda()).item()
    
    print(f"Average Loss: {test_loss/numBatches}")

In [7]:
# HYPERPARAMS
batch_size = 64        # samples per batch for both data loaders
learning_rate = 5e-4   # step size passed to the Adam optimizer
epochs = 8             # number of full passes over the training set

In [8]:
# Training data: reshuffled every epoch. pin_memory=True makes the loader hand
# out batches in pinned host memory (the check cell further down prints
# is_pinned() == True for them).
trainData = BowlingDataset("ScoreDetailDataset.txt")
trainDataLoader = DataLoader(trainData, batch_size=batch_size, shuffle=True, pin_memory=True)

# Validation data: evaluation does not depend on sample order, so the loader is
# not shuffled (deterministic batches, no per-epoch permutation). It is pinned
# like the training loader.
testData = BowlingDataset("ScoreDetailDatasetVSplit.txt")
testDataLoader = DataLoader(testData, batch_size=batch_size, shuffle=False, pin_memory=True)

In [36]:
# verify that games loaded from the traindataset are valid
# Consistency check over the training set.
# The first 100 input bits are treated as 5 groups of 20 bits. Within a group,
# a violation is counted whenever bit p (0-9) is 0 while bit p + 10 is 1.
# NOTE(review): presumably the two halves of a group are pin states after the
# first and second throw of a frame — confirm against the data format. The
# input has 120 bits, so a sixth 20-bit group exists but is not checked here;
# confirm that is intentional.
badGameCounter = 0
for x in range(len(trainData)):
    game, fScore = trainData[x]
    # Convert once per game: indexing the tensor bit by bit creates a new
    # tensor for every comparison and is far too slow for the full dataset.
    bits = game.tolist()
    for frameNum in range(5):
        frameStart = 20 * frameNum
        for pinNum in range(10):
            if bits[frameStart + pinNum] == 0.0 and bits[frameStart + pinNum + 10] == 1.0:
                badGameCounter += 1
    if not x % 1000:
        print(f"finish {x} games")
print(badGameCounter)

finish 0 games
finish 1000 games
finish 2000 games
finish 3000 games
finish 4000 games


KeyboardInterrupt: 

In [32]:
# Spot check: input bit 95 of training sample 11 (element [0] of a sample is
# the input vector). The recorded output for this cell is tensor(1.).
trainData[11][0][95]

tensor(1.)

In [9]:
# test to see if pinning is working
# Inspect the target tensor of the first six training batches: is it in pinned
# host memory, and is it already on the GPU? Then report the number of batches.
for batch_ndx, (inputs, targets) in enumerate(trainDataLoader):
    if batch_ndx >= 6:
        break
    print(targets.is_pinned())
    print(targets.is_cuda)
print(len(trainDataLoader))

True
False
True
False
True
False
True
False
True
False
True
False
129769


In [9]:
# Build the model, loss and optimizer, then alternate one training pass and
# one validation pass per epoch. `model` and `loss_fn` stay module-level names
# because later cells (and test_loop) refer to them.
model = TestModel().to("cuda")
loss_fn = torch.nn.MSELoss().to("cuda")
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    epochStart = time.time()
    print(f"Starting epoch {epoch}")
    train_loop(trainDataLoader, model, loss_fn, optimizer)
    elapsed = time.time() - epochStart  # training time only; validation is excluded
    print(f"Epoch {epoch} took {elapsed} seconds")
    test_loop(testDataLoader, model)
print("Finished")

Starting epoch 0
loss: 3927.55	batch num: 0/129769
took 13.695598840713501 seconds
loss: 246.49	batch num: 1000/129769
took 8.736359119415283 seconds
loss: 329.44	batch num: 2000/129769
took 8.632001876831055 seconds
loss: 263.71	batch num: 3000/129769
took 8.826998472213745 seconds
loss: 304.69	batch num: 4000/129769
took 8.888002634048462 seconds
loss: 300.23	batch num: 5000/129769
took 8.473998069763184 seconds
loss: 219.16	batch num: 6000/129769
took 8.69479513168335 seconds
loss: 285.26	batch num: 7000/129769
took 8.692012310028076 seconds
loss: 337.56	batch num: 8000/129769
took 8.558001041412354 seconds
loss: 398.71	batch num: 9000/129769
took 8.61580753326416 seconds
loss: 229.21	batch num: 10000/129769
took 8.483516693115234 seconds
loss: 270.11	batch num: 11000/129769
took 8.588001012802124 seconds
loss: 131.00	batch num: 12000/129769
took 8.56793761253357 seconds
loss: 228.11	batch num: 13000/129769
took 8.635440826416016 seconds
loss: 294.42	batch num: 14000/129769
took 8.9

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


loss: 244.36	batch num: 0/129769
took 0.44499850273132324 seconds
loss: 216.05	batch num: 1000/129769
took 7.765001535415649 seconds
loss: 246.71	batch num: 2000/129769
took 7.727999687194824 seconds
loss: 256.66	batch num: 3000/129769
took 7.728999853134155 seconds
loss: 165.94	batch num: 4000/129769
took 7.821998834609985 seconds
loss: 216.39	batch num: 5000/129769
took 7.745001792907715 seconds
loss: 221.65	batch num: 6000/129769
took 7.737999677658081 seconds
loss: 171.59	batch num: 7000/129769
took 7.721999883651733 seconds
loss: 274.66	batch num: 8000/129769
took 7.828999757766724 seconds
loss: 230.62	batch num: 9000/129769
took 7.75200343132019 seconds
loss: 234.74	batch num: 10000/129769
took 7.778996706008911 seconds
loss: 162.82	batch num: 11000/129769
took 7.747998952865601 seconds
loss: 138.32	batch num: 12000/129769
took 7.6970014572143555 seconds
loss: 308.49	batch num: 13000/129769
took 7.79699969291687 seconds
loss: 221.13	batch num: 14000/129769
took 7.686999559402466 

KeyboardInterrupt: 

In [None]:
# Persist the model's parameters (its state_dict only, not the module object)
# to the file "testmodel.weights" in the current working directory.
torch.save(model.state_dict(), "testmodel.weights")