In [1]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
import math
import time

In [2]:
torch.cuda.is_available()
torch.cuda.FloatTensor()

tensor([], device='cuda:0')

In [None]:
"""
# dataset that directly loads the file into memory and then retrieves data as needed
# this helps deal with the file read bottlenecks, but the data has to be transformed and then loaded to the gpu
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        #if self.length > 1500000:
        #    self.length = 1500000
        self.f.seek(0)
        self.fileData = self.f.read()
        self.f.close()
    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        gameData = self.fileData[idx * 27: idx * 27 + 27]
        tempArray = []
        for x in range(25):
            tempArray += [v for v in format(gameData[x], "08b")]
        finalScore = gameData[-1] * 256 + gameData[-2]
        inputArray = [float(v) for v in tempArray][:120]
        output = finalScore
        return torch.tensor(inputArray[:]), torch.tensor(float(output))
"""

In [None]:
# dataset that pulls data from file as requested
""" This dataset is very memory efficient, but it is heavily limited by storage bandwidth (i think)
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        
    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        self.f.seek(idx * 27)
        gameData = self.f.read(27)
        tempArray = []
        for x in range(25):
            tempArray += [v for v in format(gameData[x], "08b")]
        finalScore = gameData[-1] * 256 + gameData[-2]
        inputArray = [float(v) for v in tempArray][:120]
        output = finalScore
        return torch.tensor(inputArray[:]), torch.tensor(float(output))
"""

In [3]:
# dataset that loads all data into GPU memory
#this works significantly better but we are hard limited by vram. maybe try to look into memory pinning more
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        self.f.seek(0)
        # build entire array into memory
        self.inputArray=[]
        self.finalScoreArray=[]
        for v in range(self.length):
            gameData = self.f.read(27)
            tempArray = []
            for x in range(25):
                tempArray += [v for v in format(gameData[x], "08b")]
            self.finalScoreArray.append(gameData[-1] * 256 + gameData[-2])
            self.inputArray.append([float(v) for v in tempArray][:120])
            if not v % 10000:
                print(f"loaded {v} our of {self.length}")
        self.inputTensor = torch.tensor(self.inputArray).to("cuda")
        self.outputTensor = torch.tensor(self.finalScoreArray).to("cuda")
        self.inputArray = []
        self.finalScoreArray = []
        self.f.close()
    def __len__(self):
        return self.length
    def __getitem__(self, idx):
        return self.inputTensor[idx], self.outputTensor[idx]


In [14]:
class TestModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.relu_stack = torch.nn.Sequential(
            torch.nn.Linear(120, 512),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(512, 256),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(256, 128),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(64, 1)
        )
        
    def forward(self, x):
        logits = self.relu_stack(x)
        output = torch.nn.Sigmoid()(logits)
        return output * 300

In [None]:
#trainData[0][0]

In [23]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset) # get number of samples
    totalBatches = len(dataloader)
    model.train() # need to look into what this exactly does
    startTime = time.time()
    for batchNum, (x, y) in enumerate(dataloader):
        # zero out gradients
        optimizer.zero_grad()
        
        # prediction + loss
        prediction = model(x).squeeze(1)
        loss = loss_fn(prediction, y)
        
        #Backpropagation
        loss.backward()
        optimizer.step()
        
        if not batchNum % 1000:
            print(f"loss: {loss.item():.2f}\tbatch num: {batchNum}/{totalBatches}")
            print(f"took {time.time() - startTime} seconds")
            startTime = time.time()

In [24]:
def test_loop(dataloader, model, loss_fn):
    model.eval() # need to look into what this does
    size = len(dataloader.dataset)
    numBatches = len(dataloader)
    test_loss = 0
    
    with torch.no_grad():
        for x, y in dataloader:
            pred = model(x)
            test_loss += loss_fn(pred, y).item()
    
    print(f"Average Loss: {test_loss/numBatches}")

In [51]:
# HYPERPARAMS
batch_size = 1
learning_rate = 0.00001
epochs = 8

In [48]:
trainData = BowlingDataset("ScoreDetailDataset.txt")
trainDataLoader = DataLoader(trainData, batch_size=batch_size, shuffle=True) #pin memory doesnt do shit bc the memory has to be grabbed from a physical file
testData = BowlingDataset("ScoreDetailDatasetVSplit.txt")
testDataLoader = DataLoader(testData, batch_size=batch_size, shuffle=True)

loaded 0 our of 1122352
loaded 10000 our of 1122352
loaded 20000 our of 1122352
loaded 30000 our of 1122352
loaded 40000 our of 1122352
loaded 50000 our of 1122352
loaded 60000 our of 1122352
loaded 70000 our of 1122352
loaded 80000 our of 1122352
loaded 90000 our of 1122352
loaded 100000 our of 1122352
loaded 110000 our of 1122352
loaded 120000 our of 1122352
loaded 130000 our of 1122352
loaded 140000 our of 1122352
loaded 150000 our of 1122352
loaded 160000 our of 1122352
loaded 170000 our of 1122352
loaded 180000 our of 1122352
loaded 190000 our of 1122352
loaded 200000 our of 1122352
loaded 210000 our of 1122352
loaded 220000 our of 1122352
loaded 230000 our of 1122352
loaded 240000 our of 1122352
loaded 250000 our of 1122352
loaded 260000 our of 1122352
loaded 270000 our of 1122352
loaded 280000 our of 1122352
loaded 290000 our of 1122352
loaded 300000 our of 1122352
loaded 310000 our of 1122352
loaded 320000 our of 1122352
loaded 330000 our of 1122352
loaded 340000 our of 1122352

In [None]:
# test to see if pinning is working
for batch_ndx, sample in enumerate(trainDataLoader):
    if batch_ndx > 5:
        break
    print(sample[1].is_pinned())
    print(sample[1].is_cuda)
print(len(trainDataLoader))

In [52]:
model = TestModel().cuda()
loss_fn = torch.nn.L1Loss().cuda()
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

for t in range(epochs):
    startTime = time.time()
    print(f"Starting epoch {t}")
    train_loop(trainDataLoader, model, loss_fn, optimizer)
    print(f"Epoch {t} took {time.time() - startTime} seconds")
    test_loop(testDataLoader, model, loss_fn)
print("Finished")


Starting epoch 0
loss: 63.89	batch num: 0/280588
took 0.06099867820739746 seconds
loss: 23.71	batch num: 1000/280588
took 1.7888431549072266 seconds
loss: 21.93	batch num: 2000/280588
took 1.7803497314453125 seconds
loss: 34.66	batch num: 3000/280588
took 1.8135287761688232 seconds
loss: 25.80	batch num: 4000/280588
took 1.8107178211212158 seconds
loss: 5.83	batch num: 5000/280588
took 1.8010001182556152 seconds
loss: 5.24	batch num: 6000/280588
took 2.2839996814727783 seconds
loss: 9.97	batch num: 7000/280588
took 1.9917855262756348 seconds
loss: 21.69	batch num: 8000/280588
took 1.877000331878662 seconds
loss: 12.04	batch num: 9000/280588
took 1.890000343322754 seconds
loss: 23.01	batch num: 10000/280588
took 1.8830013275146484 seconds
loss: 15.24	batch num: 11000/280588
took 1.8259997367858887 seconds
loss: 14.30	batch num: 12000/280588
took 1.9035863876342773 seconds
loss: 8.43	batch num: 13000/280588
took 1.8500003814697266 seconds
loss: 18.19	batch num: 14000/280588
took 1.873130

In [None]:
torch.save(model.state_dict(), "testmodel.weights")

In [53]:
fivePins = 0
tenPins = 0
fifteenPins = 0
total = 0
for x in range(len(testData)):
    total += 1
    pred = model(testData[x][0])
    diff = abs(pred - testData[x][1])
    if diff <= 5:
        fivePins += 1
    if diff <= 10:
        tenPins += 1
    if diff <= 15:
        fifteenPins += 1
print(f"Model is within five pins {fivePins * 100 / total}% of the time")
print(f"Model is within ten pins {tenPins * 100 / total}% of the time")
print(f"Model is within fifteen pins {fifteenPins * 100 / total}% of the time")

Model is within five pins 31.0% of the time
Model is within ten pins 52.9% of the time
Model is within fifteen pins 67.7% of the time



Model with L1 Loss, adam at lr=0.0005, batchsize = 64, and 8 epochs:
    Model is within five pins 30.5% of the time
    Model is within ten pins 50.4% of the time
    Model is within fifteen pins 65.8% of the time

Model with L1 Loss, adam at lr=0.001, batchsize = 64, and 8 epochs:
    Model is within five pins 30.2% of the time
    Model is within ten pins 51.9% of the time
    Model is within fifteen pins 66.7% of the time

More neurons Model with L1 Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 29.5% of the time
    Model is within ten pins 51.2% of the time
    Model is within fifteen pins 67.9% of the time

less neurons Model with L1 Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 30.6% of the time
    Model is within ten pins 51.9% of the time
    Model is within fifteen pins 67.5% of the time

Leaky Relu Model with L1 Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 31.8% of the time
    Model is within ten pins 51.2% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with MSE Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 29.1% of the time
    Model is within ten pins 49.8% of the time
    Model is within fifteen pins 66.5% of the time

Leaky Relu Model with L1 Loss, adam at lr=0.00005, batchsize = 32, and 8 epochs:
    Model is within five pins 31.8% of the time
    Model is within ten pins 51.2% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.0001, batchsize = 32, and 8 epochs:\
    Model is within five pins 30.0% of the time
    Model is within ten pins 51.9% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.000001, batchsize = 16, and 8 epochs:\
    Model is within five pins 29.5% of the tim e
    Model is within ten pins 50.6% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.000001, batchsize = 4, and 8 epochs:
    Model is within five pins 31.3% of the time
    Model is within ten pins 52.2% of the time
    Model is within fifteen pins 68.2% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.000001, batchsize = 1, and 8 epochs:
    Model is within five pins 31.0% of the time
    Model is within ten pins 52.9% of the time
    Model is within fifteen pins 67.7% of the time