In [1]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
import math
import time

In [2]:
torch.cuda.is_available()
torch.cuda.FloatTensor()

tensor([], device='cuda:0')

In [None]:
"""
# dataset that directly loads the file into memory and then retrieves data as needed
# this helps deal with the file read bottlenecks, but the data has to be transformed and then loaded to the gpu
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        #if self.length > 1500000:
        #    self.length = 1500000
        self.f.seek(0)
        self.fileData = self.f.read()
        self.f.close()
    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        gameData = self.fileData[idx * 27: idx * 27 + 27]
        tempArray = []
        for x in range(25):
            tempArray += [v for v in format(gameData[x], "08b")]
        finalScore = gameData[-1] * 256 + gameData[-2]
        inputArray = [float(v) for v in tempArray][:120]
        output = finalScore
        return torch.tensor(inputArray[:]), torch.tensor(float(output))
"""

In [None]:
# dataset that pulls data from file as requested
""" This dataset is very memory efficient, but it is heavily limited by storage bandwidth (i think)
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        
    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        self.f.seek(idx * 27)
        gameData = self.f.read(27)
        tempArray = []
        for x in range(25):
            tempArray += [v for v in format(gameData[x], "08b")]
        finalScore = gameData[-1] * 256 + gameData[-2]
        inputArray = [float(v) for v in tempArray][:120]
        output = finalScore
        return torch.tensor(inputArray[:]), torch.tensor(float(output))
"""

In [3]:
# dataset that loads all data into GPU memory
#this works significantly better but we are hard limited by vram. maybe try to look into memory pinning more
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        self.f.seek(0)
        # build entire array into memory
        self.inputArray=[]
        self.finalScoreArray=[]
        for v in range(self.length):
            gameData = self.f.read(27)
            tempArray = []
            for x in range(25):
                tempArray += [v for v in format(gameData[x], "08b")]
            self.finalScoreArray.append(gameData[-1] * 256 + gameData[-2])
            self.inputArray.append([float(v) for v in tempArray][:120])
            if not v % 10000:
                print(f"loaded {v} our of {self.length}")
        self.inputTensor = torch.tensor(self.inputArray).to("cuda")
        self.outputTensor = torch.tensor(self.finalScoreArray).to("cuda")
        self.inputArray = []
        self.finalScoreArray = []
        self.f.close()
    def __len__(self):
        return self.length
    def __getitem__(self, idx):
        return self.inputTensor[idx], self.outputTensor[idx]


In [4]:
class TestModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.relu_stack = torch.nn.Sequential(
            torch.nn.Linear(120, 512),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(512, 256),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(256, 128),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(64, 1)
        )
        
    def forward(self, x):
        logits = self.relu_stack(x)
        output = torch.nn.Sigmoid()(logits)
        return output * 300

In [5]:
#trainData[0][0]

In [6]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset) # get number of samples
    totalBatches = len(dataloader)
    model.train() # need to look into what this exactly does
    startTime = time.time()
    for batchNum, (x, y) in enumerate(dataloader):
        # zero out gradients
        optimizer.zero_grad()
        
        # prediction + loss
        prediction = model(x).squeeze(1)
        loss = loss_fn(prediction, y)
        
        #Backpropagation
        loss.backward()
        optimizer.step()
        
        if not batchNum % 1000:
            print(f"loss: {loss.item():.2f}\tbatch num: {batchNum}/{totalBatches}")
            print(f"took {time.time() - startTime} seconds")
            startTime = time.time()

In [7]:
def test_loop(dataloader, model, loss_fn):
    model.eval() # need to look into what this does
    size = len(dataloader.dataset)
    numBatches = len(dataloader)
    test_loss = 0
    
    with torch.no_grad():
        for x, y in dataloader:
            pred = model(x)
            test_loss += loss_fn(pred, y).item()
    
    print(f"Average Loss: {test_loss/numBatches}")

In [8]:
# HYPERPARAMS
batch_size = 32
learning_rate = 0.0001
epochs = 8

In [9]:
trainData = BowlingDataset("ScoreDetailDataset.txt")
trainDataLoader = DataLoader(trainData, batch_size=batch_size, shuffle=True) #pin memory doesnt do shit bc the memory has to be grabbed from a physical file
testData = BowlingDataset("ScoreDetailDatasetVSplit.txt")
testDataLoader = DataLoader(testData, batch_size=batch_size, shuffle=True)

loaded 0 our of 1122352
loaded 10000 our of 1122352
loaded 20000 our of 1122352
loaded 30000 our of 1122352
loaded 40000 our of 1122352
loaded 50000 our of 1122352
loaded 60000 our of 1122352
loaded 70000 our of 1122352
loaded 80000 our of 1122352
loaded 90000 our of 1122352
loaded 100000 our of 1122352
loaded 110000 our of 1122352
loaded 120000 our of 1122352
loaded 130000 our of 1122352
loaded 140000 our of 1122352
loaded 150000 our of 1122352
loaded 160000 our of 1122352
loaded 170000 our of 1122352
loaded 180000 our of 1122352
loaded 190000 our of 1122352
loaded 200000 our of 1122352
loaded 210000 our of 1122352
loaded 220000 our of 1122352
loaded 230000 our of 1122352
loaded 240000 our of 1122352
loaded 250000 our of 1122352
loaded 260000 our of 1122352
loaded 270000 our of 1122352
loaded 280000 our of 1122352
loaded 290000 our of 1122352
loaded 300000 our of 1122352
loaded 310000 our of 1122352
loaded 320000 our of 1122352
loaded 330000 our of 1122352
loaded 340000 our of 1122352

In [None]:
# test to see if pinning is working
for batch_ndx, sample in enumerate(trainDataLoader):
    if batch_ndx > 5:
        break
    print(sample[1].is_pinned())
    print(sample[1].is_cuda)
print(len(trainDataLoader))

In [10]:
model = TestModel().cuda()
loss_fn = torch.nn.L1Loss().cuda()
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

for t in range(epochs):
    startTime = time.time()
    print(f"Starting epoch {t}")
    train_loop(trainDataLoader, model, loss_fn, optimizer)
    print(f"Epoch {t} took {time.time() - startTime} seconds")
    test_loop(testDataLoader, model, loss_fn)
print("Finished")


Starting epoch 0
loss: 34.86	batch num: 0/35074
took 17.76923894882202 seconds
loss: 11.46	batch num: 1000/35074
took 1.9395813941955566 seconds
loss: 15.26	batch num: 2000/35074
took 1.9139950275421143 seconds
loss: 14.10	batch num: 3000/35074
took 1.917661428451538 seconds
loss: 12.36	batch num: 4000/35074
took 1.9121284484863281 seconds
loss: 10.97	batch num: 5000/35074
took 1.9018332958221436 seconds
loss: 13.84	batch num: 6000/35074
took 1.908200740814209 seconds
loss: 15.09	batch num: 7000/35074
took 1.9136970043182373 seconds
loss: 11.22	batch num: 8000/35074
took 1.9021010398864746 seconds
loss: 12.83	batch num: 9000/35074
took 1.9036426544189453 seconds
loss: 10.95	batch num: 10000/35074
took 1.8979237079620361 seconds
loss: 11.05	batch num: 11000/35074
took 1.9032649993896484 seconds
loss: 10.96	batch num: 12000/35074
took 1.9398694038391113 seconds
loss: 12.76	batch num: 13000/35074
took 1.9101498126983643 seconds
loss: 13.79	batch num: 14000/35074
took 1.9042103290557861 se

  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


Average Loss: 30.749379575252533
Starting epoch 1
loss: 12.77	batch num: 0/35074
took 0.03699803352355957 seconds
loss: 8.78	batch num: 1000/35074
took 1.8810009956359863 seconds
loss: 11.14	batch num: 2000/35074
took 1.8700006008148193 seconds
loss: 13.06	batch num: 3000/35074
took 1.878000259399414 seconds
loss: 15.17	batch num: 4000/35074
took 1.858999252319336 seconds
loss: 12.75	batch num: 5000/35074
took 1.8670001029968262 seconds
loss: 11.53	batch num: 6000/35074
took 1.876999855041504 seconds
loss: 14.17	batch num: 7000/35074
took 1.8609998226165771 seconds
loss: 10.93	batch num: 8000/35074
took 1.8810005187988281 seconds
loss: 11.58	batch num: 9000/35074
took 1.86799955368042 seconds
loss: 13.65	batch num: 10000/35074
took 1.8489999771118164 seconds
loss: 15.98	batch num: 11000/35074
took 1.8620004653930664 seconds
loss: 13.93	batch num: 12000/35074
took 1.9570000171661377 seconds
loss: 16.58	batch num: 13000/35074
took 1.875 seconds
loss: 9.78	batch num: 14000/35074
took 1.90

In [None]:
torch.save(model.state_dict(), "testmodel.weights")

In [11]:
fivePins = 0
tenPins = 0
fifteenPins = 0
total = 0
for x in range(len(testData)):
    total += 1
    pred = model(testData[x][0])
    diff = abs(pred - testData[x][1])
    if diff <= 5:
        fivePins += 1
    if diff <= 10:
        tenPins += 1
    if diff <= 15:
        fifteenPins += 1
print(f"Model is within five pins {fivePins * 100 / total}% of the time")
print(f"Model is within ten pins {tenPins * 100 / total}% of the time")
print(f"Model is within fifteen pins {fifteenPins * 100 / total}% of the time")

Model is within five pins 30.7% of the time
Model is within ten pins 50.8% of the time
Model is within fifteen pins 67.5% of the time


The R-squared for most of these models hovers around 0.73, which is pretty good :)


Model with L1 Loss, adam at lr=0.0005, batchsize = 64, and 8 epochs:
    Model is within five pins 30.5% of the time
    Model is within ten pins 50.4% of the time
    Model is within fifteen pins 65.8% of the time

Model with L1 Loss, adam at lr=0.001, batchsize = 64, and 8 epochs:
    Model is within five pins 30.2% of the time
    Model is within ten pins 51.9% of the time
    Model is within fifteen pins 66.7% of the time

More neurons Model with L1 Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 29.5% of the time
    Model is within ten pins 51.2% of the time
    Model is within fifteen pins 67.9% of the time

less neurons Model with L1 Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 30.6% of the time
    Model is within ten pins 51.9% of the time
    Model is within fifteen pins 67.5% of the time

Leaky Relu Model with L1 Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 31.8% of the time
    Model is within ten pins 51.2% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with MSE Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 29.1% of the time
    Model is within ten pins 49.8% of the time
    Model is within fifteen pins 66.5% of the time

Leaky Relu Model with L1 Loss, adam at lr=0.00005, batchsize = 32, and 8 epochs:
    Model is within five pins 31.8% of the time
    Model is within ten pins 51.2% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.0001, batchsize = 32, and 8 epochs:\
    Model is within five pins 30.0% of the time
    Model is within ten pins 51.9% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.000001, batchsize = 16, and 8 epochs:\
    Model is within five pins 29.5% of the tim e
    Model is within ten pins 50.6% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.000001, batchsize = 4, and 8 epochs:
    Model is within five pins 31.3% of the time
    Model is within ten pins 52.2% of the time
    Model is within fifteen pins 68.2% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.000001, batchsize = 1, and 8 epochs:
    Model is within five pins 31.0% of the time
    Model is within ten pins 52.9% of the time
    Model is within fifteen pins 67.7% of the time

In [None]:
#first, calculate mean score. just gonna manually do this by iterating over dataset
"""
tSum = 0
tCount = 0
for row in trainData:
    tCount += 1
    tSum += int(row[1])
    if not tCount % 100000:
        print(f"finished with {tCount}")
mean = tSum/tCount
print(mean)
"""
mean=191.9994600624403

In [17]:
"""
SST = 0
tCount = 0
for row in trainData:
    tCount += 1
    SST += (int(row[1]) - mean) ** 2
    if not tCount % 50000:
        print(tCount)
print(SST)
"""
SST = 1108731521.6719563

In [13]:
#calculate SSE, which sums of squared residuals (errors)
SSE = 0
tCount = 0
for row in trainData:
    tCount += 1
    pred = model(row[0])
    resid = pred - row[1]
    SSE += resid ** 2
    if not tCount % 50000:
        print(tCount)
print(SSE)

50000
100000
150000
200000
250000
300000
350000
400000
450000
500000
550000
600000
650000
700000
750000
800000
850000
900000
950000
1000000
1050000
1100000
tensor([2.9814e+08], device='cuda:0', grad_fn=<AddBackward0>)


In [18]:
print(1 - (SSE/SST))

tensor([0.7311], device='cuda:0', grad_fn=<RsubBackward1>)
