In [2]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
import math
import time

In [3]:
torch.cuda.is_available()
torch.cuda.FloatTensor()

tensor([], device='cuda:0')

In [None]:
"""
# dataset that directly loads the file into memory and then retrieves data as needed
# this helps deal with the file read bottlenecks, but the data has to be transformed and then loaded to the gpu
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        #if self.length > 1500000:
        #    self.length = 1500000
        self.f.seek(0)
        self.fileData = self.f.read()
        self.f.close()
    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        gameData = self.fileData[idx * 27: idx * 27 + 27]
        tempArray = []
        for x in range(25):
            tempArray += [v for v in format(gameData[x], "08b")]
        finalScore = gameData[-1] * 256 + gameData[-2]
        inputArray = [float(v) for v in tempArray][:120]
        output = finalScore
        return torch.tensor(inputArray[:]), torch.tensor(float(output))
"""

In [None]:
# dataset that pulls data from file as requested
""" This dataset is very memory efficient, but it is heavily limited by storage bandwidth (i think)
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        
    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        self.f.seek(idx * 27)
        gameData = self.f.read(27)
        tempArray = []
        for x in range(25):
            tempArray += [v for v in format(gameData[x], "08b")]
        finalScore = gameData[-1] * 256 + gameData[-2]
        inputArray = [float(v) for v in tempArray][:120]
        output = finalScore
        return torch.tensor(inputArray[:]), torch.tensor(float(output))
"""

In [4]:
# dataset that loads all data into GPU memory
#this works significantly better but we are hard limited by vram. maybe try to look into memory pinning more
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        self.f.seek(0)
        # build entire array into memory
        self.inputArray=[]
        self.finalScoreArray=[]
        for v in range(self.length):
            gameData = self.f.read(27)
            tempArray = []
            for x in range(25):
                tempArray += [v for v in format(gameData[x], "08b")]
            self.finalScoreArray.append(gameData[-1] * 256 + gameData[-2])
            self.inputArray.append([float(v) for v in tempArray][:120])
            if not v % 10000:
                print(f"loaded {v} our of {self.length}")
        self.inputTensor = torch.tensor(self.inputArray).to("cuda")
        self.outputTensor = torch.tensor(self.finalScoreArray).to("cuda")
        self.inputArray = []
        self.finalScoreArray = []
        self.f.close()
    def __len__(self):
        return self.length
    def __getitem__(self, idx):
        return self.inputTensor[idx], self.outputTensor[idx]


In [10]:
class TestModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.relu_stack = torch.nn.Sequential(
            torch.nn.Linear(120, 512),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(512, 256),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(256, 128),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(128, 64),
            torch.nn.LeakyReLU(),
            torch.nn.Linear(64, 1)
        )
        
    def forward(self, x):
        logits = self.relu_stack(x)
        output = torch.nn.Sigmoid()(logits)
        return output * 300

In [5]:
#trainData[0][0]

In [11]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset) # get number of samples
    totalBatches = len(dataloader)
    model.train() # need to look into what this exactly does
    startTime = time.time()
    for batchNum, (x, y) in enumerate(dataloader):
        # zero out gradients
        optimizer.zero_grad()
        
        # prediction + loss
        prediction = model(x).squeeze(1)
        loss = loss_fn(prediction, y)
        
        #Backpropagation
        loss.backward()
        optimizer.step()
        
        if not batchNum % 1000:
            print(f"loss: {loss.item():.2f}\tbatch num: {batchNum}/{totalBatches}")
            print(f"took {time.time() - startTime} seconds")
            startTime = time.time()

In [12]:
def test_loop(dataloader, model, loss_fn):
    model.eval() # need to look into what this does
    size = len(dataloader.dataset)
    numBatches = len(dataloader)
    test_loss = 0
    
    with torch.no_grad():
        for x, y in dataloader:
            pred = model(x)
            test_loss += loss_fn(pred, y).item()
    
    print(f"Average Loss: {test_loss/numBatches}")

In [13]:
# HYPERPARAMS
batch_size = 32
learning_rate = 0.0001
epochs = 8

In [14]:
trainData = BowlingDataset("ScoreDetailDataset.txt")
trainDataLoader = DataLoader(trainData, batch_size=batch_size, shuffle=True) #pin memory doesnt do shit bc the memory has to be grabbed from a physical file
testData = BowlingDataset("ScoreDetailDatasetVSplit.txt")
testDataLoader = DataLoader(testData, batch_size=batch_size, shuffle=True)

loaded 0 our of 1122352
loaded 10000 our of 1122352
loaded 20000 our of 1122352
loaded 30000 our of 1122352
loaded 40000 our of 1122352
loaded 50000 our of 1122352
loaded 60000 our of 1122352
loaded 70000 our of 1122352
loaded 80000 our of 1122352
loaded 90000 our of 1122352
loaded 100000 our of 1122352
loaded 110000 our of 1122352
loaded 120000 our of 1122352
loaded 130000 our of 1122352
loaded 140000 our of 1122352
loaded 150000 our of 1122352
loaded 160000 our of 1122352
loaded 170000 our of 1122352
loaded 180000 our of 1122352
loaded 190000 our of 1122352
loaded 200000 our of 1122352
loaded 210000 our of 1122352
loaded 220000 our of 1122352
loaded 230000 our of 1122352
loaded 240000 our of 1122352
loaded 250000 our of 1122352
loaded 260000 our of 1122352
loaded 270000 our of 1122352
loaded 280000 our of 1122352
loaded 290000 our of 1122352
loaded 300000 our of 1122352
loaded 310000 our of 1122352
loaded 320000 our of 1122352
loaded 330000 our of 1122352
loaded 340000 our of 1122352

In [None]:
# test to see if pinning is working
for batch_ndx, sample in enumerate(trainDataLoader):
    if batch_ndx > 5:
        break
    print(sample[1].is_pinned())
    print(sample[1].is_cuda)
print(len(trainDataLoader))

In [15]:
model = TestModel().cuda()
loss_fn = torch.nn.L1Loss().cuda()
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

for t in range(epochs):
    startTime = time.time()
    print(f"Starting epoch {t}")
    train_loop(trainDataLoader, model, loss_fn, optimizer)
    print(f"Epoch {t} took {time.time() - startTime} seconds")
    test_loop(testDataLoader, model, loss_fn)
print("Finished")


Starting epoch 0
loss: 35.55	batch num: 0/35074
took 6.389125347137451 seconds
loss: 15.57	batch num: 1000/35074
took 3.5994176864624023 seconds
loss: 11.47	batch num: 2000/35074
took 3.5137290954589844 seconds
loss: 11.63	batch num: 3000/35074
took 3.384000778198242 seconds
loss: 12.89	batch num: 4000/35074
took 3.6650032997131348 seconds
loss: 12.56	batch num: 5000/35074
took 3.7047336101531982 seconds
loss: 12.33	batch num: 6000/35074
took 3.2140188217163086 seconds
loss: 13.58	batch num: 7000/35074
took 3.3389999866485596 seconds
loss: 8.79	batch num: 8000/35074
took 3.441030502319336 seconds
loss: 12.13	batch num: 9000/35074
took 3.349972724914551 seconds
loss: 11.05	batch num: 10000/35074
took 3.3950345516204834 seconds
loss: 11.71	batch num: 11000/35074
took 3.369993209838867 seconds
loss: 13.85	batch num: 12000/35074
took 3.4109981060028076 seconds
loss: 10.49	batch num: 13000/35074
took 3.364001512527466 seconds
loss: 14.70	batch num: 14000/35074
took 3.10296893119812 seconds


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


loss: 10.17	batch num: 1000/35074
took 2.211000919342041 seconds
loss: 14.45	batch num: 2000/35074
took 2.174999952316284 seconds
loss: 14.28	batch num: 3000/35074
took 2.1590001583099365 seconds
loss: 12.13	batch num: 4000/35074
took 2.177999496459961 seconds
loss: 14.13	batch num: 5000/35074
took 2.188000440597534 seconds
loss: 11.84	batch num: 6000/35074
took 2.3500001430511475 seconds
loss: 13.67	batch num: 7000/35074
took 2.4529998302459717 seconds
loss: 11.66	batch num: 8000/35074
took 2.4800004959106445 seconds
loss: 10.06	batch num: 9000/35074
took 2.4589996337890625 seconds
loss: 10.15	batch num: 10000/35074
took 2.2939999103546143 seconds
loss: 12.94	batch num: 11000/35074
took 2.2140002250671387 seconds
loss: 11.55	batch num: 12000/35074
took 2.173999786376953 seconds
loss: 11.82	batch num: 13000/35074
took 2.181999444961548 seconds
loss: 11.72	batch num: 14000/35074
took 2.181000232696533 seconds
loss: 11.59	batch num: 15000/35074
took 2.172999858856201 seconds
loss: 14.94	

loss: 11.88	batch num: 13000/35074
took 2.9670448303222656 seconds
loss: 12.39	batch num: 14000/35074
took 2.733955144882202 seconds
loss: 9.54	batch num: 15000/35074
took 3.294999122619629 seconds
loss: 8.50	batch num: 16000/35074
took 3.4190030097961426 seconds
loss: 13.98	batch num: 17000/35074
took 2.938997983932495 seconds
loss: 11.74	batch num: 18000/35074
took 3.1560044288635254 seconds
loss: 10.42	batch num: 19000/35074
took 3.211996078491211 seconds
loss: 12.75	batch num: 20000/35074
took 3.3030343055725098 seconds
loss: 13.00	batch num: 21000/35074
took 3.664668321609497 seconds
loss: 13.04	batch num: 22000/35074
took 3.493999719619751 seconds
loss: 14.45	batch num: 23000/35074
took 3.550999879837036 seconds
loss: 13.85	batch num: 24000/35074
took 3.585998773574829 seconds
loss: 11.69	batch num: 25000/35074
took 3.5340027809143066 seconds
loss: 11.82	batch num: 26000/35074
took 3.4759981632232666 seconds
loss: 12.52	batch num: 27000/35074
took 4.3420023918151855 seconds
loss:

loss: 9.71	batch num: 25000/35074
took 2.2959988117218018 seconds
loss: 11.25	batch num: 26000/35074
took 2.296001434326172 seconds
loss: 13.62	batch num: 27000/35074
took 2.3170340061187744 seconds
loss: 10.77	batch num: 28000/35074
took 2.330963611602783 seconds
loss: 12.03	batch num: 29000/35074
took 2.3030362129211426 seconds
loss: 11.12	batch num: 30000/35074
took 2.3109664916992188 seconds
loss: 9.31	batch num: 31000/35074
took 2.294999837875366 seconds
loss: 11.46	batch num: 32000/35074
took 2.3109993934631348 seconds
loss: 11.69	batch num: 33000/35074
took 2.2937746047973633 seconds
loss: 9.53	batch num: 34000/35074
took 2.270167112350464 seconds
loss: 11.37	batch num: 35000/35074
took 2.2850401401519775 seconds
Epoch 7 took 95.03298473358154 seconds
Average Loss: 31.27614003419876
Finished


In [None]:
torch.save(model.state_dict(), "testmodel.weights")

In [11]:
fivePins = 0
tenPins = 0
fifteenPins = 0
total = 0
for x in range(len(testData)):
    total += 1
    pred = model(testData[x][0])
    diff = abs(pred - testData[x][1])
    if diff <= 5:
        fivePins += 1
    if diff <= 10:
        tenPins += 1
    if diff <= 15:
        fifteenPins += 1
print(f"Model is within five pins {fivePins * 100 / total}% of the time")
print(f"Model is within ten pins {tenPins * 100 / total}% of the time")
print(f"Model is within fifteen pins {fifteenPins * 100 / total}% of the time")

Model is within five pins 30.7% of the time
Model is within ten pins 50.8% of the time
Model is within fifteen pins 67.5% of the time


The R-squared for most of these models hovers around 0.73, which is pretty good :)


Model with L1 Loss, adam at lr=0.0005, batchsize = 64, and 8 epochs:
    Model is within five pins 30.5% of the time
    Model is within ten pins 50.4% of the time
    Model is within fifteen pins 65.8% of the time

Model with L1 Loss, adam at lr=0.001, batchsize = 64, and 8 epochs:
    Model is within five pins 30.2% of the time
    Model is within ten pins 51.9% of the time
    Model is within fifteen pins 66.7% of the time

More neurons Model with L1 Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 29.5% of the time
    Model is within ten pins 51.2% of the time
    Model is within fifteen pins 67.9% of the time

less neurons Model with L1 Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 30.6% of the time
    Model is within ten pins 51.9% of the time
    Model is within fifteen pins 67.5% of the time

Leaky Relu Model with L1 Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 31.8% of the time
    Model is within ten pins 51.2% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with MSE Loss, adam at lr=0.001, batchsize = 32, and 8 epochs:
    Model is within five pins 29.1% of the time
    Model is within ten pins 49.8% of the time
    Model is within fifteen pins 66.5% of the time

Leaky Relu Model with L1 Loss, adam at lr=0.00005, batchsize = 32, and 8 epochs:
    Model is within five pins 31.8% of the time
    Model is within ten pins 51.2% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.0001, batchsize = 32, and 8 epochs:\
    Model is within five pins 30.0% of the time
    Model is within ten pins 51.9% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.000001, batchsize = 16, and 8 epochs:\
    Model is within five pins 29.5% of the tim e
    Model is within ten pins 50.6% of the time
    Model is within fifteen pins 67.8% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.000001, batchsize = 4, and 8 epochs:
    Model is within five pins 31.3% of the time
    Model is within ten pins 52.2% of the time
    Model is within fifteen pins 68.2% of the time

Leaky Relu Model with L1 Loss, RMSprop at lr=0.000001, batchsize = 1, and 8 epochs:
    Model is within five pins 31.0% of the time
    Model is within ten pins 52.9% of the time
    Model is within fifteen pins 67.7% of the time

In [8]:
#first, calculate mean score. just gonna manually do this by iterating over dataset
"""
tSum = 0
tCount = 0
for row in testData:
    tCount += 1
    tSum += int(row[1])
    if not tCount % 100000:
        print(f"finished with {tCount}")
mean = tSum/tCount
print(mean)
"""
testMean=190.797
trainMean=191.9994600624403

190.797


In [18]:
"""
SST = 0
tCount = 0
for row in testData:
    tCount += 1
    SST += (int(row[1]) - mean) ** 2
    if not tCount % 50000:
        print(tCount)
print(SST)
"""
testSST = 949951.791
trainSST = 1108731521.6719563

In [16]:
#calculate SSE, which sums of squared residuals (errors)
SSE = 0
tCount = 0
for row in testData:
    tCount += 1
    pred = model(row[0])
    resid = pred - row[1]
    SSE += resid ** 2
    if not tCount % 50000:
        print(tCount)
print(SSE)

tensor([273146.7188], device='cuda:0', grad_fn=<AddBackward0>)


In [18]:
#Model R2 on model data is about 0.7311
#Model R2 on test data is about 0.7125

tensor([0.7311], device='cuda:0', grad_fn=<RsubBackward1>)


In [19]:
print(1 - SSE/testSST)

tensor([0.7125], device='cuda:0', grad_fn=<RsubBackward1>)
