In [1]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
import math
import time

In [2]:
# Fail fast with a clear message when no CUDA device is visible: later cells
# place tensors and the model on the GPU.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available, but this notebook requires a GPU")
# Smoke test: allocate an empty tensor on the GPU. As the last expression of
# the cell, its repr is displayed.
torch.empty(0, device="cuda")

tensor([], device='cuda:0')

In [3]:
# Disabled alternative implementation: the class below is wrapped in a string
# literal, so it is never defined. Because the string is the cell's last
# expression, its repr is echoed as the cell output.
"""
# dataset that directly loads the file into memory and then retrieves data as needed
# this helps deal with the file read bottlenecks, but the data has to be transformed and then loaded to the gpu
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        #if self.length > 1500000:
        #    self.length = 1500000
        self.f.seek(0)
        self.fileData = self.f.read()
        self.f.close()
    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        gameData = self.fileData[idx * 27: idx * 27 + 27]
        tempArray = []
        for x in range(25):
            tempArray += [v for v in format(gameData[x], "08b")]
        finalScore = gameData[-1] * 256 + gameData[-2]
        inputArray = [float(v) for v in tempArray][:120]
        output = finalScore
        return torch.tensor(inputArray[:]), torch.tensor(float(output))
"""

'\n# dataset that directly loads the file into memory and then retrieves data as needed\n# this helps deal with the file read bottlenecks, but the data has to be transformed and then loaded to the gpu\nclass BowlingDataset(Dataset):\n    def __init__(self, fileName):\n        self.f = open(fileName, "rb")\n        self.length = int(os.stat(fileName).st_size/27)\n        #if self.length > 1500000:\n        #    self.length = 1500000\n        self.f.seek(0)\n        self.fileData = self.f.read()\n        self.f.close()\n    def __len__(self):\n        return self.length\n    \n    def __getitem__(self, idx):\n        gameData = self.fileData[idx * 27: idx * 27 + 27]\n        tempArray = []\n        for x in range(25):\n            tempArray += [v for v in format(gameData[x], "08b")]\n        finalScore = gameData[-1] * 256 + gameData[-2]\n        inputArray = [float(v) for v in tempArray][:120]\n        output = finalScore\n        return torch.tensor(inputArray[:]), torch.tensor(float(o

In [4]:
# Dataset that pulls each record from the file as requested.
# Disabled alternative implementation: the class below is wrapped in a string
# literal, so it is never defined; the string's repr is echoed as the cell output.
""" This dataset is very memory efficient, but it is heavily limited by storage bandwidth (i think)
class BowlingDataset(Dataset):
    def __init__(self, fileName):
        self.f = open(fileName, "rb")
        self.length = int(os.stat(fileName).st_size/27)
        
    def __len__(self):
        return self.length
    
    def __getitem__(self, idx):
        self.f.seek(idx * 27)
        gameData = self.f.read(27)
        tempArray = []
        for x in range(25):
            tempArray += [v for v in format(gameData[x], "08b")]
        finalScore = gameData[-1] * 256 + gameData[-2]
        inputArray = [float(v) for v in tempArray][:120]
        output = finalScore
        return torch.tensor(inputArray[:]), torch.tensor(float(output))
"""

' This dataset is very memory efficient, but it is heavily limited by storage bandwidth (i think)\nclass BowlingDataset(Dataset):\n    def __init__(self, fileName):\n        self.f = open(fileName, "rb")\n        self.length = int(os.stat(fileName).st_size/27)\n        \n    def __len__(self):\n        return self.length\n    \n    def __getitem__(self, idx):\n        self.f.seek(idx * 27)\n        gameData = self.f.read(27)\n        tempArray = []\n        for x in range(25):\n            tempArray += [v for v in format(gameData[x], "08b")]\n        finalScore = gameData[-1] * 256 + gameData[-2]\n        inputArray = [float(v) for v in tempArray][:120]\n        output = finalScore\n        return torch.tensor(inputArray[:]), torch.tensor(float(output))\n'

In [5]:
# Dataset that decodes the whole file once and keeps every sample in device memory.
# Per-batch access is much faster than reading from disk, but the number of
# samples is limited by the memory of the target device (VRAM for "cuda").
class BowlingDataset(Dataset):
    """Bowling games decoded from a binary file of fixed-size records.

    Each record is ``RECORD_SIZE`` (27) bytes. The first ``INPUT_BITS`` (120)
    bits of a record, most significant bit first within each byte, become the
    float features. The last two bytes hold the final score, low byte first.

    Args:
        fileName: Path of the binary record file.
        max_samples: Upper bound on the number of records loaded, since the
            decoded tensors must fit in device memory. ``None`` loads every
            complete record in the file.
        device: Device the decoded tensors are moved to.

    Attributes:
        length: Number of samples held by the dataset.
        inputTensor: ``(length, INPUT_BITS)`` float32 tensor of features.
        outputTensor: ``(length,)`` int64 tensor of final scores.
    """

    RECORD_SIZE = 27
    INPUT_BITS = 120
    PROGRESS_EVERY = 10000

    def __init__(self, fileName, max_samples=1500000, device="cuda"):
        record_size = self.RECORD_SIZE
        # Integer division: a trailing partial record is ignored.
        self.length = os.stat(fileName).st_size // record_size
        if max_samples is not None and self.length > max_samples:
            self.length = max_samples

        # Only the leading bytes that contribute to INPUT_BITS need decoding.
        input_bytes = (self.INPUT_BITS + 7) // 8
        # One lookup row per byte value: its 8 bits as floats, MSB first.
        byte_bits = [
            [float(bit) for bit in format(value, "08b")] for value in range(256)
        ]

        inputs = []
        scores = []
        # The context manager closes the file even if decoding raises.
        with open(fileName, "rb") as f:
            for index in range(self.length):
                gameData = f.read(record_size)
                bits = []
                for byte in gameData[:input_bytes]:
                    bits.extend(byte_bits[byte])
                scores.append(gameData[-1] * 256 + gameData[-2])
                inputs.append(bits[:self.INPUT_BITS])
                if not index % self.PROGRESS_EVERY:
                    print(f"loaded {index} our of {self.length}")

        # reshape keeps a (0, INPUT_BITS) shape when the file holds no records.
        self.inputTensor = (
            torch.tensor(inputs, dtype=torch.float32)
            .reshape(-1, self.INPUT_BITS)
            .to(device)
        )
        self.outputTensor = torch.tensor(scores, dtype=torch.int64).to(device)

    def __len__(self):
        """Return the number of samples held by the dataset."""
        return self.length

    def __getitem__(self, idx):
        """Return the ``(features, final_score)`` pair stored at ``idx``."""
        return self.inputTensor[idx], self.outputTensor[idx]


In [6]:
class TestModel(torch.nn.Module):
    """Fully connected regressor mapping 120 inputs to one value between 0 and 300.

    The hidden stack is Linear/ReLU pairs of widths 1024, 512, 128 and 64,
    followed by a final Linear layer with a single output.
    """

    def __init__(self):
        super().__init__()
        widths = (120, 1024, 512, 128, 64, 1)
        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers.append(torch.nn.Linear(fan_in, fan_out))
            layers.append(torch.nn.ReLU())
        # The final Linear layer is not followed by a ReLU, so drop the last one.
        self.relu_stack = torch.nn.Sequential(*layers[:-1])

    def forward(self, x):
        """Squash the stack's output with a sigmoid, then scale it by 300."""
        return torch.sigmoid(self.relu_stack(x)) * 300

In [7]:
#trainData[0][0]

In [8]:
def train_loop(dataloader, model, loss_fn, optimizer, log_every=1000):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: Sized iterable yielding ``(x, y)`` batches.
        model: Module whose output has a trailing dimension of size 1; that
            dimension is squeezed away before the loss is computed.
        loss_fn: Callable taking ``(prediction, y)`` and returning a scalar loss.
        optimizer: Optimizer that updates ``model``'s parameters.
        log_every: Print the current loss and elapsed time every this many
            batches. ``0`` or ``None`` disables logging.

    Returns:
        None. Progress is printed and the model is updated in place.
    """
    totalBatches = len(dataloader)
    # Switch to training mode; this matters for layers such as dropout and
    # batch norm that behave differently during evaluation.
    model.train()
    startTime = time.time()
    for batchNum, (x, y) in enumerate(dataloader):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        # Forward pass: drop the trailing size-1 dimension so the prediction
        # lines up with y element-wise instead of broadcasting.
        prediction = model(x).squeeze(1)
        loss = loss_fn(prediction, y)

        # Backpropagation and parameter update.
        loss.backward()
        optimizer.step()

        if log_every and not batchNum % log_every:
            print(f"loss: {loss.item():.2f}\tbatch num: {batchNum}/{totalBatches}")
            print(f"took {time.time() - startTime} seconds")
            # Restart the timer so each report covers only the latest interval.
            startTime = time.time()

In [9]:
def test_loop(dataloader, model):
    model.eval() # need to look into what this does
    size = len(dataloader.dataset)
    numBatches = len(dataloader)
    test_loss = 0
    
    with torch.no_grad():
        for x, y in dataloader:
            pred = model(x)
            test_loss += loss_fn(pred, y).item()
    
    print(f"Average Loss: {test_loss/numBatches}")

In [10]:
# HYPERPARAMS
batch_size = 64       # samples per mini-batch handed to the DataLoaders
learning_rate = 5e-4  # step size passed to the optimizer
epochs = 4            # number of train + evaluate rounds

In [11]:
# BowlingDataset decodes each file once and keeps the tensors on the GPU, so
# DataLoader's pin_memory option is not requested here: the batches are
# already CUDA tensors.
trainData = BowlingDataset("ScoreDetailDataset.txt")
trainDataLoader = DataLoader(trainData, batch_size=batch_size, shuffle=True)
testData = BowlingDataset("ScoreDetailDatasetVSplit.txt")
# Evaluation gains nothing from shuffling; a fixed order keeps the
# batch-averaged loss reproducible from run to run.
testDataLoader = DataLoader(testData, batch_size=batch_size, shuffle=False)

loaded 0 our of 1122352
loaded 10000 our of 1122352
loaded 20000 our of 1122352
loaded 30000 our of 1122352
loaded 40000 our of 1122352
loaded 50000 our of 1122352
loaded 60000 our of 1122352
loaded 70000 our of 1122352
loaded 80000 our of 1122352
loaded 90000 our of 1122352
loaded 100000 our of 1122352
loaded 110000 our of 1122352
loaded 120000 our of 1122352
loaded 130000 our of 1122352
loaded 140000 our of 1122352
loaded 150000 our of 1122352
loaded 160000 our of 1122352
loaded 170000 our of 1122352
loaded 180000 our of 1122352
loaded 190000 our of 1122352
loaded 200000 our of 1122352
loaded 210000 our of 1122352
loaded 220000 our of 1122352
loaded 230000 our of 1122352
loaded 240000 our of 1122352
loaded 250000 our of 1122352
loaded 260000 our of 1122352
loaded 270000 our of 1122352
loaded 280000 our of 1122352
loaded 290000 our of 1122352
loaded 300000 our of 1122352
loaded 310000 our of 1122352
loaded 320000 our of 1122352
loaded 330000 our of 1122352
loaded 340000 our of 1122352

In [12]:
# Probe the first six training batches: is the target tensor in pinned host
# memory, and is it on a CUDA device? Then report the number of batches.
for batch_ndx, sample in enumerate(trainDataLoader):
    if batch_ndx >= 6:
        break
    targets = sample[1]
    print(targets.is_pinned())
    print(targets.is_cuda)
print(len(trainDataLoader))

False
True
False
True
False
True
False
True
False
True
False
True
17537


In [14]:
# Model, loss and optimizer; the model and loss module are moved to the GPU.
model = TestModel().cuda()
loss_fn = torch.nn.L1Loss().cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Each epoch is one timed training pass followed by one evaluation pass.
for epoch in range(epochs):
    epochStart = time.time()
    print(f"Starting epoch {epoch}")
    train_loop(trainDataLoader, model, loss_fn, optimizer)
    elapsed = time.time() - epochStart
    print(f"Epoch {epoch} took {elapsed} seconds")
    test_loop(testDataLoader, model)
print("Finished")

Starting epoch 0
loss: 48.64	batch num: 0/17537
took 0.59993577003479 seconds
loss: 14.01	batch num: 1000/17537
took 2.447175979614258 seconds
loss: 11.64	batch num: 2000/17537
took 2.3979651927948 seconds
loss: 13.19	batch num: 3000/17537
took 2.369196653366089 seconds
loss: 16.04	batch num: 4000/17537
took 2.392025947570801 seconds
loss: 14.06	batch num: 5000/17537
took 2.3783626556396484 seconds
loss: 14.09	batch num: 6000/17537
took 2.3726351261138916 seconds
loss: 12.03	batch num: 7000/17537
took 2.376999616622925 seconds
loss: 15.85	batch num: 8000/17537
took 2.3690481185913086 seconds
loss: 11.86	batch num: 9000/17537
took 2.3749992847442627 seconds
loss: 11.48	batch num: 10000/17537
took 2.36403751373291 seconds
loss: 11.45	batch num: 11000/17537
took 2.422710418701172 seconds
loss: 10.78	batch num: 12000/17537
took 2.3701069355010986 seconds
loss: 12.17	batch num: 13000/17537
took 2.382520914077759 seconds
loss: 11.46	batch num: 14000/17537
took 2.361063003540039 seconds
loss:

  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)


loss: 14.01	batch num: 1000/17537
took 2.3937435150146484 seconds
loss: 11.91	batch num: 2000/17537
took 2.378053665161133 seconds
loss: 13.75	batch num: 3000/17537
took 2.3750267028808594 seconds
loss: 12.15	batch num: 4000/17537
took 2.3909482955932617 seconds
loss: 12.05	batch num: 5000/17537
took 2.3746511936187744 seconds
loss: 12.04	batch num: 6000/17537
took 2.395317554473877 seconds
loss: 14.12	batch num: 7000/17537
took 2.3790385723114014 seconds
loss: 12.91	batch num: 8000/17537
took 2.3886942863464355 seconds
loss: 13.54	batch num: 9000/17537
took 2.3561174869537354 seconds
loss: 14.78	batch num: 10000/17537
took 2.363999366760254 seconds
loss: 12.96	batch num: 11000/17537
took 2.374544143676758 seconds
loss: 11.31	batch num: 12000/17537
took 2.388523817062378 seconds
loss: 12.94	batch num: 13000/17537
took 2.3710875511169434 seconds
loss: 15.53	batch num: 14000/17537
took 2.379133701324463 seconds
loss: 12.35	batch num: 15000/17537
took 2.3739523887634277 seconds
loss: 10.5

In [None]:
# Persist only the model's state dict (its parameters and buffers), not the module object.
weights = model.state_dict()
torch.save(weights, "testmodel.weights")