In [None]:
import torch, torchtext, numpy as np
import pandas as pd, csv
from torch import nn, optim
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import pdb
torch.manual_seed(291)
np.random.seed(291)
import pandas as pd

In [None]:
#Code adapted and inspired by movie recommender model in lecture 6
class Dataset(torch.utils.data.Dataset):
    """Ratings dataset yielding ``((user_idx, book_idx), rating)`` samples.

    The raw ``user_id`` / ``book_id`` values of the ratings CSV are re-indexed to
    contiguous integers starting at 0 (in order of first appearance) so they can
    be used directly as row indices of an ``nn.Embedding``.

    Args:
        fn: path to the ratings CSV; needs ``user_id``, ``book_id`` and ``rating`` columns.
        books: path to the book metadata CSV; needs ``book_id`` and ``original_title`` columns.
    """

    def __init__(self, fn, books):
        self.dataframe = pd.read_csv(fn)
        self.books = pd.read_csv(books)
        # Raw ids in order of first appearance: position in the array == contiguous index.
        # They are kept so get_book_title can translate an index back to the raw book_id.
        self.user_ids = self.dataframe['user_id'].unique()
        self.book_ids = self.dataframe['book_id'].unique()
        u2n = {u: n for n, u in enumerate(self.user_ids)}
        m2n = {m: n for n, m in enumerate(self.book_ids)}
        self.dataframe['user_id'] = self.dataframe['user_id'].map(u2n)
        self.dataframe['book_id'] = self.dataframe['book_id'].map(m2n)
        # (user index, book index) <- coordinates of each rating
        self.coords = torch.tensor(self.dataframe[['user_id', 'book_id']].values, dtype=torch.long)
        self.ratings = torch.tensor(self.dataframe['rating'].values, dtype=torch.float32)
        self.n_users = len(self.user_ids)
        self.n_books = len(self.book_ids)

    def __len__(self):
        """Number of ratings in the dataset."""
        return len(self.coords)

    def get_book_title(self, book_id):
        """Return the ``original_title`` of a book given its re-indexed id.

        Args:
            book_id: contiguous book index as stored in ``self.coords[:, 1]``
                (NOT the raw ``book_id`` from the CSV files).

        Raises:
            IndexError: if the index is out of range or the book has no row in the books CSV.
        """
        # The books CSV is keyed by the raw id, so translate the index back before looking it up.
        raw_book_id = self.book_ids[int(book_id)]
        titles = self.books.loc[self.books['book_id'] == raw_book_id, 'original_title']
        return titles.values[0]

    def __getitem__(self, i):
        """Return ``(coords, rating)`` for sample ``i``; ``coords`` is ``[user index, book index]``."""
        return (self.coords[i], self.ratings[i])

In [None]:
!wget https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv
!wget https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv
ds_full = Dataset('ratings.csv', 'books.csv')
n_train = int(0.8 * len(ds_full))
n_test = len(ds_full) - n_train
rng = torch.Generator().manual_seed(291)
ds_train, ds_test = torch.utils.data.random_split(ds_full, [n_train, n_test], rng)
len(ds_full)

--2021-04-11 07:38:27--  https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/ratings.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 72126826 (69M) [text/plain]
Saving to: ‘ratings.csv’


2021-04-11 07:38:30 (126 MB/s) - ‘ratings.csv’ saved [72126826/72126826]

--2021-04-11 07:38:30--  https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3286659 (3.1M) [text/plain]
Saving to: ‘books.csv’


2021-04-11 07:38:30 (54.1 MB/s) - ‘books.csv’ saved [32866

5976479

In [None]:
# Experimental code for the Amazon book-review dataset: scraping a product title from a page,
# loading the gzipped review dump, and building a product URL from an ASIN.
# The whole experiment is wrapped in a string literal, so none of it is executed -- in the end
# the Amazon dataset was determined to be too big to use.
# NOTE(review): if this is revived, parse() calls json.loads but json is never imported in the snippet.
"""
import torch, torchtext, numpy as np
import pandas as pd, csv
from torch import nn, optim
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import pdb
torch.manual_seed(291)
np.random.seed(291)
import pandas as pd
import requests
import gzip
!pip install bs4
from bs4 import BeautifulSoup

HEADERS = ({'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
            'Accept-Language': 'en-US, en;q=0.5'})   # need to be user-agent to scrape info from page
URL = "https://www.amazon.com/dp/0001061240"    # random book to test the asin extension
webpage = requests.get(URL, headers=HEADERS)
soup = BeautifulSoup(webpage.content, "lxml")

title = soup.find("span", attrs={"id":'productTitle'})  # extract specific element from page
title_value = title.string # convert to string
title_string = title_value.strip() # get rid of empty space
title_string

def parse(path):
  g = gzip.open(path, 'rb')
  for l in g:
    yield json.loads(l)

def getDF(path):
  i = 0
  df = {}
  for d in parse(path):
    df[i] = d
    i += 1
  return pd.DataFrame.from_dict(df, orient='index')

df = getDF('reviews_Books.json.gz')

def getURL(asin):
  filler_length = 10 - len(asin)
  filler = ""
  for x in range(filler_length):
    filler += str(0)
  filler += str(asin)
  filler = "https://www.amazon.com/dp/" + filler
  return filler
"""

In [None]:
#Code adapted and inspired by movie recommender model in lecture 6
class BookRecommenderEmbeddingML(nn.Module):
  """Matrix-factorisation recommender: user/book embedding dot product plus per-user and per-book biases.

  Args:
    n_users: number of distinct users (rows of the user embedding and bias tables).
    n_books: number of distinct books (rows of the book embedding and bias tables).
    emb_dim: length of each user / book embedding vector.
    dropout: dropout probability applied to both embeddings while training.
    rating_scale: the sigmoid output in (0, 1) is multiplied by this value, so it is the
      upper bound of any predicted rating.
  """

  def __init__(self, n_users, n_books, emb_dim, dropout=0.25, rating_scale=5.5):
    super().__init__()
    self.user_embedding = nn.Embedding(n_users, emb_dim)
    # I question if this is helpful for our use of this model, due to our plan to use users
    # in this dataset to represent multiple users - Declan
    self.user_bias = nn.Embedding(n_users, 1)
    self.book_embedding = nn.Embedding(n_books, emb_dim)
    self.book_bias = nn.Embedding(n_books, 1)
    nn.init.xavier_uniform_(self.user_embedding.weight)
    nn.init.xavier_uniform_(self.book_embedding.weight)
    nn.init.zeros_(self.user_bias.weight)
    nn.init.zeros_(self.book_bias.weight)
    self.dropout = nn.Dropout(dropout)
    self.rating_scale = rating_scale

  def forward(self, samples):
    """Predict ratings for a batch of (user index, book index) pairs.

    Args:
      samples: integer tensor whose column 0 holds user indices and column 1 book indices.

    Returns:
      1-D tensor with one predicted rating per row of ``samples``.
    """
    user_idx, book_idx = samples[:, 0], samples[:, 1]
    users = self.dropout(self.user_embedding(user_idx))
    books = self.dropout(self.book_embedding(book_idx))
    dot = (users * books).sum(1)
    # squeeze(-1) removes only the bias column, so a batch of one stays 1-D
    # instead of collapsing to a 0-d scalar.
    user_b = self.user_bias(user_idx).squeeze(-1)
    book_b = self.book_bias(book_idx).squeeze(-1)
    return torch.sigmoid(dot + user_b + book_b) * self.rating_scale

In [None]:
# Use the first GPU when one is available; otherwise fall back to the CPU so the notebook still runs.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def run_test(model, ldr, crit):
    """Evaluate ``model`` on every batch of ``ldr`` and return the sample-weighted mean loss.

    The model is switched to eval mode and gradients are disabled for the whole pass.
    """
    model.eval()
    loss_sum = 0
    n_seen = 0
    progress = tqdm(ldr, leave=False, desc='test iter')
    with torch.no_grad():
        for batch in progress:
            coords, labels = (t.to(device) for t in batch)
            batch_size = labels.size(0)
            # crit returns the batch mean, so weight it by the batch size before accumulating
            loss_sum += crit(model(coords), labels).item() * batch_size
            n_seen += batch_size
            progress.set_postfix({'loss': loss_sum / n_seen}, refresh=True)
    return loss_sum / n_seen

def run_train(model, ldr, crit, opt, sched):
    """Train ``model`` for one pass over ``ldr`` and return the sample-weighted mean loss.

    Both the optimizer and the scheduler are stepped once per batch.
    """
    model.train()
    loss_sum = 0
    n_seen = 0
    progress = tqdm(ldr, leave=False, desc='train iter')
    for batch in progress:
        opt.zero_grad()
        coords, labels = (t.to(device) for t in batch)
        batch_loss = crit(model(coords), labels)
        batch_loss.backward()
        opt.step()
        sched.step()  # the schedule advances per batch, not per epoch
        batch_size = labels.size(0)
        # crit returns the batch mean, so weight it by the batch size before accumulating
        loss_sum += batch_loss.item() * batch_size
        n_seen += batch_size
        progress.set_postfix({'loss': loss_sum / n_seen}, refresh=True)
    return loss_sum / n_seen

def run_all(model, ldr_train, ldr_test, crit, opt, sched, n_epochs=10):
    """Alternate one training pass and one test pass for ``n_epochs`` epochs.

    Each epoch's losses are written out, and the epoch progress bar shows the
    best (lowest) test loss seen so far together with the epoch it occurred in.
    """
    lowest_test_loss = np.inf
    epoch_bar = tqdm(range(n_epochs), desc='epochs', unit='ep')
    for epoch in epoch_bar:
        train_loss = run_train(model, ldr_train, crit, opt, sched)
        test_loss = run_test(model, ldr_test, crit)
        tqdm.write(f'epoch {epoch}   train loss {train_loss:.6f}    test loss {test_loss:.6f}')
        if not (test_loss < lowest_test_loss):
            continue
        # new best epoch: remember it and surface it on the progress bar
        lowest_test_loss = test_loss
        epoch_bar.set_postfix({'bE': epoch, 'bL': lowest_test_loss}, refresh=True)

In [None]:
# Baseline run: 24-dimensional embeddings, batches of 32, two epochs.
n_epochs = 2

# nn.Module.to() returns the module itself, so construction and device placement can be chained.
model = BookRecommenderEmbeddingML(ds_full.n_users, ds_full.n_books, 24).to(device)

ldr_train = torch.utils.data.DataLoader(dataset=ds_train, batch_size=32, shuffle=True)
ldr_test = torch.utils.data.DataLoader(dataset=ds_test, batch_size=32)

crit = nn.MSELoss()
crit = crit.to(device)
opt = optim.SGD(params=model.parameters(), lr=1e-6, momentum=0.9)
# NOTE(review): OneCycleLR drives the learning rate from max_lr, so the lr given to SGD above
# is presumably overridden once the scheduler is attached -- confirm against the PyTorch docs.
sched = optim.lr_scheduler.OneCycleLR(opt, max_lr=0.1, epochs=n_epochs, steps_per_epoch=len(ldr_train))

run_all(model, ldr_train, ldr_test, crit, opt, sched, n_epochs=n_epochs)

HBox(children=(FloatProgress(value=0.0, description='epochs', max=2.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 0   train loss 0.869606    test loss 0.750823


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 1   train loss 0.721712    test loss 0.707309



In [None]:
def parameter_test(emb, batch, n_epochs, learn, p, max_learn, input_model=None, new_model=True):
  """Train and evaluate a recommender with the given hyper-parameters and return it.

  Args:
    emb: embedding dimension (only used when a new model is created).
    batch: batch size for both the train and the test loader.
    n_epochs: number of epochs; also sizes the one-cycle schedule.
    learn: lr passed to SGD.
    p: SGD momentum.
    max_learn: max_lr of the OneCycleLR schedule.
    input_model: existing model to keep training. Required when new_model is False,
      ignored otherwise. Defaults to None so the six-argument calls keep working.
    new_model: when True (default) a fresh BookRecommenderEmbeddingML is built.

  Returns:
    The trained model.

  Raises:
    ValueError: if new_model is False and no input_model is given.
  """
  if new_model:
    model = BookRecommenderEmbeddingML(ds_full.n_users, ds_full.n_books, emb)
  elif input_model is None:
    raise ValueError("input_model must be provided when new_model is False")
  else:
    model = input_model
  model.to(device)
  ldr_train = torch.utils.data.DataLoader(ds_train, batch_size=batch, shuffle=True)
  ldr_test = torch.utils.data.DataLoader(ds_test, batch_size=batch)

  crit = nn.MSELoss().to(device)
  opt = optim.SGD(model.parameters(), lr=learn, momentum=p)
  sched = optim.lr_scheduler.OneCycleLR(opt, max_lr=max_learn, steps_per_epoch=len(ldr_train), epochs=n_epochs)

  run_all(model, ldr_train, ldr_test, crit, opt, sched, n_epochs)

  return model


In [None]:
# Note: these calls were first run before parameter_test gained the ability to reuse a model.
# input_model is passed explicitly (None) so the calls match the current signature; it is
# ignored because new_model defaults to True.
print("Testing embedding dimensions")
print("Embedding dimensions: 24")
parameter_test(24,32,10,1e-6,0.9,0.1,None)
print("Embedding dimensions: 36")
parameter_test(36,32,10,1e-6,0.9,0.1,None)
print("Embedding dimnesions: 16")
parameter_test(16,32,10,1e-6,0.9,0.1,None)

Testing embedding dimensions
Embedding dimensions: 24


HBox(children=(FloatProgress(value=0.0, description='epochs', max=10.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 0   train loss 0.929395    test loss 0.755043


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 1   train loss 0.744440    test loss 0.739025


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 2   train loss 0.710669    test loss 0.711210


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 3   train loss 0.637788    test loss 0.708742


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 4   train loss 0.572261    test loss 0.724261


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 5   train loss 0.522515    test loss 0.740535


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 6   train loss 0.479530    test loss 0.756431


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 7   train loss 0.437108    test loss 0.767776


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 8   train loss 0.394280    test loss 0.778003


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 9   train loss 0.359575    test loss 0.782329

Embedding dimensions: 36


HBox(children=(FloatProgress(value=0.0, description='epochs', max=10.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 0   train loss 0.929409    test loss 0.755452


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 1   train loss 0.743797    test loss 0.737059


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 2   train loss 0.704285    test loss 0.706660


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 3   train loss 0.613999    test loss 0.711102


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 4   train loss 0.528247    test loss 0.735725


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 5   train loss 0.461362    test loss 0.766615


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 6   train loss 0.405524    test loss 0.791463


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 7   train loss 0.354254    test loss 0.812934


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 8   train loss 0.307098    test loss 0.829122


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 9   train loss 0.272463    test loss 0.834333

Embedding dimnesions: 16


HBox(children=(FloatProgress(value=0.0, description='epochs', max=10.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 0   train loss 0.929107    test loss 0.755954


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 1   train loss 0.745480    test loss 0.741416


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 2   train loss 0.716000    test loss 0.711314


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 3   train loss 0.656688    test loss 0.705428


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 4   train loss 0.606910    test loss 0.711756


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 5   train loss 0.569972    test loss 0.719912


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 6   train loss 0.537119    test loss 0.724191


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 7   train loss 0.502902    test loss 0.728200


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 8   train loss 0.465979    test loss 0.733057


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 9   train loss 0.434134    test loss 0.736313



In [None]:
print("Testing dropout")
# input_model is passed explicitly (None) to match parameter_test's signature; it is ignored
# because new_model defaults to True.
parameter_test(16,32,10,1e-6,0.9,0.1,None)

Testing dropout


HBox(children=(FloatProgress(value=0.0, description='epochs', max=10.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 0   train loss 0.929140    test loss 0.754891


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 1   train loss 0.745515    test loss 0.741954


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 2   train loss 0.729752    test loss 0.724180


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 3   train loss 0.707502    test loss 0.714512


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 4   train loss 0.689451    test loss 0.709717


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 5   train loss 0.673317    test loss 0.702310


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 6   train loss 0.653071    test loss 0.694003


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 7   train loss 0.627640    test loss 0.681285


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 8   train loss 0.598085    test loss 0.670720


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 9   train loss 0.572408    test loss 0.668701



In [None]:
print("Testing dropout over a longer period of time:")
# Same settings as the 10-epoch dropout run, stretched to 25 epochs; a fresh model is trained,
# so the input_model value is not used.
long_time_dropout = parameter_test(
    emb=16,
    batch=32,
    n_epochs=25,
    learn=1e-6,
    p=0.9,
    max_learn=0.1,
    input_model=0,
    new_model=True,
)

Testing dropout over a longer period of time:


HBox(children=(FloatProgress(value=0.0, description='epochs', max=25.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 0   train loss 0.954837    test loss 0.779787


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 1   train loss 0.747807    test loss 0.740060


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 2   train loss 0.731545    test loss 0.729834


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 3   train loss 0.716316    test loss 0.716036


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 4   train loss 0.699686    test loss 0.711177


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 5   train loss 0.689067    test loss 0.710229


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 6   train loss 0.683797    test loss 0.713536


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 7   train loss 0.679737    test loss 0.715411


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 8   train loss 0.676273    test loss 0.714928


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 9   train loss 0.672093    test loss 0.715293


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 10   train loss 0.669590    test loss 0.717656


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 11   train loss 0.665155    test loss 0.714400


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 12   train loss 0.660629    test loss 0.713335


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 13   train loss 0.655338    test loss 0.711366


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 14   train loss 0.649018    test loss 0.707692


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 15   train loss 0.641175    test loss 0.703063


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 16   train loss 0.632969    test loss 0.698854


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 17   train loss 0.622715    test loss 0.693789


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 18   train loss 0.611383    test loss 0.687685


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 19   train loss 0.598948    test loss 0.683494


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 20   train loss 0.585806    test loss 0.677626


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 21   train loss 0.572749    test loss 0.673526


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 22   train loss 0.560809    test loss 0.670502


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 23   train loss 0.550914    test loss 0.669363


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 24   train loss 0.545088    test loss 0.669209



In [None]:
# Further model testing: 50-epoch run of a fresh 16-dimensional model. The result is bound to
# `model`, which the cells below use for prediction.
model = parameter_test(
    emb=16,
    batch=32,
    n_epochs=50,
    learn=1e-6,
    p=0.9,
    max_learn=0.1,
    input_model=0,
    new_model=True,
)

HBox(children=(FloatProgress(value=0.0, description='epochs', max=50.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 0   train loss 0.962090    test loss 0.793487


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 1   train loss 0.756548    test loss 0.744162


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 2   train loss 0.730381    test loss 0.733641


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 3   train loss 0.720828    test loss 0.723001


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 4   train loss 0.705625    test loss 0.710865


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 5   train loss 0.689052    test loss 0.702745


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 6   train loss 0.676445    test loss 0.700954


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 7   train loss 0.668801    test loss 0.700732


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 8   train loss 0.665676    test loss 0.704260


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 9   train loss 0.664814    test loss 0.706971


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 10   train loss 0.665843    test loss 0.709990


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

HBox(children=(FloatProgress(value=0.0, description='test iter', max=37353.0, style=ProgressStyle(description_…

epoch 11   train loss 0.666612    test loss 0.715710


HBox(children=(FloatProgress(value=0.0, description='train iter', max=149412.0, style=ProgressStyle(descriptio…

## Citations
Below are the citations for the datasets used in the code above  
(citations are given in the style specified by the dataset compiler)  

@article{goodbooks2017,
    author = {Zajac, Zygmunt},
    title = {Goodbooks-10k: a new dataset for book recommendations},
    year = {2017},
    publisher = {FastML},
    journal = {FastML},
    howpublished = {\url{http://fastml.com/goodbooks-10k}},
}

In [None]:
# Dense table of model predictions: matrix[user, book] = predicted rating.
# A float dtype keeps the fractional part of each prediction; an int array
# would truncate every value on assignment.
matrix = np.empty((ds_full.n_users, ds_full.n_books), dtype=np.float32)
book_ids = torch.arange(ds_full.n_books, device=device)
with torch.no_grad():  # inference only, so skip building an autograd graph
  for i in range(ds_full.n_users):
    # One batch of [user, book] rows per user instead of one forward pass per
    # pair. Same (rows, 2) layout as the single-row input used before;
    # assumes the model returns one prediction per row - TODO confirm.
    coords = torch.stack((torch.full_like(book_ids, i), book_ids), dim=1)
    matrix[i] = model(coords).reshape(-1).cpu().numpy()
    

KeyboardInterrupt: ignored

In [None]:
# Spot-check the entry stored for user 0 / column 0.
print(matrix[0, 0])

NameError: ignored

In [None]:
# Per-user predicted ratings, each row sorted in ascending order.
# Float dtype so predictions are not truncated to integers on assignment.
# NOTE(review): after sorting, column j of a row no longer corresponds to
# book j, so cells that index `matrix` by book id need the unsorted table.
matrix = np.empty((ds_full.n_users, ds_full.n_books), dtype=np.float32)
book_ids = torch.arange(ds_full.n_books, device=device)
with torch.no_grad():  # inference only, so skip building an autograd graph
  for i in range(ds_full.n_users):
    # Score every book for user i in a single batched forward pass.
    # Assumes the model returns one prediction per input row - TODO confirm.
    coords = torch.stack((torch.full_like(book_ids, i), book_ids), dim=1)
    currUserRatings = model(coords).reshape(-1).cpu().numpy()
    # np.sort returns the sorted copy. list.sort() sorts in place and returns
    # None, so its result could never be stored in the row.
    matrix[i] = np.sort(currUserRatings)

NameError: ignored

In [None]:
# TODO: placeholder input - replace with the [book_id, rating] pairs supplied
# by the person we are recommending for.
ratings = [[book_id1, rating1], [book_id1, rating1], [book_id1, rating1]]

# For every known user, total absolute difference between their `matrix`
# entries and the supplied ratings, summed over ALL supplied ratings.
# (Previously the running best was seeded with user 0's difference on the
# first rating only, so it was compared against sums on a different scale.)
ratedBooks = np.array([pair[0] for pair in ratings], dtype=int)
givenRatings = np.array([pair[1] for pair in ratings], dtype=float)
userDiffs = np.abs(matrix[:, ratedBooks] - givenRatings).sum(axis=1)

# np.argmin returns the first minimum, the same tie-break as a strict `<` scan.
bestUser = int(np.argmin(userDiffs))
bestDiff = userDiffs[bestUser]

In [None]:

NumBooks = 5 # Constant to determine how many books we will recommend each time
# Row of `matrix` belonging to bestUser; reccommendBooks slices entries off the front of it.
bookRatings = matrix[bestUser]
def reccommendBooks(self, NumBooks):
  """Take the next batch of entries off the front of the global ``bookRatings``.

  Args:
    self: Unused; kept so existing call sites keep working.
    NumBooks: How many entries to hand out in this batch.

  Returns:
    A flat list with up to ``NumBooks`` entries from the front of
    ``bookRatings`` (fewer if not enough remain).

  Side effects:
    Rebinds the global ``bookRatings`` to the remaining entries, so the next
    call returns the following batch.
  """
  # Without this declaration the assignment below makes `bookRatings` a local
  # name, and reading it first raises UnboundLocalError.
  global bookRatings
  # list(...) gives a flat batch; appending the slice nested it as one element.
  toReccommend = list(bookRatings[:NumBooks])
  bookRatings = bookRatings[NumBooks:]
  return toReccommend


  
  

In [None]:

# userNumber = bestUser
userNumber = 0 # 0 right now for testing
# Index the embedding with userNumber (not a hard-coded 0) so that switching
# to bestUser above actually changes which user is looked up.
t = torch.tensor([userNumber], dtype=torch.long, device=device)
result = model.user_embedding(t) # one embedding row per requested index
user_feature_vector = result[0] # feature vector for the selected user
print(user_feature_vector)



tensor([ 0.0405, -0.0013,  0.0147,  0.0941, -0.0121,  0.0659, -0.0007, -0.0590,
         0.0231, -0.0037, -0.0156,  0.0352,  0.0112, -0.0304, -0.0671,  0.0765,
         0.0341, -0.0895, -0.0696,  0.0303, -0.0232,  0.0053, -0.0722,  0.0204],
       device='cuda:0', grad_fn=<SelectBackward>)


In [None]:
# Display the book embedding layer to check its size.
model.book_embedding

Embedding(10000, 24)

In [None]:
# Score every book for this user in one batched pass:
# sigmoid(user_vector . book_vector) for each book id. This replaces
# ds_full.n_books separate single-row lookups, each of which forced a
# GPU-to-host sync for the Python-level comparison.
book_ids = torch.arange(ds_full.n_books, device=device)
bookScores = torch.sigmoid((user_feature_vector * model.book_embedding(book_ids)).sum(1))

# argmax keeps the first maximal score, matching a strict `>` scan that
# starts from book 0.
bestBook = int(torch.argmax(bookScores))
bestBookResult = bookScores[bestBook]
  


In [None]:
# Display the highest sigmoid score found by the search above.
bestBookResult

tensor(0.5479, device='cuda:0', grad_fn=<SelectBackward>)

In [None]:
# Display the index of the book with the highest score.
bestBook

69

In [None]:
def lossFunction(self, rating):
  """Absolute error between the user's score for ``bestBook`` and ``rating``.

  The score is the dot product of the notebook-global ``user_feature_vector``
  with the embedding of the notebook-global ``bestBook``.

  Args:
    self: Model whose ``book_embedding`` layer supplies the book vector.
    rating: Rating to compare the score against.

  Returns:
    A 0-d tensor holding ``|score - rating|``.
  """
  d = torch.tensor([bestBook], dtype=torch.long, device=device)
  # Read the embedding from the model passed in as `self`; previously the
  # argument was ignored in favour of the notebook-global `model`.
  # NOTE(review): this is the raw dot product, whereas the best-book search
  # applies a sigmoid; confirm which scale `rating` is expected to be on.
  dot = (user_feature_vector * self.book_embedding(d)).sum(1)[0]
  return torch.abs(dot - rating)
  


In [None]:
# Evaluate the loss for an example rating of 4 against the current bestBook.
lossFunction(model, 4)

tensor(3.8078, device='cuda:0', grad_fn=<AbsBackward>)