In [127]:
train2

<torchtext.data.iterator.BucketIterator at 0x1637ea850>

In [14]:
# Adapted from https://github.com/pytorch/text/blob/master/torchtext/datasets/imdb.py
import os, glob, gzip, random
import torch
from torch.autograd import Variable
from torchtext import data, datasets, vocab
from scipy.stats import norm
random.seed(1234)
from collections import Counter
import math
import numpy as np
import copy

In [15]:
# Load jester dataset, tested on Python 2.7
#### Arguments
# -load_text: Load text or not. In problem 2, text is unnecessary hence this flag should be
#             set to False to save memory. Default True.
# -batch_size: batch size. Default 1.
# -subsample_rate: Change this to 0.2 in problem 3 and use default 1.0 in problem 2. Default: 1.0
# -repeat: Whether to repeat the iterator for multiple epochs. If set to False, 
#          then .init_epoch() needs to be called before starting next epoch. Default False.
# -shuffle: Whether to shuffle examples between epochs.
# -ratings_path: The path to user, joke, rating file.
# -jokes_path: The path to jokes file 
# -max_vocab_size: Only the most max_vocab_size frequent words would be kept. We use 
#                  this to reduce memory footprint and the number of model parameters. Default: 150.
# -gpu: Use GPU or not. Default False.
#
#### Returns
# -train_iter: An iterator for training examples. You can call "for batch in train_iter"
#  to get the training batches. Note that if repeat=False, then
#  train_iter.init_epoch() needs to be called before starting next epoch
# -val_iter: An iterator for validation examples.
# -test_iter: An iterator for test examples.
# -text_field (when load_text is True): A field object, text_field.vocab is the vocabulary. 
#
#### Note:
# batch.ratings are ratings, can be 1, 2, 3, 4 or 5.
# batch.users are user ids, ranging from 1 to 150.
# batch.jokes are joke ids, ranging from 1 to 63978.
#
#### Example 1:
# train_iter, val_iter, test_iter, text_field = load_jester(batch_size=100, subsample_rate=1.0, load_text=True)
# V = len(text_field.vocab) # vocab size
# for epoch in range(num_epochs):
#     train_iter.init_epoch()
#     for batch in train_iter:
#         text = batch.text[0] # x is a tensor of size batch_size x max_len, where max_len
#                           # is the maximum joke length in the batch. The other jokes with
#                           # length < max_len are padded with text_field.vocab.stoi['<pad>']
#         ratings = batch.ratings-1 # batch.rating is a tensor containing actual ratings 1/2/3/4/5,
#                                # and we want that to be 0/1/2/3/4.
#         users = batch.users-1 
#         jokes = batch.jokes-1 
#### Example 2 (word id to word str, word str to word id):
# word_id = 5
# word_str = text_field.vocab.itos[word_id]
# word_id = text_field.vocab.stoi[word_id]

In [16]:
# Ignore this, irrelevant to homework
class Example(data.Example):
    @classmethod
    def fromlist(cls, data, fields):
        ex = cls()
        for (name, field), val in zip(fields, data):
            if field is not None:
                setattr(ex, name, field.preprocess(val))
        return ex

In [17]:
def load_jester(load_text=True, batch_size=1, subsample_rate=1.0, repeat=False, shuffle=True,
        ratings_path='jester_ratings.dat.gz', jokes_path='jester_items.clean.dat.gz', max_vocab_size=150, gpu=False):
    DEV = 0 if gpu else -1
    assert os.path.exists(jokes_path), "jokes file %s does not exist!"%jokes_path
    assert os.path.exists(ratings_path), "ratings file %s does not exist!"%ratings_path
    text_field = data.Field(lower=True, include_lengths=True, batch_first=True)
    rating_field = data.Field(sequential=False, use_vocab=False)
    user_field = data.Field(sequential=False, use_vocab=False)
    joke_field = data.Field(sequential=False, use_vocab=False)
    if load_text:
        fields = [('text', text_field), ('ratings', rating_field), ('users', user_field), ('jokes', joke_field)]
    else:
        fields = [('ratings', rating_field), ('users', user_field), ('jokes', joke_field)]
    jokes_text = {}
    joke = -1
    all_tokens = []
    with gzip.open(jokes_path) as f:
        for i, line in enumerate(f):
            l = line.decode('utf-8')
            if len(l.strip()) == 0:
                continue
            if l.strip()[-1] == ':':
                joke = int(l.strip().strip(':'))
            else:
                joke_text = l.strip()
                tokens = l.strip().split()
                all_tokens.extend(tokens)
                jokes_text[joke] = joke_text
    counts = Counter(all_tokens)
    most_common = counts.most_common(max_vocab_size)
    most_common = set([item[0] for item in most_common])


    print ('Loading Data, this might take several minutes')
    if subsample_rate < 1.0:
        print ('Subsampling rate set to %f'%subsample_rate)

    train, val, test = [], [], []
    with gzip.open(ratings_path) as f:
        for i, l in enumerate(f):
            if i % 100000 == 0:
                print ('%d lines read'%i)
            user, joke, rating = l.split()
            user = int(user)
            joke = int(joke)
            rating = int(rating)
            if load_text:
                assert joke in jokes_text
                example = Example.fromlist([' '.join([item for item in jokes_text[joke].split() if item in most_common]), rating, user, joke], fields)
            else:
                example = Example.fromlist([rating, user, joke], fields)
            p = random.random()
            q = random.random()
            if p < 0.98:
                if q < subsample_rate:
                    train.append(example)
            elif p < 0.99:
                val.append(example)
            elif p < 1.0:
                test.append(example)
        train = data.Dataset(train, fields)
        val = data.Dataset(val, fields)
        test = data.Dataset(test, fields)
        train_iter, val_iter, test_iter = data.BucketIterator.splits(
            (train, val, test), 
            batch_size=batch_size, device=DEV, repeat=repeat,
            shuffle=shuffle)
        train_iter.sort_key = lambda p: len(p.text) if hasattr(p, 'text') else 0
        val_iter.sort_key = lambda p: len(p.text) if hasattr(p, 'text') else 0
        test_iter.sort_key = lambda p: len(p.text) if hasattr(p, 'text') else 0

    print ('Data Loaded')

    if load_text:
        text_field.build_vocab(train)
        return train_iter, val_iter, test_iter, text_field
    else:
        return train_iter, val_iter, test_iter,

In [18]:
# PyTorch function for calcuating log \phi(x)
# example usage: normlogcdf1 = NormLogCDF()((h-b_r)/sigma)
class NormLogCDF(torch.autograd.Function):
    """
    We can implement our own custom autograd Functions by subclassing
    torch.autograd.Function and implementing the forward and backward passes
    which operate on Tensors.
    """

    def forward(self, input):
        """
        In the forward pass we receive a Tensor containing the input and return a
        Tensor containing the output. You can cache arbitrary Tensors for use in the
        backward pass using the save_for_backward method.
        """
        input_numpy = input.numpy()
        output = torch.Tensor(norm.logcdf(input_numpy))
        self.save_for_backward(input)
        return output

    def backward(self, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
        """
        input, = self.saved_tensors
        input_numpy = input.numpy()
        grad_input = grad_output.clone()
        grad_input = grad_input * torch.Tensor(np.exp(norm.logpdf(input_numpy) - norm.logcdf(input_numpy)))
        # clip infinities to 1000
        grad_input[grad_input==float('inf')] = 1000
        # clip -infinities to -1000
        grad_input[grad_input==float('-inf')] = -1000
        # set nans to 0
        grad_input[grad_input!=grad_input] = 0
        return grad_input

# (c)

In [19]:
K = 2
u = torch.nn.Embedding(70000,K)
v = torch.nn.Embedding(150,K)

In [20]:
# ind = Variable(torch.LongTensor([ [0] ]))
def loglikf(rij,uvdot,sigsq):
    return -math.log(1/math.sqrt(2*math.pi*sigsq))-((rij - uvdot)**2)/(2*sigsq)

In [21]:
print loglikf(1,2,1.0)

0.418938533205


# (d)

In [22]:
# PyTorch function for calculating log (\phi(x) - \phi(y)) where \phi is the normal distribution cdf
#### Arguments
# -x: a PyTorch Variable of size (batch_size).
# -y: a PyTorch Variable of size (batch_size). x[i] should be always greater than y[i].
#### Returns
# log (phi (x) - \phi(y))
def log_difference(x, y):
    # calculate by using p1 and p2
    logp1 = NormLogCDF()(x)
    logp2 = NormLogCDF()(y)
    logp = logp1 + torch.log(1 - torch.exp(logp2-logp1))
    # calculate by using 1-p1 and 1-p2
    log1_p1 = NormLogCDF()(-x)
    log1_p2 = NormLogCDF()(-y)
    logp_ = log1_p2 + torch.log(1 - torch.exp(log1_p1-log1_p2))
    return torch.max(logp, logp_)

In [23]:
def lin_reg_torch(nfeats,nclasses):
    model = torch.nn.Sequential()
    model.add_module('linear',torch.nn.Linear(nfeats,nclasses))
    return model

def train(model,loss,optimizer,x,y):
    x = Variable(x)
    y = Variable(y)

    optimizer.zero_grad()

    forward_x = model.forward(x)
    obj = loss.forward(forward_x,y)

    obj.backward()

    optimizer.step()

    return obj.data[0]

def predict(model, x):
    # x = Variable(x)
    forward_x = model.forward(x)
    return forward_x.data.numpy().argmax(axis=1)

In [24]:
BS = 1000

In [25]:
train2,val2,test2 = load_jester(load_text=False,batch_size=BS)

Loading Data, this might take several minutes
0 lines read
100000 lines read
200000 lines read
300000 lines read
400000 lines read
500000 lines read
600000 lines read
700000 lines read
800000 lines read
900000 lines read
1000000 lines read
1100000 lines read
1200000 lines read
1300000 lines read
1400000 lines read
1500000 lines read
1600000 lines read
1700000 lines read
Data Loaded


In [31]:
class Model(torch.nn.Module):
    def __init__(self,K):
        super(Model,self).__init__()
        self.usersfeats = torch.nn.Embedding(70000,K)
        self.jokesfeats = torch.nn.Embedding(151,K)
    
    def forward(self,inds_user,inds_jokes):
        innerprod = torch.sum(self.usersfeats(inds_user)*self.jokesfeats(inds_jokes),1)
        return innerprod

In [122]:
# K = 2
sigsq = 1.0
eta = 0.1
num_epochs = 3

valrmses = []
testrmses = []
trainrmses = []

for K in range(1,11):
    val_RMSE = 0
    val_cntr = 0
    test_RMSE = 0
    test_cntr = 0
    train_RMSE = 0
    train_cntr = 0
    
    usersfeats = torch.nn.Embedding(70000,K)
    jokesfeats = torch.nn.Embedding(150,K) #     model = Model(K)
#     jinter = jokesfeats.weight.data.numpy() #     jj = model.jokesfeats
#     jj = copy.deepcopy(jinter)
    optimizer = torch.optim.SGD([jokesfeats.weight,usersfeats.weight],lr=eta) # model.parameters(),lr=eta)
    
    for epoch in range(num_epochs):
        train2.init_epoch()
        for batch in train2:
            ratings = batch.ratings-1 # batch.rating is a tensor containing actual ratings 1/2/3/4/5,
                                       # and we want that to be 0/1/2/3/4.
            users = batch.users-1 
            jokes = batch.jokes-1

            inds_user = Variable(torch.LongTensor(users.data.numpy()))
            inds_jokes = Variable(torch.LongTensor(jokes.data.numpy()))

#             innerprod = model.forward(inds_user,inds_jokes) # torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)
            
            optimizer.zero_grad()

            sqerror = torch.mean(torch.pow(ratings.type(torch.FloatTensor)-torch.sum(jokesfeats(inds_jokes)*usersfeats(inds_user),1),2)) # model.forward(inds_user,inds_jokes),2)
            
            loss = (sqerror)/(2*sigsq) # *(1/len(innerprod.data.numpy()))
            
            loss.backward()
            
            optimizer.step()
            
        print "trained %s, epoch %s"%(K,epoch)

    for batch in val2:
        ratings = batch.ratings-1 # batch.rating is a tensor containing actual ratings 1/2/3/4/5,
                                   # and we want that to be 0/1/2/3/4.
        users = batch.users-1 
        jokes = batch.jokes-1

        inds_user = Variable(torch.LongTensor(users.data.numpy()))
        inds_jokes = Variable(torch.LongTensor(jokes.data.numpy()))

        sqerror = torch.mean(torch.pow(ratings.type(torch.FloatTensor)-torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1),2)) # model.forward(inds_user,inds_jokes),2)
            
        loss = (sqerror)/(2*sigsq) 
        # *(1/len(innerprod.data.numpy())) # torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)
        
        predval = torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)
        val_RMSE += torch.sum((predval - ratings.type(torch.FloatTensor))**2)
        val_cntr += len(predval.data.numpy())
        
#         print val_cntr
        
    valrmses.append(((val_RMSE.data.numpy()[0])/val_cntr)**(0.5))
    print valrmses[-1],val_cntr
    
    for batch in test2:
        ratings = batch.ratings-1 # batch.rating is a tensor containing actual ratings 1/2/3/4/5,
                                   # and we want that to be 0/1/2/3/4.
        users = batch.users-1 
        jokes = batch.jokes-1

        inds_user = Variable(torch.LongTensor(users.data.numpy()))
        inds_jokes = Variable(torch.LongTensor(jokes.data.numpy()))

        sqerror = torch.mean(torch.pow(ratings.type(torch.FloatTensor)-torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1),2)) # model.forward(inds_user,inds_jokes),2)
            
        loss = (sqerror)/(2*sigsq) # *(1/len(innerprod.data.numpy()))
        # torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)
        
        predtest = torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)
        test_RMSE += torch.sum((predtest - ratings.type(torch.FloatTensor))**2)
        test_cntr += len(predtest.data.numpy())
        
#         print val_cntr
        
    testrmses.append(((test_RMSE.data.numpy()[0])/test_cntr)**(0.5))
    print testrmses[-1],test_cntr
    
    for batch in train2:
        ratings = batch.ratings-1 # batch.rating is a tensor containing actual ratings 1/2/3/4/5,
                                   # and we want that to be 0/1/2/3/4.
        users = batch.users-1 
        jokes = batch.jokes-1

        inds_user = Variable(torch.LongTensor(users.data.numpy()))
        inds_jokes = Variable(torch.LongTensor(jokes.data.numpy()))

        sqerror = torch.mean(torch.pow(ratings.type(torch.FloatTensor)-torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1),2)) # model.forward(inds_user,inds_jokes),2)
            
        loss = (sqerror)/(2*sigsq) 
        # *(1/len(innerprod.data.numpy()))
        # torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)
        
        predtrain = torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)
        train_RMSE += torch.sum((predtrain - ratings.type(torch.FloatTensor))**2)
        train_cntr += len(predtrain.data.numpy())
        
#         print train_cntr
        
    trainrmses.append(((train_RMSE.data.numpy()[0])/train_cntr)**(0.5))
    print trainrmses[-1],train_cntr

KeyboardInterrupt: 

In [None]:
print test_RMSE, val_RMSE, train_RMSE

In [102]:
jokesfeats = torch.nn.Embedding(150,K)
jokesfeats.weight

Parameter containing:
-0.4912
 0.0631
-0.8695
-0.0235
 0.2256
-0.5616
 0.8018
 0.2404
 1.3535
-1.0316
 0.3283
-0.0952
 1.8953
-0.2617
-0.3830
 0.6190
 0.0557
-1.0260
-1.2016
 0.3934
 0.0304
 0.5886
-0.7764
-0.6609
 0.7594
 0.4348
-0.2745
 1.6237
 0.2187
 0.5889
-0.3514
-0.7929
 0.3931
-0.1372
 0.2976
-0.3363
-0.3690
 0.6460
 0.3366
-0.6922
-0.4670
-1.5891
 0.9637
 2.0889
-0.4492
-0.5623
-0.8382
-0.4349
-2.2254
-0.5203
 1.7069
-0.2346
-0.2412
 1.4362
-1.8415
-1.1993
 0.1150
 0.7530
-1.7088
 0.5079
-0.3731
-0.8246
 0.8406
-1.1249
-0.7094
 2.0205
 0.4288
-0.2266
 0.4405
 0.1001
-1.6828
-1.3644
-0.0558
 0.9146
 0.4125
-1.6856
 1.5639
-0.8841
 0.5744
 1.9769
 1.2566
 1.5846
 0.3157
-0.3791
 0.8348
-1.0050
 0.3863
-1.4546
-0.4096
 0.1729
 1.5468
-1.0196
 0.4184
 0.7269
 1.0911
 0.3391
 0.4730
 0.5163
 1.2311
 1.1840
 0.6907
 2.4943
 0.0064
-0.5533
-0.8083
-0.6516
 0.9552
-0.5806
-0.8419
-0.7018
-0.3806
-0.2238
-0.2506
-1.8640
 0.8074
 0.0291
-0.4569
-0.2663
 1.0749
 0.1500
-0.0474
-1.7623
 0

In [118]:
usersfeats(inds_user)

Variable containing:
-0.9948  0.5036 -0.2942 -0.0985
-1.6511  1.5819  0.7101  1.4378
 1.7107  1.3247 -1.6435 -1.3916
 0.3838  1.1673 -0.6978 -1.9112
-0.6337  0.3300 -0.1536 -1.0553
 0.0956  1.2218 -1.4849 -1.3780
-1.2208  1.5846  0.8946 -0.8819
-2.5031 -0.7906 -0.2690  0.1350
 0.2481  1.3830  0.8811 -1.1945
-1.0297  1.1959  0.3188  0.2053
[torch.FloatTensor of size 10x4]

In [119]:
jokesfeats(inds_jokes)

Variable containing:
-1.4858  1.3756 -0.6989 -0.9970
-1.2640  1.4753 -0.3965 -0.7435
-1.3660  1.5983 -0.5289 -0.8458
-1.4170  1.5523 -0.4898 -0.9015
-1.3823  1.3664 -0.4989 -0.7994
-1.2505  1.1387 -0.5301 -0.8422
-0.9590  1.0148 -0.3333 -0.8742
-1.2559  1.1467 -0.4537 -0.7781
-1.3731  1.4593 -0.5648 -0.7981
-1.3723  1.3030 -0.4903 -0.9104
[torch.FloatTensor of size 10x4]

In [87]:
torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)

Variable containing:
 1.1496e-04
 1.7510e-02
-4.6969e-03
 3.8812e-03
 4.0440e-03
 9.9134e-03
 2.5107e-02
 3.6534e-02
 2.3300e-03
-2.5831e-02
 9.5901e-02
-1.1241e-02
 5.7685e-02
 3.1562e-02
-5.5874e-03
 1.0018e-02
 8.2120e-04
 2.1525e-02
-2.5186e-02
-1.5395e-02
 1.7725e-02
-1.2393e-02
-6.4497e-03
-6.1645e-03
-7.1428e-03
 7.5400e-04
-1.5026e-03
 1.2218e-02
-1.7118e-03
 1.9977e-02
-5.8901e-04
 6.3182e-03
 9.3312e-03
-1.6528e-02
 3.0061e-03
 1.4435e-02
-3.5218e-03
-5.7108e-02
 4.7205e-02
-2.7426e-02
-8.7882e-04
-1.4990e-02
-2.8393e-02
-7.2167e-03
 1.5428e-02
 2.8546e-02
 6.5519e-04
 1.7365e-02
 2.6103e-02
 7.8218e-03
-9.7666e-02
-1.5205e-02
-4.8704e-03
 2.9948e-02
-3.4937e-03
-1.6972e-02
-4.2239e-03
 4.4436e-02
-8.4850e-03
-5.6003e-03
-8.0138e-03
-2.2477e-02
 3.3048e-02
-1.3476e-02
 7.9103e-03
-1.2879e-02
 8.9745e-03
-2.8447e-02
-5.5703e-03
-2.1857e-03
 6.0662e-04
-1.1088e-04
-2.5615e-02
-2.9565e-03
-9.7761e-04
-6.4519e-02
 3.0295e-02
 2.7504e-02
 3.6038e-03
-6.9601e-04
-2.4567e-03
-8.8986

In [70]:
usersfeats(Variable(torch.LongTensor([0,1])))

Variable containing:
-1.1101 -0.6508 -0.8809 -0.4251  0.1054  1.5055 -2.4429 -1.0579 -1.1910 -0.3968
-3.4318  0.6052  0.3852  1.4339  0.4811  1.0113 -1.6192 -0.7617 -0.5112  1.2556
[torch.FloatTensor of size 2x10]

In [71]:
jokesfeats(Variable(torch.LongTensor([0,1])))

Variable containing:
-1.3801 -0.3368  0.0419  0.3121  0.4789  0.3931 -0.9177  1.0933  2.1546  0.1187
-0.4991  0.9449 -0.2203  0.3468 -1.1888  0.4214 -1.2120 -0.7144 -0.1448  0.8207
[torch.FloatTensor of size 2x10]

In [74]:
torch.sum(usersfeats(Variable(torch.LongTensor([0,1])))*jokesfeats(Variable(torch.LongTensor([0,1]))),1)

Variable containing:
 0.6962
 6.1626
[torch.FloatTensor of size 2]

# (f)

In [27]:
# K = 2
sigsq = 1.0
eta = 0.1
num_epochs = 2

valrmsesf = []
testrmsesf = []
trainrmsesf = []

for K in range(2,3):
    val_RMSE = 0
    val_cntr = 0
    test_RMSE = 0
    test_cntr = 0
    train_RMSE = 0
    train_cntr = 0
    
    usersfeats = torch.nn.Embedding(70000,K)
    jokesfeats = torch.nn.Embedding(150,K) #     model = Model(K)
    a = torch.nn.Embedding(70000,1)
    b = torch.nn.Embedding(150,1)
    g = torch.nn.Embedding(1,1)
#     g = Variable(torch.from_numpy(np.zeros(1)), requires_grad=True) #Variable(torch.from_numpy(np.array(1.)), requires_grad=True) #Variable(torch.Tensor([2])) # torch.nn.Embedding(1,1)
#     g = g.type(torch.FloatTensor)
    jinter = jokesfeats.weight.data.numpy() #     jj = model.jokesfeats
    jj = copy.deepcopy(jinter)
    optimizer = torch.optim.SGD([usersfeats.weight,jokesfeats.weight,a.weight,b.weight,g.weight],lr=eta) # model.parameters(),lr=eta)
    
    for epoch in range(num_epochs):
        train2.init_epoch()
        for batch in train2:
            ratings = batch.ratings-1 # batch.rating is a tensor containing actual ratings 1/2/3/4/5,
                                       # and we want that to be 0/1/2/3/4.
            users = batch.users-1 
            jokes = batch.jokes-1

            inds_user = Variable(torch.LongTensor(users.data.numpy()))
            inds_jokes = Variable(torch.LongTensor(jokes.data.numpy()))
            jsxz = Variable(torch.LongTensor(jokes.data.numpy()))
            
            jsxz.data = torch.LongTensor([0]*len(inds_user))

#             innerprod = model.forward(inds_user,inds_jokes) # torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)
            
            optimizer.zero_grad()

            sqerror = torch.mean(torch.pow(ratings.type(torch.FloatTensor)-torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)+a(inds_user).squeeze()+b(inds_jokes).squeeze()+g(jsxz).squeeze(),2)) # model.forward(inds_user,inds_jokes),2)
            
            loss = (sqerror)/(2*sigsq) # *(1/len(innerprod.data.numpy()))
            
            loss.backward()
            
            optimizer.step()
            
        print "trained %s, epoch %s"%(K,epoch)

    for batch in val2:
        ratings = batch.ratings-1 # batch.rating is a tensor containing actual ratings 1/2/3/4/5,
                                   # and we want that to be 0/1/2/3/4.
        users = batch.users-1 
        jokes = batch.jokes-1

        inds_user = Variable(torch.LongTensor(users.data.numpy()))
        inds_jokes = Variable(torch.LongTensor(jokes.data.numpy()))
        jsxz = Variable(torch.LongTensor(jokes.data.numpy()))
            
        jsxz.data = torch.LongTensor([0]*len(inds_user))

#         sqerror = torch.mean(torch.pow(ratings.type(torch.FloatTensor)-torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes)+a(inds_user)+b(inds_jokes),2))+g.expand(len(inds_user))) # model.forward(inds_user,inds_jokes),2)
            
#         loss = (sqerror)/(2*sigsq) 
        # *(1/len(innerprod.data.numpy())) # torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)
        
        predval = torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)+a(inds_user).squeeze()+b(inds_jokes).squeeze()+g(jsxz).squeeze()
        val_RMSE += torch.sum((predval - ratings.type(torch.FloatTensor))**2)
        val_cntr += len(predval.data.numpy())
        
#         print val_cntr
        
    valrmsesf.append(((val_RMSE.data.numpy()[0])/val_cntr)**(0.5))
    print valrmsesf[-1]
    
    for batch in test2:
        ratings = batch.ratings-1 # batch.rating is a tensor containing actual ratings 1/2/3/4/5,
                                   # and we want that to be 0/1/2/3/4.
        users = batch.users-1 
        jokes = batch.jokes-1

        inds_user = Variable(torch.LongTensor(users.data.numpy()))
        inds_jokes = Variable(torch.LongTensor(jokes.data.numpy()))
        jsxz = Variable(torch.LongTensor(jokes.data.numpy()))
            
        jsxz.data = torch.LongTensor([0]*len(inds_user))
        
#         sqerror = torch.mean(torch.pow(ratings.type(torch.FloatTensor)-torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes)+a(inds_user)+b(inds_jokes),1)+g.expand(len(inds_user)),2)) # model.forward(inds_user,inds_jokes),2)
            
#         loss = (sqerror)/(2*sigsq) # *(1/len(innerprod.data.numpy()))
        # torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)
        
        predtest = torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)+a(inds_user).squeeze()+b(inds_jokes).squeeze()+g(jsxz).squeeze()
        test_RMSE += torch.sum((predtest - ratings.type(torch.FloatTensor))**2)
        test_cntr += len(predtest.data.numpy())
        
#         print val_cntr
        
    testrmsesf.append(((test_RMSE.data.numpy()[0])/test_cntr)**(0.5))
    print testrmsesf[-1]
    
    for batch in train2:
        ratings = batch.ratings-1 # batch.rating is a tensor containing actual ratings 1/2/3/4/5,
                                   # and we want that to be 0/1/2/3/4.
        users = batch.users-1 
        jokes = batch.jokes-1

        inds_user = Variable(torch.LongTensor(users.data.numpy()))
        inds_jokes = Variable(torch.LongTensor(jokes.data.numpy()))
        jsxz = Variable(torch.LongTensor(jokes.data.numpy()))
            
        jsxz.data = torch.LongTensor([0]*len(inds_user))

#         sqerror = torch.mean(torch.pow(ratings.type(torch.FloatTensor)-torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes)+a(inds_user)+b(inds_jokes),1)+g.expand(len(inds_user)),2)) # model.forward(inds_user,inds_jokes),2)
            
#         loss = (sqerror)/(2*sigsq) 
        # *(1/len(innerprod.data.numpy()))
        # torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)
        
        predtrain = torch.sum(usersfeats(inds_user)*jokesfeats(inds_jokes),1)+a(inds_user).squeeze()+b(inds_jokes).squeeze()+g(jsxz).squeeze()
        train_RMSE += torch.sum((predtrain - ratings.type(torch.FloatTensor))**2)
        train_cntr += len(predtrain.data.numpy())
        
#         print train_cntr
        
    trainrmsesf.append(((train_RMSE.data.numpy()[0])/train_cntr)**(0.5))
    print trainrmsesf[-1]

KeyboardInterrupt: 

In [None]:
g.weight

In [40]:
g.weight

Parameter containing:
 2.3577
[torch.FloatTensor of size 1x1]

In [41]:
a.weight

Parameter containing:
-3.8690e-02
-5.2234e-01
 1.1857e+00
     ⋮      
-4.9123e-01
 9.4260e-01
-1.0283e+00
[torch.FloatTensor of size 70000x1]

In [42]:
b.weight

Parameter containing:
 1.2927
 0.3630
-1.3614
-0.7207
 1.7289
-0.3283
-0.2725
-0.0153
-1.6508
 0.0394
-0.9581
 0.4125
-0.4558
-1.4019
 0.4120
-0.9003
-1.1523
-0.6141
-0.8906
 1.2454
-1.2888
-0.0946
 0.5821
-2.0305
 0.7308
-1.5659
 1.0679
-2.0993
-0.3205
 0.6242
-0.7717
 0.6388
 0.5793
 0.3892
-0.0944
 0.0933
-0.9935
 2.0858
-0.3333
 1.5665
-1.5787
-0.1048
-0.6989
 0.3065
-0.7339
-1.4539
-0.7523
 0.3730
 0.3767
-1.4900
-1.2660
-2.7776
 2.0002
 0.5168
 1.5059
-0.9852
-0.1556
 0.6789
 0.0851
-0.6038
-0.0509
 1.6412
-0.2190
-1.7538
 0.1660
-0.5062
-0.4556
 0.9620
 0.0897
-1.0589
-0.0060
 1.0297
 0.2579
-1.1050
-0.8882
 0.7862
 0.2309
-0.5020
 0.3128
 0.8548
-1.7883
-0.7496
-1.8927
 1.6628
 1.1660
 1.6673
 0.3202
 1.7129
-0.3275
-0.1701
-2.2393
-0.2929
-0.9970
 0.6260
-0.4084
-0.6218
-0.1352
-0.2476
-0.5536
 0.4067
-0.9964
 0.9301
-0.6633
-0.0039
 0.0266
 0.7100
 0.8097
-0.3461
-1.0909
-0.9569
 0.6368
-0.8489
 0.0096
 0.5415
 0.8711
 1.1677
-0.9336
 0.1388
-0.9748
 1.1559
 0.5268
 0.5369
-0

In [47]:
np.argmax(b.weight.data.numpy())

131

In [64]:
b.weight.data.numpy()[np.argmax(b.weight.data.numpy())]

array([ 2.89153934], dtype=float32)

In [49]:
np.argmin(b.weight.data.numpy())

51

In [65]:
b.weight.data.numpy()[np.argmin(b.weight.data.numpy())]

array([-2.77755117], dtype=float32)

In [None]:
type(text_field)