## Introduction to Torch

In [2]:
import torch

In [3]:
import torch.autograd as autograd

In [4]:
import torch.nn as nn

In [5]:
import torch.nn.functional as F

In [6]:
import torch.optim as optim

In [7]:
# Seed PyTorch's random number generator so the random tensors below are reproducible.
torch.manual_seed(1)

<torch._C.Generator at 0x7f84b0514960>

In [8]:
# Build a 1-D tensor (a vector of 3 elements) from a flat Python list.
V_data = [1., 2., 3.]
V = torch.Tensor(V_data)
print(V)


 1
 2
 3
[torch.FloatTensor of size 3]



In [9]:
# Build a 2-D tensor (a 2x3 matrix) from a list of rows.
M_data = [[1., 2., 3.], [4., 5., 6.]]
M = torch.Tensor(M_data)
print(M)


 1  2  3
 4  5  6
[torch.FloatTensor of size 2x3]



In [10]:
# Build a 3-D tensor of size 2x2x2 from a doubly nested list.
T_data = [[[1., 2.], [3., 4.]], [[5., 6.], [7., 8.]]]
T = torch.Tensor(T_data)
print(T)


(0 ,.,.) = 
  1  2
  3  4

(1 ,.,.) = 
  5  6
  7  8
[torch.FloatTensor of size 2x2x2]



In [11]:
# Indexing a vector with one index gives a single element.
print(V[0])

1.0


In [12]:
# Indexing a matrix with one index gives its first row as a 1-D tensor.
print(M[0])


 1
 2
 3
[torch.FloatTensor of size 3]



In [13]:
# Indexing a 3-D tensor with one index gives its first 2x2 slice.
print(T[0])


 1  2
 3  4
[torch.FloatTensor of size 2x2]



In [14]:
# Create a 3x4x5 tensor filled with random values.
x = torch.randn((3, 4, 5))
print(x)


(0 ,.,.) = 
 -2.9718  1.7070 -0.4305 -2.2820  0.5237
  0.0004 -1.2039  3.5283  0.4434  0.5848
  0.8407  0.5510  0.3863  0.9124 -0.8410
  1.2282 -1.8661  1.4146 -1.8781 -0.4674

(1 ,.,.) = 
 -0.7576  0.4215 -0.4827 -1.1198  0.3056
  1.0386  0.5206 -0.5006  1.2182  0.2117
 -1.0613 -1.9441 -0.9596  0.5489 -0.9901
 -0.3826  1.5037  1.8267  0.5561  1.6445

(2 ,.,.) = 
  0.4973 -1.5067  1.7661 -0.3569 -0.1713
  0.4068 -0.4284 -1.1299  1.4274 -1.4027
  1.4825 -1.1559  1.6190  0.9581  0.7747
  0.1940  0.1687  0.3061  1.0743 -1.0327
[torch.FloatTensor of size 3x4x5]



In [15]:
# `+` on two tensors of the same size adds them element by element.
x = torch.Tensor([1., 2., 3.])
y = torch.Tensor([4., 5., 6.])
z = x + y
print(z)


 5
 7
 9
[torch.FloatTensor of size 3]



In [16]:
# torch.cat with no axis argument stacks along the first axis:
# a 2x5 and a 3x5 tensor become one 5x5 tensor.
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 = torch.cat([x_1, y_1])
print(z_1)


 1.0930  0.7769 -1.3128  0.7099  0.9944
-0.2694 -0.6491 -0.1373 -0.2954 -0.7725
-0.2215  0.5074 -0.6794 -1.6115  0.5230
-0.8890  0.2620  0.0302  0.0013 -1.3987
 1.4666 -0.1028 -0.0097 -0.8420 -0.2067
[torch.FloatTensor of size 5x5]



In [17]:
# A random 2x3x4 tensor, reshaped in the next two cells.
x = torch.randn(2, 3, 4)
print(x)


(0 ,.,.) = 
  1.0672  0.1732 -0.6873  0.3111
  0.2358 -1.0658  0.3620  0.3776
 -0.2443 -0.5850  2.0812 -0.1186

(1 ,.,.) = 
  0.4903  0.8349  0.8894  0.4148
  0.0507 -0.9644 -2.0111  0.5245
  2.1332 -0.0822  0.8388 -1.3233
[torch.FloatTensor of size 2x3x4]



In [18]:
# Reshape 2x3x4 into 2x12: each 3x4 slice is flattened into one row.
print(x.view(2, 12))



Columns 0 to 9 
 1.0672  0.1732 -0.6873  0.3111  0.2358 -1.0658  0.3620  0.3776 -0.2443 -0.5850
 0.4903  0.8349  0.8894  0.4148  0.0507 -0.9644 -2.0111  0.5245  2.1332 -0.0822

Columns 10 to 11 
 2.0812 -0.1186
 0.8388 -1.3233
[torch.FloatTensor of size 2x12]



In [19]:
# Same reshape; -1 lets view() infer the remaining dimension (12 here).
print(x.view(2, -1))



Columns 0 to 9 
 1.0672  0.1732 -0.6873  0.3111  0.2358 -1.0658  0.3620  0.3776 -0.2443 -0.5850
 0.4903  0.8349  0.8894  0.4148  0.0507 -0.9644 -2.0111  0.5245  2.1332 -0.0822

Columns 10 to 11 
 2.0812 -0.1186
 0.8388 -1.3233
[torch.FloatTensor of size 2x12]



In [20]:
# Wrap a tensor in a Variable with requires_grad=True so gradients are
# tracked for it; .data exposes the wrapped tensor.
x = autograd.Variable(torch.Tensor([1., 2., 3.]), requires_grad=True)
print(x.data)


 1
 2
 3
[torch.FloatTensor of size 3]



In [21]:
# A second gradient-tracking Variable; printing the Variable itself (rather
# than .data) adds the "Variable containing:" header to the output.
y = autograd.Variable(torch.Tensor([4., 5., 6]), requires_grad=True)
print(y)

Variable containing:
 4
 5
 6
[torch.FloatTensor of size 3]



In [22]:
# Adding two Variables yields a new Variable that remembers how it was made.
z = x + y

In [23]:
# The wrapped tensor holds the element-wise sum.
print(z.data)


 5
 7
 9
[torch.FloatTensor of size 3]



In [24]:
# grad_fn records the operation that produced z (an addition).
print(z.grad_fn)

<torch.autograd.function.AddBackward object at 0x7f84b0063b88>


In [25]:
# Reduce z to a single value so backward() can be called on it without arguments.
s = z.sum()

In [26]:
# s holds the sum of all elements of z.
print(s)

Variable containing:
 21
[torch.FloatTensor of size 1]



In [27]:
# s in turn records that it was produced by a sum.
print(s.grad_fn)

<torch.autograd.function.SumBackward object at 0x7f84b0063e58>


In [28]:
# Back-propagate from s through the recorded sum and addition; this fills in x.grad.
s.backward()

In [29]:
# s = sum(x + y), so the derivative of s with respect to each element of x is 1.
print(x.grad)

Variable containing:
 1
 1
 1
[torch.FloatTensor of size 3]



In [30]:
# Plain (non-Variable) random 2x2 tensor; rebinds x.
x = torch.randn((2, 2))

In [31]:
# Second plain random 2x2 tensor; rebinds y.
y = torch.randn((2, 2))

In [32]:
# Sum of two plain tensors: just a tensor, with no computation history attached.
z = x + y

In [33]:
# Show the plain tensor sum for comparison with the Variable version below.
print(z)


-0.5494  2.7333
 2.4204 -1.8912
[torch.FloatTensor of size 2x2]



In [34]:
# Wrap the same two tensors in Variables.
var_x = autograd.Variable(x)
var_y = autograd.Variable(y)

In [35]:
# The same addition, this time performed on Variables.
var_z = var_x + var_y

In [36]:
# Unlike the plain tensor z, var_z carries a grad_fn describing the addition.
print(var_z.grad_fn)

<torch.autograd.function.AddBackward object at 0x7f847e255048>


In [37]:
# Pull the raw tensor back out of the Variable.
var_z_data = var_z.data

In [38]:
# Same values as z above, printed without the "Variable containing:" header.
print(var_z_data)


-0.5494  2.7333
 2.4204 -1.8912
[torch.FloatTensor of size 2x2]



In [39]:
# Re-wrap the raw tensor in a brand-new Variable.
new_var_z = autograd.Variable(var_z_data)

In [40]:
# The new Variable holds the same values as var_z.
print(new_var_z)

Variable containing:
-0.5494  2.7333
 2.4204 -1.8912
[torch.FloatTensor of size 2x2]



In [41]:
# Its wrapped tensor is likewise unchanged.
print(new_var_z.data)


-0.5494  2.7333
 2.4204 -1.8912
[torch.FloatTensor of size 2x2]



In [42]:
# grad_fn is None: going through .data dropped the link to var_x and var_y,
# so the new Variable has no record of the addition.
print(new_var_z.grad_fn)

None


## Deep learning building blocks: Affine maps, non-linearities and objectives

In [43]:
import torch

In [44]:
import torch.autograd as autograd

In [45]:
import torch.nn as nn

In [46]:
import torch.nn.functional as F

In [47]:
import torch.optim as optim

In [48]:
# Re-seed the random number generator at the start of this section.
torch.manual_seed(1)

<torch._C.Generator at 0x7f84b0514960>

In [49]:
# Affine map from 5 input features to 3 output features.
lin = nn.Linear(5, 3)

In [50]:
# The module's repr shows its input and output sizes.
print(lin)

Linear (5 -> 3)


In [51]:
# Rebind lin to a different layer: 6 input features, 4 output features.
lin = nn.Linear(6, 4)

In [52]:
# Two random input rows of 6 features each, matching lin's input size.
data = autograd.Variable(torch.randn(2, 6))

In [53]:
# Applying the layer maps each 6-feature row to a 4-feature row (2x6 -> 2x4).
print(lin(data))

Variable containing:
 0.3700  0.5486 -0.5048 -1.1223
 0.6555 -0.0479  0.3285 -0.5158
[torch.FloatTensor of size 2x4]



In [54]:
# ReLU is applied element-wise: negative entries become 0, the rest pass through.
data = autograd.Variable(torch.randn(2, 2))
print(data)
print(F.relu(data))

Variable containing:
 1.0743 -1.0327
 1.0930  0.7769
[torch.FloatTensor of size 2x2]

Variable containing:
 1.0743  0.0000
 1.0930  0.7769
[torch.FloatTensor of size 2x2]



In [55]:
# A 1-D Variable of 5 random scores, used as softmax input below.
data = autograd.Variable(torch.randn(5))

In [56]:
# Raw scores: they may be negative and do not sum to 1.
print(data)

Variable containing:
-1.3128
 0.7099
 0.9944
-0.2694
-0.6491
[torch.FloatTensor of size 5]



In [57]:
# Softmax turns the scores into a probability distribution. `data` is 1-D, so
# the reduction runs over its only axis; dim=0 is passed explicitly because
# relying on the implicit dimension is deprecated.
print(F.softmax(data, dim=0))

Variable containing:
 0.0428
 0.3232
 0.4296
 0.1214
 0.0830
[torch.FloatTensor of size 5]



In [58]:
# The softmax outputs sum to 1. dim=0 is explicit because `data` is 1-D and
# the implicit-dimension form of softmax is deprecated.
print(F.softmax(data, dim=0).sum())

Variable containing:
 1
[torch.FloatTensor of size 1]



In [59]:
# Log of the softmax probabilities, over the single axis of the 1-D `data`.
# dim=0 is explicit because the implicit-dimension form is deprecated.
print(F.log_softmax(data, dim=0))

Variable containing:
-3.1521
-1.1294
-0.8449
-2.1087
-2.4884
[torch.FloatTensor of size 5]



## Objective Functions

In [97]:
# Training set: (list of word tokens, language label) pairs.
# NOTE: this rebinds `data`, which earlier cells used for tensors/Variables.
data = [("me gusta comer en la cafeteria".split(), "SPANISH"),
       ("Give it to me".split(), "ENGLISH"),
       ("No creo que sea una buena idea".split(), "SPANISH"),
       ("No it is not a good idea to get lost at sea".split(), "ENGLISH")]

In [98]:
# Held-out examples in the same (tokens, label) format as `data`.
test_data = [("Yo creo que si".split(), "SPANISH"),
            ("it is lost on me".split(), "ENGLISH")]

In [99]:
# Show the tokenised form: each sentence is a list of words paired with its label.
print(test_data)

[(['Yo', 'creo', 'que', 'si'], 'SPANISH'), (['it', 'is', 'lost', 'on', 'me'], 'ENGLISH')]


In [100]:
# Maps each distinct word to an integer index; filled in by the next cell.
word_to_ix = {}

In [101]:
# Give every distinct word in the training and test sentences the next free
# integer index, in order of first appearance. Test sentences are included so
# that every word looked up later has an index.
for sent, _ in data + test_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)

In [102]:
# Input width of the classifier: one column per distinct word indexed above.
VOCAB_SIZE = len(word_to_ix)
# Number of output classes (the sentences are labelled SPANISH or ENGLISH).
NUM_LABELS = 2

In [103]:
class BoWClassifier(nn.Module):
    """Bag-of-words classifier: one affine layer followed by log-softmax.

    The affine layer maps a vocabulary-sized count vector to one score per
    label, and the scores are normalised into log-probabilities.
    """

    def __init__(self, num_labels, vocab_size):
        """Create the affine layer.

        Args:
            num_labels: number of output classes.
            vocab_size: length of the bag-of-words input vector.
        """
        super(BoWClassifier, self).__init__()

        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, bow_vec):
        """Return log-probabilities over the labels for ``bow_vec``.

        Args:
            bow_vec: bag-of-words counts whose last dimension has size
                ``vocab_size`` (e.g. a ``1 x vocab_size`` row).

        Returns:
            Log-probabilities with the same leading dimensions as ``bow_vec``
            and a last dimension of size ``num_labels``.
        """
        # nn.Linear puts the label scores on the last axis, so normalise over
        # that axis explicitly; relying on log_softmax's implicit dimension is
        # deprecated and emits a warning.
        return F.log_softmax(self.linear(bow_vec), dim=-1)

In [104]:
def make_bow_vector(sentence, word_to_ix):
    """Build the bag-of-words count vector for ``sentence``.

    Args:
        sentence: iterable of word tokens.
        word_to_ix: mapping from each word to its column index.

    Returns:
        A ``1 x len(word_to_ix)`` float tensor in which column
        ``word_to_ix[w]`` holds the number of occurrences of ``w``.

    Raises:
        KeyError: if a token is missing from ``word_to_ix``.
    """
    counts = torch.zeros(len(word_to_ix))
    for token in sentence:
        column = word_to_ix[token]
        counts[column] = counts[column] + 1
    # Present the counts as a single-row matrix.
    return counts.view(1, -1)

In [105]:
def make_target(label, label_to_ix):
    """Encode ``label`` as a one-element ``LongTensor`` holding its class index.

    Args:
        label: the label to encode; must be a key of ``label_to_ix``.
        label_to_ix: mapping from label to integer class index.

    Returns:
        A ``torch.LongTensor`` of size 1 containing ``label_to_ix[label]``.
    """
    class_index = label_to_ix[label]
    return torch.LongTensor([class_index])

In [106]:
# Instantiate the classifier; its weights start out randomly initialised.
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

In [107]:
# Inspect the finished word -> index mapping.
print(word_to_ix)

{'que': 11, 'Give': 6, 'lost': 21, 'la': 4, 'una': 13, 'at': 22, 'get': 20, 'a': 18, 'is': 16, 'good': 19, 'to': 8, 'gusta': 1, 'Yo': 23, 'sea': 12, 'creo': 10, 'cafeteria': 5, 'No': 9, 'idea': 15, 'buena': 14, 'not': 17, 'on': 25, 'it': 7, 'si': 24, 'comer': 2, 'me': 0, 'en': 3}


In [108]:
# The model's parameters come from its nn.Linear layer: first the weight
# matrix (NUM_LABELS x VOCAB_SIZE), then the bias vector.
for param in model.parameters():
    print(param)

Parameter containing:

Columns 0 to 9 
-0.0337  0.1765  0.0763 -0.0027 -0.0337  0.0159 -0.1765  0.1041  0.0141 -0.1783
-0.0401  0.0151 -0.1313  0.0597  0.1677 -0.0544 -0.0597  0.0279  0.0984  0.0541

Columns 10 to 19 
 0.0642 -0.1412  0.0058  0.1147  0.1744 -0.1844  0.0339  0.1503  0.1582  0.0160
 0.0886 -0.1466  0.1503  0.0746  0.0485  0.0580  0.0984 -0.0573 -0.0593  0.1032

Columns 20 to 25 
-0.1422 -0.0204 -0.1415  0.1538  0.1206 -0.0480
-0.0902 -0.0563  0.1553  0.0992 -0.0282  0.1496
[torch.FloatTensor of size 2x26]

Parameter containing:
 0.1823
-0.1915
[torch.FloatTensor of size 2]



In [110]:
# Run the still-untrained model on one training example to see the output
# format: a 1x2 row of log-probabilities, one per label.
sample = data[1]
bow_vector = make_bow_vector(sample[0], word_to_ix)
log_probs = model(autograd.Variable(bow_vector))
print(log_probs)

Variable containing:
-0.5736 -0.8290
[torch.FloatTensor of size 1x2]



In [112]:
# Baseline before training: for each test sentence, print its bag-of-words
# vector and the log-probabilities the untrained model assigns.
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(bow_vec)
    print(log_probs)

Variable containing:

Columns 0 to 12 
    0     0     0     0     0     0     0     0     0     0     1     1     0

Columns 13 to 25 
    0     0     0     0     0     0     0     0     0     0     1     1     0
[torch.FloatTensor of size 1x26]

Variable containing:
-0.4525 -1.0107
[torch.FloatTensor of size 1x2]

Variable containing:

Columns 0 to 12 
    1     0     0     0     0     0     0     1     0     0     0     0     0

Columns 13 to 25 
    0     0     0     1     0     0     0     0     1     0     0     0     1
[torch.FloatTensor of size 1x26]

Variable containing:
-0.5846 -0.8150
[torch.FloatTensor of size 1x2]



In [113]:
# The first parameter is the weight matrix; this prints its column for the
# word "creo" (one weight per label) before training.
print(next(model.parameters())[:, word_to_ix["creo"]])


Variable containing:
1.00000e-02 *
  6.4246
  8.8644
[torch.FloatTensor of size 2]



In [114]:
# Negative log-likelihood loss, applied to the log-probabilities the model
# outputs; plain SGD with learning rate 0.1 updates the model's parameters.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [116]:
# Class index for each label; these index the two entries of the model's output.
label_to_ix = {"SPANISH": 0, "ENGLISH": 1}

In [117]:
# Train for 100 passes over the training set, one example per update step.
for epoch in range(100):
    for instance, label in data:
        # Clear gradients left over from the previous example before computing new ones.
        model.zero_grad()
        # Turn the sentence and its label into model input and target.
        bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
        target = autograd.Variable(make_target(label, label_to_ix))
        # Forward pass, loss, backward pass, then one SGD parameter update.
        log_probs = model(bow_vec)
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

In [118]:
# After training: print the model's log-probabilities for each test sentence
# (index 0 is SPANISH, index 1 is ENGLISH per label_to_ix).
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(log_probs)

Variable containing:
-0.0859 -2.4970
[torch.FloatTensor of size 1x2]

Variable containing:
-2.5910 -0.0779
[torch.FloatTensor of size 1x2]



In [119]:
# Weights for "creo" after training: the SPANISH weight (index 0) is now
# positive and the ENGLISH weight (index 1) negative.
print(next(model.parameters())[:, word_to_ix["creo"]])

Variable containing:
 0.5013
-0.3484
[torch.FloatTensor of size 2]

