<a href="https://colab.research.google.com/github/ikanez/nlp-harvard/blob/master/2_Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sizes and Embeddings

In [0]:
!pip install -q torch torchtext opt_einsum
!pip install -qU git+https://github.com/harvardnlp/namedtensor

In [0]:
import torch
from namedtensor import ntorch

In [0]:
# A plain torch tensor reports its shape as an ordered list of sizes; the axes have no names.
torch.ones(3, 5).shape

torch.Size([3, 5])

In [0]:
# A 3x5 tensor of ones whose two dimensions carry the names "batch" and "vocab".
q = ntorch.ones(3, 5, names=("batch", "vocab"))

In [0]:
# Vocabulary size, used as the length of the "vocab" dimension for the weights
# and one-hot inputs below.
VOCAB = 100

In [0]:
# Random weight vector with one entry per vocabulary item; requires_grad=True
# asks autograd to track operations on it.
weight = ntorch.randn(VOCAB, requires_grad=True, names=("vocab",))

In [0]:
# Random feature vector over the same "vocab" dimension, used to try out the dot product.
phi = ntorch.randn(VOCAB, names=("vocab",))

In [0]:
# Dot product that contracts the named "vocab" dimension; no named dimensions
# remain in the result.
weight.dot("vocab", phi)

NamedTensor(
	tensor(12.2432),
	())

In [0]:
"the movie was okay, but it was not very good"

In [0]:
# One sentence as a list of vocabulary indices.
# NOTE(review): the trailing 0s look like padding — confirm.
sentences = [4,5,2,7,3,4,5,0,0,0,0]

In [0]:
# Row i of the identity matrix is the one-hot vector for index i, so indexing it with
# the id list yields one one-hot row per position ("time") over the vocabulary ("vocab").
padded = ntorch.tensor(torch.eye(VOCAB)[sentences], names=("time", "vocab"))

OrderedDict([('time', 11), ('vocab', 100)])

In [0]:
padded.shape

OrderedDict([('time', 11), ('vocab', 100)])

In [0]:
# Summing the one-hot rows over "time" leaves a count per vocabulary entry.
bagofwords = padded.sum("time")

In [0]:
# Max over "time" of 0/1 one-hot rows: 1.0 where a vocabulary entry occurs at least
# once, 0.0 otherwise. The second element of the returned pair is not needed here.
setofwords, _ = padded.max("time")

In [0]:
# Score the sentence: weighted sum of the word counts over "vocab", passed through a sigmoid.
y = weight.dot("vocab", bagofwords).sigmoid()

In [0]:
# Run autograd backwards from y so that the gradient with respect to weight is populated.
y.backward()

In [0]:
# .values is the underlying torch tensor; its .grad holds the gradient filled in by backward().
weight.values.grad

tensor([0.0000, 0.1805, 0.1805, 0.0000, 0.1805, 0.1805, 0.1805, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000])

## Softmax Regression

In [0]:
# Number of output classes, used as the length of the "classes" dimension.
CLASSES = 5

In [0]:
# One weight row per class: the weights now carry a "classes" dimension in addition
# to "vocab". This rebinds the name `weight` used earlier for the single weight vector.
weight = ntorch.randn(CLASSES, VOCAB, requires_grad=True, names=("classes", "vocab"))

In [0]:
weight.shape

OrderedDict([('classes', 5), ('vocab', 100)])

In [0]:
# Contracting "vocab" leaves one unnormalised score per class.
y = weight.dot("vocab", bagofwords)

In [0]:
y.shape

OrderedDict([('classes', 5)])

In [0]:
# Normalise the scores over "classes", pick the entry at index 3, and backpropagate
# from it to obtain its gradient with respect to weight.
y.softmax("classes").get("classes", 3).backward()

In [0]:
# Backpropagate from the raw (pre-softmax) score at index 3 of "classes".
# The forward pass is recomputed here because the previous backward() call already
# freed the autograd graph attached to `y`; calling backward() through that same
# graph again raises a RuntimeError. Gradients accumulate, so weight's .grad now
# holds the sum of this gradient and the one from the previous cell.
weight.dot("vocab", bagofwords).get("classes", 3).backward()

In [0]:
weight.values.grad

tensor([[ 0.0000e+00, -1.0718e-03, -1.0718e-03,  0.0000e+00, -1.0718e-03,
         -1.0718e-03, -1.0718e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.

In [0]:
# max over "classes" returns a pair; the second element is the index of the largest
# probability, i.e. the predicted class. The first element is discarded.
_, argmax = y.softmax("classes").max("classes")

In [0]:
argmax

NamedTensor(
	tensor(2),
	())

In [0]:
argmax

## Batch Dimensions

In [0]:
torch.eye(VOCAB)[sentences]

In [0]:
# Build a batch by repeating the same one-hot encoded sentence five times
# along a new leading "batch" dimension.
padded = ntorch.tensor(
    torch.stack([torch.eye(VOCAB)[sentences] for _ in range(5)]),
    names=("batch", "time", "vocab"),
)

In [0]:
padded.shape

OrderedDict([('batch', 5), ('time', 11), ('vocab', 100)])

In [0]:
# Same bag-of-words reduction on the batched input: only "time" is summed away.
bagofwords = padded.sum("time")

In [0]:
# Contract "vocab" between the class weights and the batched counts to get the class scores.
y = weight.dot("vocab", bagofwords)

In [0]:
# Reduce over "classes" and keep the index of the most probable class.
_, predictions = y.softmax("classes").max("classes")

In [0]:
predictions

NamedTensor(
	tensor([2, 2, 2, 2, 2]),
	('batch',))

In [0]:
# A separate weight for every (position, word) pair; 11 matches the length of `sentences`.
# This rebinds `weight`, replacing the per-class weights used above.
weight = ntorch.randn(11, VOCAB, names=("time", "vocab"))

In [0]:
# Merge the "time" and "vocab" dimensions into a single "features" dimension.
# `weight` is rebound, so from here on it no longer has "time"/"vocab" dimensions.
weight = weight.stack(("time", "vocab"), "features")

In [0]:
# Merge "time" and "vocab" of the batched one-hot input into "features" as well.
padded2 = padded.stack(("time","vocab"), "features")

In [0]:
# Both operands were flattened above, so contract their shared "features" dimension.
# `weight` no longer has separate "time"/"vocab" dimensions after the stack, so
# contracting over those names against the un-flattened `padded` would fail.
weight.dot("features", padded2)

NamedTensor(
	tensor([1.0806, 1.0806, 1.0806, 1.0806, 1.0806]),
	('batch',))

## Embeddings

In [0]:
# Embedding table: one random 50-dimensional vector ("embedding") per vocabulary entry ("vocab").
embeddings = ntorch.randn(VOCAB, 50, names=("vocab", "embedding"))

In [0]:
padded.shape

OrderedDict([('batch', 5), ('time', 11), ('vocab', 100)])

In [0]:
# Contracting the one-hot "vocab" dimension against the embedding table picks out
# the embedding row of each token.
phi = embeddings.dot("vocab", padded)

In [0]:
phi.sum("time").shape

OrderedDict([('embedding', 50), ('batch', 5)])

In [0]:
# Index-based version of the batch: store the token ids themselves instead of
# one-hot rows, with the same sentence repeated for 5 batch elements.
sparse_padded = ntorch.tensor(
    torch.tensor([sentences for _ in range(5)]).long(),
    names=("batch", "time"),
)

In [0]:
# Look the token ids up directly along "vocab" of the embedding table, without
# building one-hot vectors first.
phi = embeddings.index_select("vocab", sparse_padded)

In [0]:
phi.shape

OrderedDict([('batch', 5), ('time', 11), ('embedding', 50)])

In [0]:
q.shape

OrderedDict([('batch', 3), ('vocab', 5)])

In [0]:
# One-hot rows for a short 5-token example, sized by VOCAB so the "vocab"
# dimension matches the other tensors in this notebook.
padded = ntorch.tensor(torch.eye(VOCAB)[[2,4,1,5,6]], names=("time", "vocab"))

In [0]:
# Inspect the tensor that was just rebuilt; expected dimensions are ("time", "vocab").
padded.shape

OrderedDict([('time', 5), ('vocab', 100)])

In [0]:
ntorch.randn(10, 10, names=("a", "b"))

NamedTensor(
	tensor([[-0.2757,  1.3737,  0.3013,  0.7264, -0.5615, -1.1377, -1.2657,  1.9019,
          0.2590, -1.3312],
        [ 0.1194,  2.0329, -1.2947,  0.7613, -2.0706, -1.3822, -1.5996, -0.8982,
         -1.0142, -0.8676],
        [ 1.0065, -1.0533, -0.9762, -0.8570,  0.8130,  0.1056, -1.2641,  0.5431,
          1.9614,  0.4483],
        [-0.0704, -0.3861,  0.8171,  2.7926,  0.3012, -0.5579,  0.9826, -0.8872,
         -2.2831,  0.3868],
        [-0.2457, -0.0396,  0.0856, -1.2311, -0.7474, -0.5262,  0.2337,  0.1905,
          0.0074,  2.8564],
        [ 0.1273, -0.5845,  0.0770,  0.7371,  0.7796,  0.6446, -0.9665,  1.0825,
         -2.3709, -0.0736],
        [-0.0943, -1.0616,  0.0597,  0.7328,  0.2400, -0.2676, -0.0542,  0.9283,
         -2.8106, -1.0523],
        [-0.5619,  1.6360, -0.3489,  0.4330, -0.6399, -0.2958,  0.3440, -1.0261,
          0.7910,  1.4833],
        [-0.8453,  0.9169, -0.1346,  1.1407, -1.9582, -0.0145, -1.1900,  1.0621,
         -0.2043,  0.5463],
     