### 1. Tensor的使用

In [10]:
import torch

In [11]:
# x = torch.Tensor([[1, 2, 3], [4, 5, 6]])
x = torch.randn((2, 3, 4))
x

tensor([[[-0.1832, -0.8339, -0.4587,  0.8328],
         [-0.3546,  1.2222,  1.1350,  1.4671],
         [-1.9389, -1.1653, -0.1825, -1.2143]],

        [[-1.4831,  1.0244, -0.5144,  0.3595],
         [ 0.5914, -2.3901, -0.7608, -0.4785],
         [-0.3594,  1.0850, -0.7563, -2.0344]]])

In [12]:
x = torch.Tensor([[1, 2], [3, 4]])
y = torch.Tensor([[5, 6], [7, 8]])
z = x + y
z

tensor([[ 6.,  8.],
        [10., 12.]])

In [13]:
x = torch.randn((2, 4, 6))
# x = x.view(-1)
x = x.view(6, -1) # 等价于 x = x.view(6, 8)
x.size()

torch.Size([6, 8])

### 2. 计算图和自动微分

In [14]:
from torch.autograd import Variable
from torch import nn
from torch.nn.functional import relu, softmax

In [15]:
# 将Tensor变为Variable
x = Variable(torch.Tensor([1,2,3]), requires_grad=True)
# 将Variable变为Tensor
# x = x.data
x

tensor([1., 2., 3.], requires_grad=True)

In [16]:
x = Variable(torch.Tensor([1, 2]), requires_grad=True)
y = Variable(torch.Tensor([3, 4]), requires_grad=True)
z = x + y
print(z)
s = z.sum()
print(s)
s.backward()
print(x.grad)

tensor([4., 6.], grad_fn=<AddBackward0>)
tensor(10., grad_fn=<SumBackward0>)
tensor([1., 1.])


In [17]:
linear = nn.Linear(3, 5)
x = Variable(torch.ones((5, 3)))
print(x)
print("Size of x: ", x.size())
y = linear(x)
print(y)
print("Size of y: ", y.size())

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
Size of x:  torch.Size([5, 3])
tensor([[-0.2406,  0.6009, -0.3527,  0.5031, -0.1083],
        [-0.2406,  0.6009, -0.3527,  0.5031, -0.1083],
        [-0.2406,  0.6009, -0.3527,  0.5031, -0.1083],
        [-0.2406,  0.6009, -0.3527,  0.5031, -0.1083],
        [-0.2406,  0.6009, -0.3527,  0.5031, -0.1083]],
       grad_fn=<AddmmBackward>)
Size of y:  torch.Size([5, 5])


In [18]:
x = Variable(torch.randn(10))
print(x)
x_relu = relu(x)
print(x_relu)
x_softmax = softmax(x, dim=0) # dim选择对哪一维度进行softmax
print(x_softmax)
print(x_softmax.sum())

tensor([ 1.1059,  0.1676,  0.4489,  0.9154, -0.2483,  0.9295,  0.6973,  0.3895,
        -0.3461, -3.2694])
tensor([1.1059, 0.1676, 0.4489, 0.9154, 0.0000, 0.9295, 0.6973, 0.3895, 0.0000,
        0.0000])
tensor([0.1911, 0.0748, 0.0991, 0.1580, 0.0493, 0.1602, 0.1270, 0.0934, 0.0447,
        0.0024])
tensor(1.0000)


### 3. 创建网络

In [19]:
from torch import nn
from torch.nn.functional import log_softmax
from torch import optim

In [20]:
word2id = {"hello": 0, "world": 1}
vocab_size = len(word2id)
embed_dim = 5
embedding = nn.Embedding(vocab_size, embed_dim) #即两个单词，单词的词嵌入维度为5
hello_id = torch.LongTensor([word2id["hello"]])
hello_id = Variable(hello_id)
print(hello_id)
hello_embed = embedding(hello_id)
print(hello_embed)

tensor([0])
tensor([[-0.1923,  0.5000,  0.1805, -0.9468,  0.0080]],
       grad_fn=<EmbeddingBackward>)


In [21]:
# N-Gram Model
text_list = """When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a totter'd weed of small worth held:
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say, within thine own deep sunken eyes,
Were an all-eating shame, and thriftless praise.
How much more praise deserv'd thy beauty's use,
If thou couldst answer 'This fair child of mine
Shall sum my count, and make my old excuse,'
Proving his beauty by succession thine!
This were to be new made when thou art old,
And see thy blood warm when thou feel'st it cold.""".split()
trigrams = [([text_list[i], text_list[i+1]], text_list[i+2]) for i in range(len(text_list) - 2)]
trigrams[:10]

[(['When', 'forty'], 'winters'),
 (['forty', 'winters'], 'shall'),
 (['winters', 'shall'], 'besiege'),
 (['shall', 'besiege'], 'thy'),
 (['besiege', 'thy'], 'brow,'),
 (['thy', 'brow,'], 'And'),
 (['brow,', 'And'], 'dig'),
 (['And', 'dig'], 'deep'),
 (['dig', 'deep'], 'trenches'),
 (['deep', 'trenches'], 'in')]

In [22]:
# build vocab
vocab = set(text_list)
word2id = {word: i for i, word in enumerate(vocab)}
id2word = {id : word for word, id in word2id.items()}
vocab_size = len(vocab)
print(len(vocab))
print(len(word2id))
print(len(id2word))

97
97
97


In [23]:
#N-Gram Language model
class NGramLanguageModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, context_size):
        super(NGramLanguageModel, self).__init__()
        self.vocab_size = vocab_size
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.linear1 = nn.Linear(embed_dim * context_size, 128)
        self.linear2 = nn.Linear(128, vocab_size)
        
    def forward(self, input):
        '''
        :input: (context_size, 1)
        '''
        embed = self.embedding(input).view(1, -1) # 变成 (1, context_size * embed_dim)
        #print(embed)
        out = relu(self.linear1(embed)) # (1, 128)
        #print(out)
        out = relu(self.linear2(out)) # (1, vocab_size)
        #print(out.sum())
        log_probs = log_softmax(out, dim=-1) # dim=-1取最后一位
        #print(log_probs.sum())
        return log_probs

In [48]:
context_size = 2
embed_dim = 10
criterion = nn.NLLLoss()
model = NGramLanguageModel(vocab_size, embed_dim, context_size)
optimizer = optim.SGD(model.parameters(), lr=0.1)
print(model)

NGramLanguageModel(
  (embedding): Embedding(97, 10)
  (linear1): Linear(in_features=20, out_features=128, bias=True)
  (linear2): Linear(in_features=128, out_features=97, bias=True)
)


In [49]:
# 开始训练
epochs = 20
losses = []
for epoch in range(epochs):
    total_loss = torch.Tensor([0])
    #print("-----------------------------------")
    for context, target in trigrams:
        
        #model.train()
        context_ids = list(map(lambda word : word2id[word], context)) # map(函数, 作用对象)
        context_var = Variable(torch.LongTensor(context_ids))
        #print(context_var)
        # 关于zero_grad(): https://blog.csdn.net/qq_34690929/article/details/79934843
        optimizer.zero_grad()
        
        log_probs = model(context_var)
        #print(log_probs)
        loss = criterion(log_probs.contiguous().view(-1, vocab_size), 
                         Variable(torch.LongTensor([word2id[target]])).contiguous().view(-1))
        #print(loss)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.data
    losses.append(total_loss)
print(losses)

[tensor([521.8771]), tensor([479.2557]), tensor([443.8206]), tensor([412.8786]), tensor([388.5361]), tensor([373.6031]), tensor([366.6183]), tensor([363.1750]), tensor([361.2988]), tensor([360.1934]), tensor([359.4611]), tensor([358.9449]), tensor([358.5704]), tensor([358.2753]), tensor([358.0569]), tensor([357.8811]), tensor([357.7350]), tensor([357.6149]), tensor([357.5160]), tensor([357.4354])]


In [50]:
# 测试
for context, target in trigrams:
        model.eval()
        context_ids = list(map(lambda word : word2id[word], context)) # map(函数, 作用对象)
        context_var = Variable(torch.LongTensor(context_ids))
        model.zero_grad()
        log_probs = model(context_var)
        idx = int(torch.argmax(log_probs))
        print("%s : %s" %(target, id2word[idx]))

winters : winters
shall : shall
besiege : besiege
thy : thy
brow, : see
And : now,
dig : dig
deep : beauty's
trenches : thy
in : see
thy : thy
beauty's : beauty's
field, : thou
Thy : Thy
youth's : so
proud : Where
livery : so
so : so
gazed : my
on : on
now, : now,
Will : see
be : be
a : see
totter'd : see
weed : see
of : of
small : my
worth : see
held: : held:
Then : see
being : being
asked, : deserv'd
where : see
all : deserv'd
thy : thy
beauty : beauty
lies, : see
Where : Where
all : shall
the : see
treasure : see
of : of
thy : thy
lusty : beauty
days; : see
To : To
say, : thy
within : see
thine : deserv'd
own : see
deep : see
sunken : were
eyes, : were
Were : see
an : winters
all-eating : thou
shame, : thou
and : see
thriftless : thou
praise. : my
How : How
much : see
more : see
praise : my
deserv'd : deserv'd
thy : thy
beauty's : blood
use, : thou
If : see
thou : thou
couldst : see
answer : were
'This : see
fair : dig
child : see
of : of
mine : see
Shall : see
sum : see
my : see
co