In [1]:
'''
  code by Tae Hwan Jung(Jeff Jung) @graykode, modify by wmathor
'''
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import torch.nn.functional as F

In [2]:
# 3 words sentences (=sequence_length is 3)
sentences = ["i love you", "he loves me", "she likes baseball", "i hate you", "sorry for that", "this is awful"]
labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.

# TextCNN Parameter
embedding_size = 2
sequence_length = len(sentences[0]) # every sentences contains sequence_length(=3) words
num_classes = len(set(labels))  # num_classes=2
batch_size = 3

word_list = " ".join(sentences).split()
vocab = list(set(word_list))
word2idx = {w: i for i, w in enumerate(vocab)}
vocab_size = len(vocab)

In [3]:
def make_data(sentences, labels):
    inputs = []
    for sen in sentences:
        inputs.append([word2idx[n] for n in sen.split()])

    targets = []
    for out in labels:
        targets.append(out) # To using Torch Softmax Loss function
    return inputs, targets

input_batch, target_batch = make_data(sentences, labels)
input_batch, target_batch = torch.LongTensor(input_batch), torch.LongTensor(target_batch)

dataset = Data.TensorDataset(input_batch, target_batch)
loader = Data.DataLoader(dataset, batch_size, True)

In [4]:
voab_size = len(vocab)
embedding_size=100
hidden_size=128
num_layers=2
batch_first=True
biddirectional=False
# device = torch.device("mps")

In [14]:
class LSTMClass(nn.Module):
    def __init__(self,voab_size,embedding_size,hidden_size,num_layers,batch_first,biddirectional):
        # voab_size2,embedding_size300
        super(LSTMClass,self).__init__()
        self.embedding = nn.Embedding(voab_size,embedding_size)
        # LSTM参数：input_size输入维度（和embedding_size一样，只是叫法不同）、hidden_size、numbe_layers,batch_first、biddirectional
        self.lstm = nn.LSTM(input_size=embedding_size,hidden_size=hidden_size,
                            num_layers=num_layers,batch_first=True,bidirectional=True)
        self.fc = nn.Linear(hidden_size*2,2) # max_len:每个评论的长度
    def forward(self,X):
        embedding_x = self.embedding(X) # 输出形状（batch_size，seq_len,embedding_size）
        print('embedding_x',embedding_x.shape)
        x,(h_n,c_n) = self.lstm(embedding_x)
        output_fw = h_n[-2,:,:]
        output_bw = h_n[-1,:,:]
        output = torch.cat([output_fw,output_bw],dim=-1)
        print('output',output.shape)
        return self.fc(output)
        
        

In [28]:
class BiRNN(nn.Module):
    def __init__(self, voab_size,embed_size,num_hiddens,num_layers,batch_first,biddirectional):
        super(BiRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # 将bidirectional设置为True以获取双向循环神经网络
        self.encoder = nn.LSTM(embed_size, num_hiddens, num_layers=num_layers,
                                bidirectional=True)
        self.decoder = nn.Linear(4 * num_hiddens, 2)

    def forward(self, inputs):
        # inputs的形状是（批量大小，时间步数）
        # 因为长短期记忆网络要求其输入的第一个维度是时间维，
        # 所以在获得词元表示之前，输入会被转置。
        # 输出形状为（时间步数，批量大小，词向量维度）
        embeddings = self.embedding(inputs.T)
        self.encoder.flatten_parameters()
        # 返回上一个隐藏层在不同时间步的隐状态，
        # outputs的形状是（时间步数，批量大小，2*隐藏单元数）
        outputs, _ = self.encoder(embeddings)
        # 连结初始和最终时间步的隐状态，作为全连接层的输入，
        # 其形状为（批量大小，4*隐藏单元数）
        encoding = torch.cat((outputs[0], outputs[-1]), dim=1)
        print('encoding', encoding.shape)
        outs = self.decoder(encoding)
        return outs

In [29]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device


device(type='cpu')

In [30]:
model = BiRNN(voab_size,embedding_size,hidden_size,num_layers,batch_first,biddirectional).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.1)

# Training
for epoch in range(20):
    for batch_x, batch_y in loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
#         print('batch_x',batch_x.shape)
        pred = model(batch_x)
        loss = criterion(pred, batch_y)
        #     if (epoch + 1) % 1000 == 0:
#         print('Epoch:', '%04d' % (epoch + 1), 'loss =', '{:.6f}'.format(loss))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding torch.Size([3, 512])
encoding t