# 2.1 构建训练集，测试集与验证集

In [1]:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

In [2]:
data_x = []
data_y = []

lines = open("uzi_data.txt").readlines()
for line in lines:
    vector, label = line.strip().split("\t")
    label = int(label)
    if label == -1:
        label = 0
    vector = vector.split(",")
    vector = [int(v) for v in vector]
    data_x.append(vector)
    data_y.append(label)

In [3]:
frac = int(len(data_x)/5)

In [4]:
train_x = np.array(data_x[:frac*3])
train_y = np.array(data_y[:frac*3])
valid_x = np.array(data_x[frac*3:frac*4])
valid_y = np.array(data_y[frac*3:frac*4])
test_x = np.array(data_x[frac*4:])
test_y = np.array(data_y[frac*4:])

print("\t\t\tFeature Shapes:")
print("Train set: \t\t{}".format(train_x.shape), 
      "\nTrain_Y set: \t{}".format(train_y.shape),
      "\nTest set: \t\t{}".format(test_x.shape))

			Feature Shapes:
Train set: 		(3684, 20) 
Train_Y set: 	(3684,) 
Test set: 		(1231, 20)


In [5]:
train_data = TensorDataset(torch.from_numpy(train_x), torch.from_numpy(train_y))
valid_data = TensorDataset(torch.from_numpy(valid_x), torch.from_numpy(valid_y))
test_data = TensorDataset(torch.from_numpy(test_x), torch.from_numpy(test_y))

batch_size = 64

train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size,drop_last=True)
valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size,drop_last=True)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size,drop_last=True)

# 2.2 构建模型

In [6]:
import torch.nn as nn

In [7]:
class SentimentLSTM(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, n_layers, drop_prob, bidirectional=False, attention=False):
        super(SentimentLSTM, self).__init__()
        self.GPU = False
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.bidirectional = bidirectional
        self.attention = attention
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob, bidirectional=bidirectional)
        self.attention_layers = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU()
        )
        if bidirectional and not attention:
            h_dim = hidden_dim * 2
        else:
            h_dim = hidden_dim
        self.fc = nn.Sequential(
            nn.Linear(h_dim, 1),
            nn.Dropout(drop_prob),
            nn.Sigmoid()
        )
    
    def attention_forward(self, x, hx):
        if self.bidirectional:
            x_tmp = torch.chunk(x, 2, -1)
            h = x_tmp[0] + x_tmp[1]
        else:
            h = x
        hx = torch.sum(hx, dim=1)
        hx = hx.unsqueeze(1)
        attent_w = self.attention_layers(hx)
        m = torch.tanh(h)
        attent_context = torch.bmm(attent_w, m.transpose(1, 2))
        softmax_w = torch.softmax(attent_context, dim=-1)
        context = torch.bmm(softmax_w, h)
        context = context.squeeze(1)
        return context
        
    def forward(self, x, hx):
        batch_size = x.size(0)
        x = x.long()
        x = self.embedding(x)
        x, hx = self.lstm(x, hx)
        if self.attention == False:
            if self.bidirectional:
                x = x.contiguous().view(-1, self.hidden_dim*2)
            else:
                x = x.contiguous().view(-1, self.hidden_dim)
            y = self.fc(x)
            y = y.view(batch_size, -1)
            y = y[:,-1]
        else:
           # x = x.permute(1, 0, 2)
            hx = hx[0].permute(1, 0, 2)
            attent_out = self.attention_forward(x, hx)
            y = self.fc(attent_out)
        return y

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        number = 2 if self.bidirectional else 1
        if self.GPU:
            hx = (weight.new(self.n_layers*number, batch_size, self.hidden_dim).zero_().cuda(),
                  weight.new(self.n_layers*number, batch_size, self.hidden_dim).zero_().cuda())
        else:
            hx = (weight.new(self.n_layers*number, batch_size, self.hidden_dim).zero_(),
                  weight.new(self.n_layers*number, batch_size, self.hidden_dim).zero_())
        return hx

# 2.3 训练

In [8]:
import torch.nn.functional as F
import torch.optim as optim
import random

In [37]:
grid = {
    "embedding_dim": [64, 128, 256],
    "hidden_dim": [100, 200, 300],
    "n_layers": [1],
    "drop_prob": [0.1, 0.2, 0.3],
    "lr":[0.001, 0.002, 0.0005]
}

def param_search():
    params = {}
    for param in grid.keys():
        params[param] = random.choice(grid[param])
    return params

In [38]:
params = param_search()
epochs = 10
lr = params["lr"]
print(params)

{'embedding_dim': 128, 'hidden_dim': 100, 'n_layers': 1, 'drop_prob': 0.1, 'lr': 0.001}


In [54]:
model = SentimentLSTM(16296, params["embedding_dim"], params["hidden_dim"], params["n_layers"], params["drop_prob"],bidirectional=True,attention=True)
print(model)
optimizer = optim.Adam(model.parameters(), lr=lr)

SentimentLSTM(
  (embedding): Embedding(16296, 128)
  (lstm): LSTM(128, 100, batch_first=True, dropout=0.1, bidirectional=True)
  (attention_layers): Sequential(
    (0): Linear(in_features=100, out_features=100, bias=True)
    (1): ReLU()
  )
  (fc): Sequential(
    (0): Linear(in_features=100, out_features=1, bias=True)
    (1): Dropout(p=0.1, inplace=False)
    (2): Sigmoid()
  )
)


In [55]:
import time

if torch.cuda.is_available():
    model.cuda()
    model.GPU = True

model.train()

t1 = time.time()
for epoch in range(epochs):
    it = 0
    hx = model.init_hidden(batch_size)
    for x, y in train_loader:
        if torch.cuda.is_available():
            x = x.cuda()
            y = y.cuda()
        hx = tuple([h.data for h in hx])
        y_ = model(x, hx)
        loss = F.binary_cross_entropy(y_.squeeze(), y.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if it % 10 == 0:
            print("Epoch: {}/{}...".format(epoch, epochs),
                  "Step: {}...".format(it),
                  "Loss: {:.6f}...".format(loss.item()))
        it += 1
t2 = time.time()

Epoch: 0/10... Step: 0... Loss: 0.685204...
Epoch: 0/10... Step: 10... Loss: 0.584851...
Epoch: 0/10... Step: 20... Loss: 0.627124...
Epoch: 0/10... Step: 30... Loss: 0.541212...
Epoch: 0/10... Step: 40... Loss: 0.556536...
Epoch: 0/10... Step: 50... Loss: 0.513268...
Epoch: 1/10... Step: 0... Loss: 0.576665...
Epoch: 1/10... Step: 10... Loss: 0.469355...
Epoch: 1/10... Step: 20... Loss: 0.585975...
Epoch: 1/10... Step: 30... Loss: 0.443022...
Epoch: 1/10... Step: 40... Loss: 0.499566...
Epoch: 1/10... Step: 50... Loss: 0.557027...
Epoch: 2/10... Step: 0... Loss: 0.439101...
Epoch: 2/10... Step: 10... Loss: 0.423701...
Epoch: 2/10... Step: 20... Loss: 0.414940...
Epoch: 2/10... Step: 30... Loss: 0.494703...
Epoch: 2/10... Step: 40... Loss: 0.373337...
Epoch: 2/10... Step: 50... Loss: 0.353056...
Epoch: 3/10... Step: 0... Loss: 0.447752...
Epoch: 3/10... Step: 10... Loss: 0.415809...
Epoch: 3/10... Step: 20... Loss: 0.419187...
Epoch: 3/10... Step: 30... Loss: 0.258544...
Epoch: 3/10...

In [57]:
num_correct = 0
test_loss = []

hx = model.init_hidden(batch_size)
 
model.eval()
# iterate over test data
for x, y in test_loader:
    hx = tuple([h.data for h in hx])
    if torch.cuda.is_available():
        x = x.cuda()
        y = y.cuda()
    
    # get predicted outputs
    y_ = model(x, hx)
    
    loss = F.binary_cross_entropy(y_.squeeze(), y.float())
    test_loss.append(loss.item())
    
    pred = torch.round(y_.squeeze())
    correct_tensor = pred.eq(y.float().view_as(pred))
    correct = np.squeeze(correct_tensor.numpy()) if not torch.cuda.is_available() else np.squeeze(correct_tensor.cpu().numpy())
    num_correct += np.sum(correct)
                             
print("Test loss: {:.3f}".format(np.mean(test_loss)))
 
test_acc = num_correct/len(test_loader.dataset)
print("Accuracy: {:.3f}".format(test_acc))
print("Time consuming: {}s".format(t2-t1))

Test loss: 0.775
Accuracy: 0.764
Time consuming: 34.440152406692505s
