# LSTM网络情感二分类

In [1]:
import torch
from torch import nn
import tools
import pandas
import data_process


## 一、定义网络

自己写的embedding+lstm的网络，效果较差

In [2]:
class lstm(nn.Module):
    """Baseline sentiment classifier: embedding -> unidirectional LSTM -> linear head.

    The initial hidden and cell states are sampled from a standard normal
    distribution on every forward pass, so two calls on the same input give
    different logits unless the global RNG is re-seeded in between.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector (the LSTM input size).
        hidden_size: Size of the LSTM hidden state.
        device: Device the freshly sampled initial states are moved to.
        num_layers: Number of stacked LSTM layers.
        *args, **kwargs: Forwarded unchanged to ``nn.Module.__init__``.
    """

    def __init__(self, num_embeddings, embedding_dim, hidden_size, device, num_layers=1, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.device = device
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # Sub-modules are registered in the order embedding -> lstm -> ff so
        # that parameter initialisation under a fixed seed stays the same.
        self.embedding = nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_dim)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_size, num_layers=num_layers)
        self.ff = nn.Linear(in_features=hidden_size, out_features=2)

    def forward(self, X):
        """Return two-class logits of shape ``(X.shape[0], 2)``.

        ``X`` is an index tensor whose first dimension is treated as the batch:
        it sizes the initial states and is swapped into second place before the
        LSTM, which runs with its default sequence-first layout.
        """
        state_shape = (self.num_layers, X.shape[0], self.hidden_size)
        # Two independent draws; the first becomes h0 and the second c0.
        h0, c0 = (torch.randn(state_shape).to(self.device) for _ in range(2))
        seq_first = self.embedding(X).transpose(0, 1)
        output, _ = self.lstm(seq_first, (h0, c0))
        # Classify from the output at the final sequence position.
        last_step = output[-1]
        return self.ff(last_step)
        
        

改进的lstm，加了四个地方:
- lstm改成双向
- lstm设置了dropout参数（注意：nn.LSTM的dropout只作用于相邻两层LSTM之间，num_layers=1时实际不生效）
- 隐藏状态初始化为0
- 增加最大池化

In [3]:
class LSTM(nn.Module):
    """Improved sentiment classifier: embedding -> bidirectional LSTM -> max-pool -> linear head.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector (the LSTM input size).
        hidden_size: Size of the LSTM hidden state per direction.
        device: Kept as ``self.device`` for callers that read it; the forward
            pass no longer depends on it.
        num_layers: Number of stacked LSTM layers.
        *args, **kwargs: Forwarded unchanged to ``nn.Module.__init__``.
        dropout: Keyword-only. Dropout probability applied between stacked
            LSTM layers. ``nn.LSTM`` only applies dropout after all but the
            last layer, so it is forced to 0 when ``num_layers == 1``; this
            avoids the construction-time UserWarning without changing results.
    """

    def __init__(self, num_embeddings, embedding_dim, hidden_size, device, num_layers=1, *args, dropout=0.5, **kwargs):
        super().__init__(*args, **kwargs)
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        # Inter-layer dropout is meaningless for a single layer; passing a
        # non-zero value there only triggers a warning.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            embedding_dim,
            hidden_size,
            num_layers,
            bidirectional=True,
            dropout=inter_layer_dropout,
            batch_first=True,
        )
        # Forward and backward outputs are concatenated, hence 2 * hidden_size.
        self.ff = nn.Linear(2 * hidden_size, 2)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.device = device

    def forward(self, X):
        """Return two-class logits of shape ``(X.shape[0], 2)``.

        ``X`` is an index tensor laid out as ``(batch, sequence)`` (the LSTM is
        built with ``batch_first=True``).
        """
        X = self.embedding(X)
        # Omitting (h0, c0) makes nn.LSTM start from zero states allocated on
        # the input's own device, which matches the previous explicit zeros.
        output, _ = self.lstm(X)

        # Max-pool over the sequence dimension (not just the last time step).
        output_pooled = torch.max(output, dim=1)[0]
        return self.ff(output_pooled)


## 二、包装数据

In [4]:
# Load the labelled corpus; the vocabulary is built from the whole table.
# NOTE(review): this means test rows also contribute to the vocabulary —
# confirm that is intended.
data  = pandas.read_csv('./motionClassify.csv')
vocab = data_process.gen_vocab(data)
# The first TRAIN_SIZE rows are used for training, the remainder for testing.
TRAIN_SIZE = 40000
data_train = data_process.gen_dataset(data[:TRAIN_SIZE], vocab)
data_test = data_process.gen_dataset(data[TRAIN_SIZE:], vocab)
Batch_size = 64
train_iter = torch.utils.data.DataLoader(data_train, batch_size=Batch_size, shuffle=True)
# Evaluation gains nothing from a random order; keeping it fixed makes
# per-batch results reproducible between runs.
test_iter = torch.utils.data.DataLoader(data_test, batch_size=Batch_size, shuffle=False)

## 三、训练参数设置

In [5]:


# Optimisation settings shared by both networks.
lr = 0.1
criterion = torch.nn.CrossEntropyLoss()
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Both models are built with identical sizes so that only the architecture differs.
_net_kwargs = dict(num_embeddings=len(vocab), embedding_dim=256, hidden_size=256, device=device)

# Baseline network and its optimizer.
net1 = lstm(**_net_kwargs)
optimizer1 = torch.optim.SGD(net1.parameters(), lr)

# Improved network and its optimizer.
net2 = LSTM(**_net_kwargs)
optimizer2 = torch.optim.SGD(net2.parameters(), lr)




## 四、训练和测试

第一个网络训练和测试的结果

In [6]:
# Train the baseline network (net1) on train_iter with its own optimizer and the shared loss.
tools.train(net1,train_iter,device,optimizer1,criterion)

 18%|█▊        | 113/625 [00:01<00:07, 65.01it/s]

batch100,loss = 0.7077628970146179


 33%|███▎      | 206/625 [00:03<00:08, 51.20it/s]

batch200,loss = 0.6943620443344116


 49%|████▉     | 307/625 [00:05<00:06, 49.91it/s]

batch300,loss = 0.6926913857460022


 65%|██████▌   | 409/625 [00:07<00:04, 49.46it/s]

batch400,loss = 0.6888372302055359


 81%|████████▏ | 508/625 [00:09<00:02, 49.70it/s]

batch500,loss = 0.692266047000885


 97%|█████████▋| 606/625 [00:11<00:00, 49.77it/s]

batch600,loss = 0.6926555633544922


100%|██████████| 625/625 [00:11<00:00, 52.40it/s]


In [7]:
# Evaluate the baseline network (net1) on the test loader.
tools.test(net1,test_iter,device)

100%|██████████| 157/157 [00:00<00:00, 157.20it/s]

accuracy = 0.49969998002052307





准确率约50%，与二分类随机猜测的水平相当，说明训练没有效果

第二个网络训练以及测试的结果

In [8]:
# Train the improved network (net2) on train_iter with its own optimizer and the shared loss.
tools.train(net2,train_iter,device,optimizer2,criterion)

  0%|          | 0/625 [00:00<?, ?it/s]

 16%|█▋        | 103/625 [00:02<00:14, 35.49it/s]

batch100,loss = 0.6333499550819397


 32%|███▏      | 203/625 [00:05<00:11, 35.17it/s]

batch200,loss = 0.8822867274284363


 48%|████▊     | 303/625 [00:08<00:10, 31.99it/s]

batch300,loss = 0.6779134273529053


 64%|██████▍   | 403/625 [00:11<00:06, 31.96it/s]

batch400,loss = 0.47503724694252014


 80%|████████  | 503/625 [00:14<00:03, 35.33it/s]

batch500,loss = 0.38483309745788574


 96%|█████████▋| 603/625 [00:18<00:00, 31.97it/s]

batch600,loss = 0.5027120113372803


100%|██████████| 625/625 [00:18<00:00, 33.43it/s]


In [9]:
# Evaluate the improved network (net2) on the test loader.
tools.test(net2,test_iter,device)

100%|██████████| 157/157 [00:01<00:00, 138.63it/s]

accuracy = 0.8072999715805054





改进：可以增加训练的轮数(epoch)
可以做的任务：
- 可以把第一个网络一步步修改成第二个，看看到底是哪一个改进起了作用，实验发现仅仅改成双向网络效果还是差
- 可以在test函数里加入更多的评价指标（metric），例如召回率（recall）、F1-score等等，见[春招算法题](./2024春招算法题.pdf)
- 对于第二个网络，可以通过增加训练轮数，调整学习率(lr)、词嵌入维度(embedding_dim)、lstm的隐藏层神经元个数(hidden_size)、优化器(optimizer)的种类等超参数来进一步提升效果