<a href="https://colab.research.google.com/github/jerryshenfewcher/Cits4012/blob/main/ABSA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# NOTE(review): google.colab is only importable inside a Colab session;
# this cell will fail when the notebook is run elsewhere.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import json
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split


2. 数据预处理

In [None]:
class AspectDataset(Dataset):
    """Map-style dataset that exposes an indexable container of samples.

    The container is stored as-is (no copy, no transformation); indexing and
    length queries are forwarded straight to it.
    """

    def __init__(self, data):
        # Hold a reference to the caller's container rather than copying it.
        self.data = data

    def __len__(self):
        """Return the number of samples in the wrapped container."""
        sample_count = len(self.data)
        return sample_count

    def __getitem__(self, index):
        """Return the sample stored at position ``index``."""
        sample = self.data[index]
        return sample

def load_data(filepath):
    """Read a JSON file and return the decoded Python object.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding,
    # so non-ASCII text in the JSON is read the same way on every machine.
    with open(filepath, 'r', encoding='utf-8') as file:
        return json.load(file)

def preprocess_data(data):
    """Split a sequence of records into three parallel lists.

    Each record is indexed with the keys ``'text'``, ``'aspect'`` and
    ``'sentiment'``. The columns are extracted one after another, in that
    order.

    Args:
        data: Iterable of mappings carrying the three keys above.

    Returns:
        A tuple ``(texts, aspects, sentiments)`` of lists.

    Raises:
        KeyError: If a record lacks one of the required keys.
    """
    def column(key):
        # One full pass over ``data`` per requested field.
        return [record[key] for record in data]

    return column('text'), column('aspect'), column('sentiment')


3. 建立模型

In [None]:
class GRUModel(nn.Module):
    """Embedding -> single-layer GRU -> linear classifier.

    The final GRU hidden state of each sequence is projected to
    ``output_dim`` logits.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each token embedding.
        hidden_dim: Size of the GRU hidden state.
        output_dim: Number of output logits per sequence.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text, text_lengths):
        """Compute logits for a padded batch of token-index sequences.

        Args:
            text: Integer tensor of token indices, batch dimension first
                (the GRU is built with ``batch_first=True``).
            text_lengths: True (unpadded) length of each sequence, as a
                tensor or a list of ints. Any order is accepted.

        Returns:
            Tensor of shape ``(batch, output_dim)`` with one row of logits
            per input sequence, in the same order as ``text``.
        """
        embedded = self.embedding(text)

        # pack_padded_sequence needs the lengths on the CPU even when the
        # inputs live on an accelerator.
        lengths = torch.as_tensor(text_lengths).cpu()

        # enforce_sorted=False lets shuffled batches through: the default
        # (True) raises unless sequences are sorted by decreasing length.
        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths, batch_first=True, enforce_sorted=False
        )

        # Only the final hidden state is used; the per-step outputs are not
        # needed, so they are not unpacked.
        _, hidden = self.gru(packed_embedded)

        # hidden is (num_layers, batch, hidden_dim); take the last layer
        # (the only one here) to get (batch, hidden_dim).
        return self.fc(hidden[-1])


4. 训练模型

In [None]:
def train_model(model, train_loader, optimizer, criterion):
    """Run one pass over ``train_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(texts, aspects)`` for each batch.
        train_loader: Iterable yielding ``(texts, aspects, sentiments)``
            batches. It does not need to support ``len()``.
        optimizer: Optimizer whose ``zero_grad``/``step`` are invoked once
            per batch.
        criterion: Loss callable applied as ``criterion(outputs, sentiments)``.

    Returns:
        The average of the per-batch loss values as a float, or ``0.0`` when
        the loader yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for texts, aspects, sentiments in train_loader:
        optimizer.zero_grad()
        # Assumes the inputs have already been converted into whatever the
        # model consumes.
        # NOTE(review): GRUModel.forward in this file takes
        # (text, text_lengths); here ``aspects`` lands in the second slot.
        # Confirm the intended model inputs.
        outputs = model(texts, aspects)
        loss = criterion(outputs, sentiments)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Count batches actually processed rather than calling len(): this
    # avoids a ZeroDivisionError on an empty loader and works for iterables
    # without __len__.
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches


5. 主函数

In [None]:
def main():
    """Load ``train.json``, build the model, train for one epoch, print the loss."""
    # Load the raw records and split them into parallel field lists.
    texts, aspects, sentiments = preprocess_data(load_data('train.json'))

    # Build the dataset and its loader.
    # NOTE(review): the samples are passed through exactly as loaded; this
    # assumes a text-to-index conversion step exists somewhere. None is
    # visible in this function.
    samples = list(zip(texts, aspects, sentiments))
    train_loader = DataLoader(AspectDataset(samples), shuffle=True, batch_size=32)

    # Model hyperparameters.
    hyperparams = dict(
        vocab_size=10000,   # placeholder vocabulary size
        embedding_dim=100,
        hidden_dim=256,
        output_dim=3,       # three sentiment polarities
    )
    model = GRUModel(**hyperparams)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Train for a single pass over the data and report the mean loss.
    loss = train_model(model, train_loader, optimizer, criterion)
    print(f'Training loss: {loss}')


if __name__ == '__main__':
    main()
