In [None]:
from transformers import BertTokenizer, BertModel
import torch
import torch.nn as nn
import torch.optim as optim

# Example sentence
sentence = "The camera quality of this phone is excellent, but the battery life is disappointing."

# Tokenize the sentence
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(sentence)))

# Identify noun chunks (you need to replace this with your own noun chunk extraction logic)
noun_chunks = ["camera quality", "phone", "battery life"]

# Convert noun chunks to token positions
chunk_positions = []
for chunk in noun_chunks:
    chunk_tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(chunk)))
    chunk_positions.append([i for i, token in enumerate(tokens) if token in chunk_tokens])

# Convert tokens to IDs
input_ids = tokenizer.convert_tokens_to_ids(tokens)
chunk_positions_ids = [item for sublist in chunk_positions for item in sublist]

# Create a binary label tensor where 1 indicates an aspect and 0 otherwise
labels = [1 if i in chunk_positions_ids else 0 for i in range(len(tokens))]
labels = torch.tensor(labels)

# Convert input_ids to tensor
input_ids = torch.tensor(input_ids).unsqueeze(0)  # Add batch dimension

# Load pre-trained BERT model
model = BertModel.from_pretrained('bert-base-uncased')

# Extract contextualized embeddings
with torch.no_grad():
    outputs = model(input_ids)

# Obtain embeddings for each token
word_embeddings = outputs.last_hidden_state.squeeze(0)

# Define a simple classification model for aspect identification
class AspectClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(AspectClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

# Train the aspect identification model
input_size = word_embeddings.size(1)
hidden_size = 256
output_size = 1
model = AspectClassifier(input_size, hidden_size, output_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for a few epochs (you need to replace this with your own training loop)
for epoch in range(5):
    optimizer.zero_grad()
    predictions = model(word_embeddings)
    loss = criterion(predictions.view(-1), labels.float())
    loss.backward()
    optimizer.step()
torch.save(model.state_dict(), 'weights.pth')

In [15]:
from transformers import BertTokenizer, BertModel
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Example test sentence
test_sentence = "The display is amazing, but the software is a bit slow."

# Tokenize the test sentence
test_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
test_tokens = test_tokenizer.tokenize(test_tokenizer.decode(test_tokenizer.encode(test_sentence)))

# Identify test noun chunks (replace this with your own noun chunk extraction logic)
test_noun_chunks = ["display", "software"]

# Convert test noun chunks to token positions
test_chunk_positions = []
for chunk in test_noun_chunks:
    chunk_tokens = test_tokenizer.tokenize(test_tokenizer.decode(test_tokenizer.encode(chunk)))
    test_chunk_positions.append([i for i, token in enumerate(test_tokens) if token in chunk_tokens])

# Convert test tokens to IDs
test_input_ids = test_tokenizer.convert_tokens_to_ids(test_tokens)
test_chunk_positions_ids = [item for sublist in test_chunk_positions for item in sublist]

# Create a binary label tensor where 1 indicates an aspect and 0 otherwise
test_labels = [1 if i in test_chunk_positions_ids else 0 for i in range(len(test_tokens))]
test_labels = torch.tensor(test_labels)

# Convert test_input_ids to tensor
test_input_ids = torch.tensor(test_input_ids).unsqueeze(0)  # Add batch dimension

# Load pre-trained BERT model
test_model = BertModel.from_pretrained('bert-base-uncased')

# Extract contextualized embeddings
with torch.no_grad():
    test_outputs = test_model(test_input_ids)

# Obtain embeddings for each token
test_word_embeddings = test_outputs.last_hidden_state.squeeze(0)

# Define a simple classification model for aspect identification
class AspectClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(AspectClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

# Instantiate the aspect identification model
test_input_size = test_word_embeddings.size(1)
test_hidden_size = 256
test_output_size = 1
test_model = AspectClassifier(test_input_size, test_hidden_size, test_output_size)

# Load the trained weights (replace 'path_to_your_model_weights.pth' with your actual file path)
test_model.load_state_dict(torch.load('weights.pth'))
test_model.eval()  # Set the model to evaluation mode

# Evaluate the model on the test set
with torch.no_grad():
    test_predictions = (test_model(test_word_embeddings) > 0.5).float()

# Convert predictions and labels to numpy arrays for evaluation metrics
test_predictions_np = test_predictions.numpy().flatten()
test_labels_np = test_labels.numpy()

# Calculate evaluation metrics
accuracy = accuracy_score(test_labels_np, test_predictions_np)
precision = precision_score(test_labels_np, test_predictions_np)
recall = recall_score(test_labels_np, test_predictions_np)
f1 = f1_score(test_labels_np, test_predictions_np)

# Extract the aspects from the test predictions
predicted_aspects = [test_tokens[i] for i in range(len(test_tokens)) if test_predictions_np[i] == 1]

# Print the identified aspects
print("Identified Aspects:")
print(predicted_aspects)

# Print evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Identified Aspects:
['[CLS]', 'display', 'software']
Accuracy: 0.9333
Precision: 1.0000
Recall: 0.7500
F1 Score: 0.8571
