In [1]:
from transformers import BertTokenizer, BertModel
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import spacy

# Technology Domain
technology_text = """
In the rapidly evolving landscape of technology, artificial intelligence (AI) continues to redefine the way we interact with machines. From advanced robotics to machine learning algorithms, innovations are reshaping industries. Quantum computing is on the horizon, promising unprecedented processing power, while blockchain technology revolutionizes secure and transparent transactions. As the Internet of Things (IoT) expands, interconnected devices create a seamless web of data, enhancing efficiency and connectivity. Cybersecurity measures are crucial to safeguarding sensitive information in this digital era.
"""

# Healthcare Domain
healthcare_text = """
In the realm of healthcare, medical breakthroughs are transforming patient care. Precision medicine tailors treatments to individual genetic profiles, while telemedicine bridges the gap between patients and healthcare providers. Advanced imaging technologies like MRI and CT scans enable accurate diagnostics, and robotic-assisted surgeries enhance precision in the operating room. Biotechnology fuels the development of innovative drugs, while wearable devices track and monitor personal health metrics. Healthcare informatics facilitates seamless data exchange for improved decision-making.
"""

# Travel Domain
travel_text = """
The travel industry is undergoing a transformation with the integration of cutting-edge technologies. Online booking platforms streamline travel arrangements, while virtual reality enhances pre-trip experiences. Sustainable travel initiatives promote eco-friendly practices, and smart luggage simplifies the journey. Artificial intelligence assists in personalized travel recommendations, and mobile applications provide real-time updates on flights and accommodations. Autonomous vehicles and smart cities contribute to seamless and efficient transportation experiences.
"""

sentence = technology_text + healthcare_text + travel_text

technology_nouns_phrases = [
    "Artificial intelligence",
    "Machine learning",
    "Robotics",
    "Quantum computing",
    "Blockchain technology",
    "Internet of Things (IoT)",
    "Cybersecurity",
    "Data analytics",
    "Automation",
    "Augmented reality"
]

healthcare_nouns_phrases = [
    "Precision medicine",
    "Telemedicine",
    "Medical breakthroughs",
    "Imaging technologies",
    "Robotic-assisted surgery",
    "Biotechnology",
    "Wearable devices",
    "Healthcare informatics",
    "Personalized healthcare",
    "Health metrics tracking"
]

travel_nouns_phrases = [
    "booking platforms",
    "Virtual reality",
    "Sustainable travel initiatives",
    "Smart luggage",
    "Artificial intelligence in travel",
    "Mobile applications",
    "Real-time updates",
    "Autonomous vehicles",
    "Smart cities",
    "Transportation experiences"
]

nouns_phrases = (
    technology_nouns_phrases +
    healthcare_nouns_phrases +
    travel_nouns_phrases
)

# Load the English language model
nlp = spacy.load('en_core_web_sm')

def remove_stopwords_punctuation(text):
    # Process the text using SpaCy
    doc = nlp(text)

    # Filter out stop words and punctuation
    filtered_tokens = [token.text for token in doc if not token.is_stop and not token.is_punct]

    # Join the filtered tokens back into a string
    filtered_text = ' '.join(filtered_tokens)

    return filtered_text


# Apply the function to the provided text
sentence = remove_stopwords_punctuation(sentence)

# Tokenize the test sentence
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(sentence)))

# Identify test noun chunks
noun_phrase = (
    technology_nouns_phrases +
    healthcare_nouns_phrases +
    travel_nouns_phrases
)

# Convert test noun chunks to token positions
phrase_positions = []
for chunk in noun_phrase:
    chunk_tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(chunk)))
    phrase_positions.append([i for i, token in enumerate(tokens) if token in chunk_tokens])

# Convert test tokens to IDs
input_ids = tokenizer.convert_tokens_to_ids(tokens)
phrase_positions_ids = [item for sublist in phrase_positions for item in sublist]

# Create a binary label tensor where 1 indicates an aspect and 0 otherwise
labels = [1 if i in phrase_positions_ids else 0 for i in range(len(tokens))]
labels = torch.tensor(labels)

# Convert input_ids to tensor
input_ids = torch.tensor(input_ids).unsqueeze(0)  # Add batch dimension

# Load pre-trained BERT model
model = BertModel.from_pretrained('bert-base-uncased')

# Extract contextualized embeddings
with torch.no_grad():
    outputs = model(input_ids)

# Obtain embeddings for each token
word_embeddings = outputs.last_hidden_state.squeeze(0)

# Define a classification model for aspect identification
class AspectClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(AspectClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

# Instantiate the aspect identification model
input_size = word_embeddings.size(1)
hidden_size = 256
output_size = 1
model = AspectClassifier(input_size, hidden_size, output_size)


# Save the trained model weights
#torch.save(model.state_dict(), 'aspect_model.pth')

# Load the trained weights (replace 'path_to_your_model_weights.pth' with your actual file path)
model.load_state_dict(torch.load('aspect_model.pth'))
model.eval()  # Set the model to evaluation mode


# Evaluate the model on the test set
with torch.no_grad():
    predictions = (model(word_embeddings) > 0.5).float()


# Convert predictions and labels to numpy arrays for evaluation metrics
predictions_np = predictions.numpy().flatten()
labels_np = labels.numpy()


# Extract the aspects and probabilities from the test predictions
aspects_and_probs = [(tokens[i], predictions_np[i]) for i in range(len(tokens)) if predictions_np[i] == 1]

# Sort the aspects based on their probabilities in descending order
aspects_and_probs.sort(key=lambda x: x[1], reverse=True)

# Select the top 10 aspects
top_10_aspects = aspects_and_probs[:10]

# Extract the aspects and probabilities separately
top_10_aspects, top_10_probs = zip(*top_10_aspects)

# Calculate metrics for each of the top 10 aspects separately
for aspect in top_10_aspects:
    aspect_positions = [i for i, token in enumerate(tokens) if token == aspect]

    # Check if aspect_positions is not empty
    if aspect_positions:
        aspect_labels = [1 if i in aspect_positions else 0 for i in range(len(tokens))]

        aspect_labels_np = torch.tensor(aspect_labels).numpy()

        # Extract predictions only for all positions in the original sentence
        aspect_predictions_np = predictions_np[:len(tokens)]

        aspect_accuracy = accuracy_score(aspect_labels_np, aspect_predictions_np)
        aspect_precision = precision_score(aspect_labels_np, aspect_predictions_np)
        aspect_recall = recall_score(aspect_labels_np, aspect_predictions_np)
        aspect_f1 = f1_score(aspect_labels_np, aspect_predictions_np)

        # Print metrics for each aspect
        print(f"\nMetrics for Aspect '{aspect}':")
        print(f"Accuracy: {aspect_accuracy:.4f}")
        print(f"Precision: {aspect_precision:.4f}")
        print(f"Recall: {aspect_recall:.4f}")
        print(f"F1 Score: {aspect_f1:.4f}")
    else:
        print(f"\nAspect '{aspect}' not found in the test sentence.")


  from .autonotebook import tqdm as notebook_tqdm




Metrics for Aspect '[CLS]':
Accuracy: 0.1005
Precision: 0.0056
Recall: 1.0000
F1 Score: 0.0110

Metrics for Aspect 'rapidly':
Accuracy: 0.1005
Precision: 0.0056
Recall: 1.0000
F1 Score: 0.0110

Metrics for Aspect 'evolving':
Accuracy: 0.1005
Precision: 0.0056
Recall: 1.0000
F1 Score: 0.0110

Metrics for Aspect 'landscape':
Accuracy: 0.1005
Precision: 0.0056
Recall: 1.0000
F1 Score: 0.0110

Metrics for Aspect 'technology':
Accuracy: 0.1055
Precision: 0.0111
Recall: 1.0000
F1 Score: 0.0220

Metrics for Aspect 'artificial':
Accuracy: 0.1055
Precision: 0.0111
Recall: 1.0000
F1 Score: 0.0220

Metrics for Aspect 'intelligence':
Accuracy: 0.1055
Precision: 0.0111
Recall: 1.0000
F1 Score: 0.0220

Metrics for Aspect 'ai':
Accuracy: 0.1005
Precision: 0.0056
Recall: 1.0000
F1 Score: 0.0110

Metrics for Aspect 'continues':
Accuracy: 0.1005
Precision: 0.0056
Recall: 1.0000
F1 Score: 0.0110

Metrics for Aspect 'red':
Accuracy: 0.1005
Precision: 0.0056
Recall: 1.0000
F1 Score: 0.0110
