In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertModel, BertTokenizer
from torch.utils.data import Dataset, DataLoader

# Define the Sentence Transformer model with a classifier
class SentenceTransformer(nn.Module):
    """BERT encoder with a linear classification head on its pooled output.

    The module owns its tokenizer, so callers pass raw strings rather than
    token ids.

    Args:
        pretrained_model_name: Name or path given to ``from_pretrained`` for
            both the tokenizer and the encoder.
        num_classes: Output size of the classification layer.
    """

    def __init__(self, pretrained_model_name='bert-base-uncased', num_classes=3):
        super().__init__()
        self.tokenizer = BertTokenizer.from_pretrained(pretrained_model_name)
        self.bert = BertModel.from_pretrained(pretrained_model_name)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_classes)
        self.relu = nn.ReLU()  # not applied in forward(); kept so the attribute stays available
        self.dropout = nn.Dropout(0.1)

    def forward(self, input_sentences):
        """Tokenize the sentences, encode them and classify the pooled output.

        Args:
            input_sentences: A single string or a sequence of strings.

        Returns:
            A ``(logits, pooled_output)`` tuple. ``logits`` is the classifier
            output; ``pooled_output`` is the encoder's pooled output after
            dropout, i.e. exactly what was fed to the classifier.
        """
        if isinstance(input_sentences, str):
            # A bare string is a batch of one, not a sequence of characters.
            input_sentences = [input_sentences]

        encoded = self.tokenizer(
            list(input_sentences), padding=True, truncation=True, return_tensors="pt"
        )
        # The tokenizer builds its tensors on the CPU; move them next to the weights.
        device = self.classifier.weight.device
        encoded = {key: tensor.to(device) for key, tensor in encoded.items()}

        # The attention mask must accompany the ids: the batch is padded, and
        # without the mask the encoder would attend to the padding positions,
        # making a sentence's output depend on what it was batched with.
        outputs = self.bert(
            input_ids=encoded["input_ids"],
            attention_mask=encoded.get("attention_mask"),
            token_type_ids=encoded.get("token_type_ids"),
        )
        pooled_output = self.dropout(outputs.pooler_output)
        logits = self.classifier(pooled_output)
        return logits, pooled_output

    def predict(self, input_sentences):
        """Return the index of the highest-scoring class for each sentence.

        Inference runs with gradients disabled and with the module in
        evaluation mode so that dropout cannot perturb the result; the
        training/evaluation mode in effect on entry is restored afterwards.

        Args:
            input_sentences: A single string or a sequence of strings.

        Returns:
            A 1-D tensor of predicted class indices, one per sentence.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                logits, _ = self(input_sentences)
                _, predicted = torch.max(logits, 1)
        finally:
            # Hand the module back in whatever mode the caller left it.
            self.train(was_training)
        return predicted

# Define a custom dataset class
class CustomDataset(Dataset):
    """Map-style dataset pairing each raw sentence with its label.

    The tokenizer is stored on the instance but not applied here; items are
    returned as untokenized ``(sentence, label)`` tuples.
    """

    def __init__(self, sentences, labels, tokenizer):
        self.sentences, self.labels, self.tokenizer = sentences, labels, tokenizer

    def __len__(self):
        """Return the number of sentences held by the dataset."""
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return the ``(sentence, label)`` pair stored at position ``idx``."""
        return self.sentences[idx], self.labels[idx]

# Toy sentiment corpus: each sentence lines up with the label at the same index
train_sentences = [
    "I love this movie!",
    "This is the worst film.",
    "It was an average experience.",
    "Absolutely fantastic!",
    "Terrible, I hated it.",
]
train_labels = [2, 0, 1, 2, 0]  # 0: negative, 1: neutral, 2: positive

# Build the classifier with its default arguments
model = SentenceTransformer()

# Wrap the corpus in a dataset and serve it in shuffled mini-batches of two
train_dataset = CustomDataset(
    sentences=train_sentences,
    labels=train_labels,
    tokenizer=model.tokenizer,
)
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=2)

# Training objective (cross-entropy) and optimiser (Adam, learning rate 1e-4)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

# Fine-tune for a fixed number of passes over the training data
num_epochs = 20
model.train()  # put the model in training mode

for epoch in range(num_epochs):
    total_loss = 0  # sum of the per-batch losses seen in this epoch
    for batch_sentences, batch_labels in train_dataloader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        batch_logits = model(batch_sentences)[0]  # the embeddings are not needed for training
        targets = batch_labels.clone().detach().to(torch.long)  # the loss is given long targets
        batch_loss = criterion(batch_logits, targets)
        batch_loss.backward()
        optimizer.step()
        total_loss += batch_loss.item()

    # Report the mean batch loss for the epoch
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_dataloader)}")

# Switch to evaluation mode before running inference
model.eval()

# Sentences to try the trained model on
test_sentences = [
    "This movie was amazing!",
    "I didn't enjoy the film.",
    "It was mediocre",
]

# First pass: report the predicted class index for each sentence
for sentence in test_sentences:
    prediction = model.predict([sentence])  # one-element batch
    print("Sentence:", sentence)
    print("Predicted Label:", prediction.item())
    print()

# Second pass: report the pooled embedding the model returns for each sentence
for sentence in test_sentences:
    with torch.no_grad():  # no gradients needed when only reading the embedding
        embedding = model([sentence])[1]
    print("Sentence:", sentence)
    print("Embedding:", embedding.numpy())
    print("Shape:", embedding.shape)
    print()



Epoch 1, Loss: 1.204245885213216
Epoch 2, Loss: 0.9329891602198283
Epoch 3, Loss: 0.7629186312357584
Epoch 4, Loss: 0.605431059996287
Epoch 5, Loss: 0.46378034353256226
Epoch 6, Loss: 0.3771019180615743
Epoch 7, Loss: 0.2372241566578547
Epoch 8, Loss: 0.2603372981150945
Epoch 9, Loss: 0.16032956540584564
Epoch 10, Loss: 0.09918709844350815
Epoch 11, Loss: 0.07036565989255905
Epoch 12, Loss: 0.0449579618871212
Epoch 13, Loss: 0.03944162838160992
Epoch 14, Loss: 0.03910525143146515
Epoch 15, Loss: 0.015205768402665854
Epoch 16, Loss: 0.013717139760653177
Epoch 17, Loss: 0.01712964568287134
Epoch 18, Loss: 0.011085777233044306
Epoch 19, Loss: 0.01072899562617143
Epoch 20, Loss: 0.010122761751214663
Sentence: This movie was amazing!
Predicted Label: 2

Sentence: I didn't enjoy the film.
Predicted Label: 0

Sentence: It was mediocre
Predicted Label: 1

Sentence: This movie was amazing!
Embedding: [[ 9.03306246e-01  6.38883233e-01  9.99999404e-01 -9.90139127e-01
  -9.99347866e-01  4.69933450