In [2]:
from transformers import BertTokenizer, BertModel
import torch
# Load the pretrained uncased BERT-base checkpoint: tokenizer first, then encoder.
# Both objects are reused by later cells in this notebook.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

# Prompt wrapper containing a single {text} placeholder
prompt_template = """
Text: {text}

Please analyze the above text using BERT encoding.
"""

# Fill the template with a demo sentence.
# NOTE: formatted_prompt is only built here; the tokenizer call below encodes
# the raw sample_text, not the formatted prompt.
sample_text = "This is a sample text for BERT encoding."
formatted_prompt = prompt_template.format(text=sample_text)

# Turn the demo sentence into PyTorch tensors for the encoder
inputs = tokenizer(
    sample_text,
    return_tensors='pt',
    padding=True,
    truncation=True,
)

# Run the encoder with autograd disabled; only the forward values are needed
with torch.no_grad():
    outputs = model(**inputs)

# Keep both representations exposed by the model output.
# cls_embedding stores the model's pooler_output (see the BertModel docs for
# how it is derived from the first token's hidden state).
last_hidden_state = outputs.last_hidden_state
cls_embedding = outputs.pooler_output

print("Prompt template created successfully")
print(f"CLS embedding shape: {cls_embedding.shape}")

Prompt template created successfully
CLS embedding shape: torch.Size([1, 768])


In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.metrics import accuracy_score, classification_report

import torch.nn as nn

# Define a BERT-based classifier
class BertClassifier(nn.Module):
    """BERT encoder followed by dropout and a linear classification head.

    Args:
        num_classes: Number of output classes (width of the returned logits).
        bert_model: Encoder module to wrap. It must expose
            ``config.hidden_size`` and return an object with a
            ``pooler_output`` attribute. When ``None`` (default) the
            notebook-level ``model`` is used, so the cell that loads it must
            have been run first.
        dropout_prob: Dropout probability applied to the pooled output before
            the linear head. Defaults to 0.1.

    Note:
        The encoder is stored by reference, not copied. Training this
        classifier therefore updates the same encoder object that was passed
        in (or the shared notebook-level ``model``).
    """

    def __init__(self, num_classes, bert_model=None, dropout_prob=0.1):
        super().__init__()
        # Fall back to the shared notebook-level encoder when none is injected.
        self.bert = model if bert_model is None else bert_model
        self.dropout = nn.Dropout(dropout_prob)
        # Size the head from the encoder config instead of hard-coding 768,
        # so checkpoints with a different hidden width work as well.
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Compute unnormalised class logits for a batch of encoded inputs.

        Args:
            input_ids: Token id tensor produced by the tokenizer.
            attention_mask: Mask tensor produced by the tokenizer.
            token_type_ids: Optional segment id tensor; forwarded to the
                encoder unchanged (``None`` lets the encoder use its default).

        Returns:
            Tensor of logits whose last dimension has size ``num_classes``.
        """
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
        pooled_output = self.dropout(outputs.pooler_output)
        return self.classifier(pooled_output)

# Few-shot learning setup
def create_few_shot_dataset(texts, labels, n_shot=None, text_tokenizer=None):
    """Tokenize labelled texts into a ``TensorDataset`` for few-shot training.

    Args:
        texts: Sequence of input strings.
        labels: Sequence of hashable class ids (e.g. Python ints), one per
            entry in ``texts``.
        n_shot: Maximum number of examples to keep per class, taken in input
            order. ``None`` (default) keeps every example, which matches the
            earlier behaviour where this argument was accepted but ignored.
        text_tokenizer: Callable accepting the same arguments as the
            tokenizer call below and returning a mapping with
            ``input_ids``, ``attention_mask`` and ``token_type_ids``. When
            ``None`` the notebook-level ``tokenizer`` is used, so the cell
            that loads it must have been run first.

    Returns:
        A ``TensorDataset`` whose items are
        ``(input_ids, attention_mask, token_type_ids, label)``.

    Raises:
        ValueError: If ``texts`` is empty, if ``texts`` and ``labels`` differ
            in length, or if ``n_shot`` is smaller than 1.
    """
    # Fail early with a clear message instead of TensorDataset's size assertion.
    if len(texts) != len(labels):
        raise ValueError(
            f"texts and labels must have the same length, "
            f"got {len(texts)} and {len(labels)}"
        )
    if len(texts) == 0:
        raise ValueError("texts must contain at least one example")

    if n_shot is not None:
        if n_shot < 1:
            raise ValueError(f"n_shot must be at least 1, got {n_shot}")
        # Keep the first n_shot examples of each class, preserving input order.
        per_class_count = {}
        kept_texts, kept_labels = [], []
        for text, label in zip(texts, labels):
            seen = per_class_count.get(label, 0)
            if seen < n_shot:
                per_class_count[label] = seen + 1
                kept_texts.append(text)
                kept_labels.append(label)
        texts, labels = kept_texts, kept_labels

    # Only touch the notebook-level tokenizer when no tokenizer was injected.
    encode = tokenizer if text_tokenizer is None else text_tokenizer
    encoded_inputs = encode(texts, padding=True, truncation=True,
                            return_tensors='pt', max_length=512)

    return TensorDataset(
        encoded_inputs['input_ids'],
        encoded_inputs['attention_mask'],
        encoded_inputs['token_type_ids'],
        torch.tensor(labels),
    )

# Demo task: binary sentiment classification (1 = positive, 0 = negative)

# One-shot split: a single example for each class
one_shot_texts = [
    "This product is absolutely amazing!",  # positive
    "This is the worst purchase ever.",     # negative
]
one_shot_labels = [1, 0]

# Few-shot split: three examples for each class, positives first
few_shot_texts = [
    # positive
    "This product is absolutely amazing!",
    "I love it!",
    "Excellent quality!",
    # negative
    "This is the worst purchase ever.",
    "Terrible experience.",
    "Very disappointed.",
]
few_shot_labels = [1] * 3 + [0] * 3

# Tokenize both splits into tensor datasets
one_shot_dataset = create_few_shot_dataset(one_shot_texts, one_shot_labels)
few_shot_dataset = create_few_shot_dataset(few_shot_texts, few_shot_labels)

# Build the two-way classifier
num_classes = 2
bert_classifier = BertClassifier(num_classes=num_classes)

# Report what was built
print(f"One-shot dataset size: {len(one_shot_dataset)}")
print(f"Few-shot dataset size: {len(few_shot_dataset)}")
print("\nClassifier initialized successfully for few-shot learning")

ModuleNotFoundError: No module named 'sklearn'