In [1]:
import torch
from transformers import pipeline

# Build a ready-made sentiment classifier backed by a DistilBERT checkpoint
# fine-tuned on SST-2
sentiment_analyzer = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Sample sentence to classify
text = "I absolutely love this product! Would buy again."

# Classify the sentence; the result is indexed as a list whose first item
# carries the 'label' and 'score' of the prediction
result = sentiment_analyzer(text)
top_prediction = result[0]
print(f"Sentiment: {top_prediction['label']}")
print(f"Confidence: {top_prediction['score']:.4f}")

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cuda:0


Sentiment: POSITIVE
Confidence: 0.9999


In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

class BERTSentimentAnalyzer:
    """Sentiment classifier wrapping a Hugging Face sequence-classification model.

    On construction the tokenizer and model for ``model_name`` are loaded, the
    model is moved to the GPU when ``torch.cuda.is_available()`` (CPU
    otherwise) and switched to eval mode.

    Attributes:
        tokenizer: Tokenizer loaded via ``AutoTokenizer.from_pretrained``.
        model: Model loaded via
            ``AutoModelForSequenceClassification.from_pretrained``.
        device: ``torch.device`` the model and its inputs are placed on.
        labels: Class names ordered by class index.
    """

    def __init__(self, model_name="distilbert-base-uncased-finetuned-sst-2-english"):
        """Load ``model_name`` and prepare it for inference.

        Args:
            model_name: Hub id or local path of a sequence-classification
                checkpoint.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.model.eval()
        # Take the class names from the checkpoint's own config instead of
        # hard-coding them, so a non-default model_name reports its own labels
        # (the default checkpoint is expected to yield NEGATIVE, POSITIVE).
        id2label = self.model.config.id2label
        self.labels = [id2label[idx] for idx in sorted(id2label)]

    def preprocess_text(self, text):
        """Normalize whitespace in ``text`` and tokenize it for the model.

        Args:
            text: A single input string.

        Returns:
            Dict of tokenizer outputs as PyTorch tensors moved to
            ``self.device``.
        """
        # Collapse runs of whitespace (including newlines) to single spaces
        text = ' '.join(text.split())

        # A single sequence needs no padding; padding it out to 512 tokens
        # would only make the forward pass slower. Over-long inputs are still
        # truncated to 512 tokens.
        inputs = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=512,
            truncation=True,
            return_tensors='pt'
        )

        # Move to GPU if available
        return {k: v.to(self.device) for k, v in inputs.items()}

    def predict(self, text):
        """Classify the sentiment of a single text.

        Args:
            text: A single input string.

        Returns:
            Dict with keys ``'text'`` (the input exactly as passed in),
            ``'sentiment'`` (name of the most probable class),
            ``'confidence'`` (probability of that class) and
            ``'probabilities'`` (class name -> probability for every class).
        """
        inputs = self.preprocess_text(text)

        # Inference only: no gradients needed
        with torch.no_grad():
            logits = self.model(**inputs).logits

        # Row 0 is the only example in the batch; index it explicitly so the
        # argmax and the per-class dict always refer to the same row.
        probabilities = torch.softmax(logits, dim=-1)[0]
        best_index = int(probabilities.argmax())

        return {
            'text': text,
            'sentiment': self.labels[best_index],
            'confidence': probabilities[best_index].item(),
            'probabilities': {
                label: prob.item()
                for label, prob in zip(self.labels, probabilities)
            }
        }

In [4]:
 

def demonstrate_sentiment_analysis():
    """Classify a few sample sentences and print a report for each one.

    For every sentence the report shows the text, the predicted sentiment,
    its confidence and the probability of each class.
    """
    # Sample sentences to classify
    samples = (
        "This product completely transformed my workflow!",
        "Terrible experience, would not recommend.",
        "It's decent for the price, but nothing special.",
    )

    # Load the model once and reuse it for every sample
    classifier = BERTSentimentAnalyzer()

    for sample in samples:
        outcome = classifier.predict(sample)
        per_class = outcome['probabilities']

        print(f"\nText: {outcome['text']}")
        print(f"Sentiment: {outcome['sentiment']}")
        print(f"Confidence: {outcome['confidence']:.4f}")
        print("Detailed probabilities:")
        for name in per_class:
            print(f"  {name}: {per_class[name]:.4f}")

# Running demonstration
demonstrate_sentiment_analysis()


Text: This product completely transformed my workflow!
Sentiment: POSITIVE
Confidence: 0.9997
Detailed probabilities:
  NEGATIVE: 0.0003
  POSITIVE: 0.9997

Text: Terrible experience, would not recommend.
Sentiment: NEGATIVE
Confidence: 0.9934
Detailed probabilities:
  NEGATIVE: 0.9934
  POSITIVE: 0.0066

Text: It's decent for the price, but nothing special.
Sentiment: NEGATIVE
Confidence: 0.9897
Detailed probabilities:
  NEGATIVE: 0.9897
  POSITIVE: 0.0103


## Named Entity Recognition (NER)

In [5]:
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

class BERTNamedEntityRecognizer:
    """Named-entity recognizer built on a BERT token-classification checkpoint.

    On construction the tokenizer and model are loaded, the model is moved to
    the GPU when ``torch.cuda.is_available()`` (CPU otherwise) and switched to
    eval mode.
    """

    def __init__(self, model_name="dslim/bert-base-NER"):
        """Load ``model_name`` and prepare it for inference.

        Args:
            model_name: Hub id or local path of a token-classification
                checkpoint. The entity grouping assumes ``B-``/``I-``/``O``
                labels and ``##``-prefixed WordPiece continuation tokens.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForTokenClassification.from_pretrained(model_name)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.model.eval()

    def recognize_entities(self, text):
        """Find the named entities in a single text.

        Args:
            text: A single input string. Input longer than the tokenizer's
                limit is truncated (``truncation=True``).

        Returns:
            List of dicts, one per entity in order of appearance, each with
            keys ``'type'`` (label without its ``B-``/``I-`` prefix) and
            ``'text'`` (tokens of the entity joined with spaces, WordPiece
            continuations glued to their word).
        """
        # Tokenize input text
        inputs = self.tokenizer(
            text,
            add_special_tokens=True,
            return_tensors="pt",
            padding=True,
            truncation=True
        )

        # Move inputs to device
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Inference only: pick the highest-scoring label id for every token
        with torch.no_grad():
            outputs = self.model(**inputs)
            predictions = outputs.logits.argmax(-1)

        # Row 0 is the only example in the batch
        tokens = self.tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
        labels = [self.model.config.id2label[p.item()] for p in predictions[0]]

        special_tokens = set(getattr(self.tokenizer, 'all_special_tokens', ()))
        return self._group_entities(tokens, labels, special_tokens)

    @staticmethod
    def _group_entities(tokens, labels, special_tokens=frozenset()):
        """Merge per-token IOB labels into a list of entity dicts."""
        entities = []
        current_entity = None

        for token, label in zip(tokens, labels):
            if token.startswith('##'):
                # A WordPiece continuation belongs to the same word as the
                # token before it, so it shares that word's fate whatever
                # label the piece itself received. An entity is only open
                # here if the preceding token was part of it.
                if current_entity is not None:
                    current_entity['text'] += token[2:]
                continue

            # Special markers such as [CLS]/[SEP] are never entity text
            if token in special_tokens:
                label = 'O'

            is_begin = label.startswith('B-')
            is_inside = label.startswith('I-')
            entity_type = label[2:]

            # An I- tag with no open entity, or with a type different from
            # the open one, cannot continue anything: treat it as a start.
            starts_new = is_begin or (
                is_inside
                and (current_entity is None or current_entity['type'] != entity_type)
            )

            if starts_new:
                if current_entity is not None:
                    entities.append(current_entity)
                current_entity = {'type': entity_type, 'text': token}
            elif is_inside:
                current_entity['text'] += ' ' + token
            else:
                # Outside any entity: close whatever was open
                if current_entity is not None:
                    entities.append(current_entity)
                    current_entity = None

        # Flush an entity that runs up to the last token
        if current_entity is not None:
            entities.append(current_entity)

        return entities

In [7]:
def demonstrate_ner():
    """Run the recognizer on a short sample passage and print each entity."""
    # Sample passage mentioning people, companies and places
    text = """
    Apple CEO Tim Cook announced new AI features at their headquarters 
    in Cupertino, California. Microsoft and Google are also investing 
    heavily in artificial intelligence research.
    """

    # Load the recognizer and extract the entities from the passage
    recognizer = BERTNamedEntityRecognizer()
    found = recognizer.recognize_entities(text)

    # One "- text (type)" line per entity
    print("Found entities:")
    for item in found:
        kind, surface = item['type'], item['text']
        print(f"- {surface} ({kind})")

# Running demonstration
demonstrate_ner()

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Found entities:
- Apple (ORG)
- Tim Cook (PER)
- AI (MISC)
- Cupertino (LOC)
- California (LOC)
- Microsoft (ORG)
- Google (ORG)
