In [1]:
# Use the %pip magic (not !pip) so the packages are installed into the
# environment that backs this kernel rather than whatever pip is on PATH.
%pip install torch transformers datasets evaluate

Collecting evaluate
  Obtaining dependency information for evaluate from https://files.pythonhosted.org/packages/c2/d6/ff9baefc8fc679dcd9eb21b29da3ef10c81aa36be630a7ae78e4611588e1/evaluate-0.4.2-py3-none-any.whl.metadata
  Downloading evaluate-0.4.2-py3-none-any.whl.metadata (9.3 kB)
Downloading evaluate-0.4.2-py3-none-any.whl (84 kB)
   ---------------------------------------- 0.0/84.1 kB ? eta -:--:--
   --------- ------------------------------ 20.5/84.1 kB 640.0 kB/s eta 0:00:01
   -------------- ------------------------- 30.7/84.1 kB 435.7 kB/s eta 0:00:01
   ------------------------ --------------- 51.2/84.1 kB 327.7 kB/s eta 0:00:01
   ---------------------------------------- 84.1/84.1 kB 429.6 kB/s eta 0:00:00
Installing collected packages: evaluate
Successfully installed evaluate-0.4.2


In [4]:
# Explicit %pip magic: does not depend on IPython automagic being enabled
# and installs into the environment of the running kernel.
%pip install torch torchvision torchaudio

Note: you may need to restart the kernel to use updated packages.


In [6]:
import torch
import transformers
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import datasets
from datasets import load_dataset, DatasetDict
import evaluate
import numpy as np

# Report the library versions in use (informational only; nothing is enforced)
print(
    f"Using torch version: {torch.__version__}",
    f"Using transformers version: {transformers.__version__}",
    sep="\n",
)

try:
    # Load the IMDB dataset
    dataset = load_dataset('imdb')

    # Load the BERT tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    print("Tokenizer loaded successfully.")

    def tokenize_function(examples):
        """Tokenize a batch of examples for BERT.

        Args:
            examples: Batch handed over by ``Dataset.map(..., batched=True)``;
                its ``'text'`` entry is passed to the tokenizer.

        Returns:
            The tokenizer output, created with ``padding='max_length'`` and
            ``truncation=True`` so all examples share one sequence length.
        """
        return tokenizer(examples['text'], padding='max_length', truncation=True)

    tokenized_datasets = dataset.map(tokenize_function, batched=True)

    # Hold out 10% of the original training split for validation.
    # The fixed seed makes the split identical after a kernel restart.
    train_test_valid = tokenized_datasets['train'].train_test_split(test_size=0.1, seed=42)
    train_valid_test = DatasetDict({
        'train': train_test_valid['train'],
        'validation': train_test_valid['test']
    })

    # Load the BERT model with a 2-class classification head
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
    print("Model loaded successfully.")

    # Define training arguments
    training_args = TrainingArguments(
        output_dir='./results',
        eval_strategy='epoch',  # run evaluation at the end of every epoch
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=3,
        weight_decay=0.01,
    )

    # Accuracy metric from the `evaluate` library
    metric = evaluate.load('accuracy', trust_remote_code=True)

    def compute_metrics(eval_pred):
        """Compute accuracy for the Trainer.

        Args:
            eval_pred: Pair ``(logits, labels)`` supplied by the Trainer.

        Returns:
            The result of ``metric.compute`` for the arg-max predictions
            against the reference labels.
        """
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        return metric.compute(predictions=predictions, references=labels)

    # Initialize the Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_valid_test['train'],
        eval_dataset=train_valid_test['validation'],
        compute_metrics=compute_metrics,
    )

    # Train the model
    trainer.train()

    # Evaluate the model on the validation split
    eval_result = trainer.evaluate()
    print(f"Accuracy: {eval_result['eval_accuracy']:.4f}")

    def predict(text):
        """Classify the sentiment of a single piece of text.

        Args:
            text: Raw input string to classify.

        Returns:
            ``'positive'`` if the predicted class index is 1, otherwise
            ``'negative'``.
        """
        inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
        # The Trainer may have moved the model to an accelerator; the inputs
        # must live on the same device or the forward pass fails.
        inputs = inputs.to(model.device)
        # Inference only: disable dropout and skip gradient tracking.
        model.eval()
        with torch.no_grad():
            outputs = model(**inputs)
        prediction = torch.argmax(outputs.logits, dim=-1).item()
        return 'positive' if prediction == 1 else 'negative'

    # Example usage
    texts = [
        "I love this product! It has changed my life for the better.",
        "This is the worst thing I have ever bought. Completely useless.",
        "Had an amazing experience with their customer service.",
        "The product is okay, not too bad but not great either."
    ]

    for text in texts:
        sentiment = predict(text)
        print(f"Text: {text}\nSentiment: {sentiment}\n")
except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise so the cell is marked as failed and the full traceback is
    # shown instead of silently swallowing the error.
    raise

OSError: [WinError 126] The specified module could not be found. Error loading "G:\New folder\Lib\site-packages\torch\lib\fbgemm.dll" or one of its dependencies.