In [71]:
import torch
import numpy as np

In [72]:
from datasets import load_dataset

# Load the "sst2" dataset; the rest of the notebook reads the 'sentence' and
# 'label' columns of its 'train' split (label 0 = negative, 1 = positive).
dataset = load_dataset("sst2")

def make_prompt(n_shots, test_review):
    """Assemble a few-shot sentiment prompt that ends in an unlabeled review.

    The first ``n_shots`` rows of the global ``dataset['train']`` split are
    rendered as ``'<sentence>' - <label>, `` entries (label 0 becomes
    "negative", any other value "positive"). The prompt then ends with
    ``'<test_review>' -`` so that the next token a language model produces
    is its predicted label.

    Args:
        n_shots: Number of leading training rows to use as demonstrations.
        test_review: Review text to be classified.

    Returns:
        The complete prompt string.
    """
    header = 'This is a movie review dataset with associated positive or negative sentiment: \n'

    # One slice provides both columns for the demonstration rows.
    demo_rows = dataset['train'][0:n_shots]

    demo_entries = [
        "'" + text + "'" + ' - ' + ('negative' if tag == 0 else 'positive') + ', \n '
        for text, tag in zip(demo_rows['sentence'], demo_rows['label'])
    ]

    query_entry = "'" + test_review + "'" + ' -'
    return header + "".join(demo_entries) + query_entry

Found cached dataset sst2 (/Users/henningheyen/.cache/huggingface/datasets/sst2/default/2.0.0/9896208a8d85db057ac50c72282bcb8fe755accc671a57dd8059d4e130961ed5)


  0%|          | 0/3 [00:00<?, ?it/s]

In [73]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

def generate_text(model_name, prompt, n_generated_tokens=2):
    """Continue ``prompt`` with a GPT-2 checkpoint and print the result.

    Args:
        model_name: Checkpoint name passed to ``from_pretrained``.
        prompt: Text to continue.
        n_generated_tokens: Number of new tokens to generate after the prompt.

    Returns:
        None. The decoded text (prompt plus continuation) and its last
        whitespace-separated word are printed.
    """
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, so generate() does not have to guess it.
    encoded = tokenizer(prompt, return_tensors='pt')

    # max_new_tokens states the intent directly instead of computing
    # prompt_length + n. GPT-2 has no pad token, so EOS is used explicitly.
    output = model.generate(
        **encoded,
        max_new_tokens=n_generated_tokens,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # output[0] holds the prompt tokens followed by the newly generated ones.
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

    print("Generated Text:", generated_text)
    # Guard against an empty / whitespace-only decode, where split() is [].
    words = generated_text.split()
    print("Last word: ", words[-1] if words else "")


In [74]:
# Loaded (model, tokenizer) pairs keyed by checkpoint name, so repeated calls
# (e.g. one per LIME perturbation) do not reload the weights every time.
_GPT2_CACHE = {}


def _load_gpt2(model_name):
    """Return a cached ``(model, tokenizer)`` pair for ``model_name``."""
    if model_name not in _GPT2_CACHE:
        model = GPT2LMHeadModel.from_pretrained(model_name)
        model.eval()  # inference only
        tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        _GPT2_CACHE[model_name] = (model, tokenizer)
    return _GPT2_CACHE[model_name]


def predict_token(prompt, model_name='gpt2', top_k=5, verbose=True):
    """Compute the next-token probability distribution for ``prompt``.

    Args:
        prompt: Text whose next token should be predicted.
        model_name: Checkpoint name passed to ``from_pretrained``.
        top_k: Number of most likely tokens to print when ``verbose`` is set;
            clamped to the vocabulary size.
        verbose: If True, print the ``top_k`` tokens with their probabilities.

    Returns:
        A 1-D tensor of probabilities over the vocabulary for the token that
        follows ``prompt`` (softmax of the logits at the last position).
    """
    model, tokenizer = _load_gpt2(model_name)

    # Tokenize the prompt
    input_ids = tokenizer.encode(prompt, return_tensors='pt')

    # One forward pass; no gradients are needed for inference.
    with torch.no_grad():
        output = model(input_ids)

    # Logits at the last position of the (single) sequence -> probabilities.
    probs = torch.nn.functional.softmax(output.logits[0, -1, :], dim=0)

    if verbose:
        # Clamp so that top_k larger than the vocabulary does not raise.
        k = min(top_k, probs.numel())
        top_probs, top_indices = torch.topk(probs, k=k)
        tokens = tokenizer.convert_ids_to_tokens(top_indices.tolist())
        for token, prob in zip(tokens, top_probs.tolist()):
            print(token, prob)

    return probs
    

In [75]:
# Sanity check: show which vocabulary tokens the two label ids stand for.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
for label_token_id in (4633, 3967):
    print(tokenizer.convert_ids_to_tokens(label_token_id))

Ġnegative
Ġpositive


In [76]:
# Review to classify. make_prompt uses the first n_shots training rows as
# demonstrations, so index 33 only stays unseen while n_shots <= 33.
test_review = dataset['train'][33]['sentence']

In [77]:
# Build a 20-shot prompt that ends with the unlabeled test review.
prompt = make_prompt(n_shots=20, test_review = test_review)

In [78]:
# Display the assembled prompt to inspect its exact formatting.
prompt

"This is a movie review dataset with associated positive or negative sentiment: \n'hide new secretions from the parental units ' - negative, \n 'contains no wit , only labored gags ' - negative, \n 'that loves its characters and communicates something rather beautiful about human nature ' - positive, \n 'remains utterly satisfied to remain the same throughout ' - negative, \n 'on the worst revenge-of-the-nerds clichés the filmmakers could dredge up ' - negative, \n 'that 's far too tragic to merit such superficial treatment ' - negative, \n 'demonstrates that the director of such hollywood blockbusters as patriot games can still turn out a small , personal film with an emotional wallop . ' - positive, \n 'of saucy ' - positive, \n 'a depressed fifteen-year-old 's suicidal poetry ' - negative, \n 'are more deeply thought through than in most ` right-thinking ' films ' - positive, \n 'goes to absurd lengths ' - negative, \n 'for those moviegoers who complain that ` they do n't make movie

In [79]:
# Print the 5 most likely next tokens and keep the full next-token distribution.
probs = predict_token(prompt, model_name='gpt2', top_k=5)

Ġnegative 0.5850855708122253
Ġpositive 0.3389498293399811
Ġ 0.004583791363984346
Ġneutral 0.0024384758435189724
Ġ' 0.0015141303883865476


In [80]:
#probs = predict_token(prompt, model_name='gpt2-large', top_k=5)

In [81]:
def predict_for_lime(prompts, model_name='gpt2'):
    """Classifier function for LIME: P(negative), P(positive) per prompt.

    For each prompt, the next-token distribution from ``predict_token`` is
    restricted to the two label tokens and renormalized so each row sums to 1.

    Args:
        prompts: Iterable of prompt strings.
        model_name: Checkpoint name used for the tokenizer lookup and passed
            on to ``predict_token``.

    Returns:
        A NumPy array of shape ``(len(prompts), 2)`` with columns ordered
        ``[negative, positive]``.
    """
    # Loop-invariant: resolve the label-token ids once, not once per prompt.
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    label_ids = tokenizer.convert_tokens_to_ids(['Ġnegative', 'Ġpositive'])  # gpt2: 4633 (Ġnegative), 3967 (Ġpositive)

    results = []

    for prompt in prompts:
        probs = predict_token(prompt, model_name=model_name, verbose=False)

        token_probs = probs[label_ids]
        norm_probs = token_probs / torch.sum(token_probs)  # normalizing

        results.append(norm_probs.detach().tolist())

    # reshape keeps the (n, 2) contract even when no prompts are given.
    return np.array(results, dtype=float).reshape(-1, 2)
    

In [82]:
# Smoke test on the single prompt; each row is [negative, positive].
predict_for_lime([prompt])

array([[0.63318521, 0.36681476]])

In [84]:
from lime.lime_text import LimeTextExplainer

# Order must match the columns returned by predict_for_lime: [negative, positive].
class_names = ['negative', 'positive']

# LIME draws random perturbations of the text; a fixed seed makes the
# explanation reproducible across kernel restarts.
explainer = LimeTextExplainer(class_names=class_names, random_state=42)

# NOTE(review): num_samples=10 is far below LIME's default of 5000, presumably
# because every sample costs a full GPT-2 forward pass. With so few samples
# the fitted weights are very noisy -- raise it for a trustworthy explanation.
explanation = explainer.explain_instance(prompt, predict_for_lime, num_samples=10)
print("Prompt: ", prompt)
print("LIME Explanation:")
explanation.show_in_notebook(text=True)
print('-----------------------')

