# Querying GPT2 Locally
This notebook runs the first experiments on querying a small GPT-2 model for personality traits.

In [1]:
# Imports
import torch, json, tqdm, sys, random
import torch.nn.functional as F
import numpy as np
from transformers import (
    GPT2Tokenizer,
    GPT2LMHeadModel,
    AutoModelForSequenceClassification,
    AutoTokenizer
)

In [2]:
# GPT2 Generation
# Text generator under test, plus the sentiment checkpoint(s) used for scoring.
model_name = 'gpt2' # <-- Change this per model
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()
sent_checkpoints = [
    'distilbert-base-uncased-finetuned-sst-2-english',
]
# Alternative sentiment model, currently disabled:
# sent_tokenizer = BartTokenizer.from_pretrained('textattack/facebook-bart-large-SST-2')
# sent_model = BartForSequenceClassification.from_pretrained('textattack/facebook-bart-large-SST-2', num_labels=2)

def logit_to_single_score(logits: torch.Tensor):
    """Collapse sentiment logits into a single signed score.

    The first row of ``logits`` is soft-maxed; index 0 is read as the
    negative class and index 1 as the positive class.

    Returns:
        ``P(positive) - P(negative)`` as a Python float.
    """
    class_probs = F.softmax(logits[0], dim=0).cpu().detach().numpy()
    p_negative = float(class_probs[0])
    p_positive = float(class_probs[1])
    return p_positive - p_negative

def get_question_logits(question: str):
    """Run every sentiment checkpoint on ``question`` and collect the logits.

    Args:
        question: Text to classify.

    Returns:
        A list holding one logits tensor per entry of ``sent_checkpoints``,
        in the same order as that list.
    """
    logits = []
    for checkpoint in sent_checkpoints:
        # NOTE: model and tokenizer are loaded from the checkpoint on every call.
        sent_model = AutoModelForSequenceClassification.from_pretrained(checkpoint).eval()
        sent_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        inputs = sent_tokenizer(question, return_tensors="pt")
        # Inference only: skip autograd bookkeeping so no graph is kept alive
        # by the returned logits.
        with torch.no_grad():
            outputs = sent_model(**inputs)
        logits.append(outputs.logits)
    return logits

def compute_cosine(tensor_1: torch.Tensor, tensor_2: torch.Tensor):
    """Cosine of the angle between two tensors, returned as a NumPy value.

    A 2-D argument is squeezed first (so a ``(1, n)`` tensor becomes a
    length-``n`` vector).  The result is the dot product divided by the
    product of the two norms, detached and moved to the CPU.
    """
    def _as_vector(t):
        # Only rank-2 inputs are squeezed; everything else passes through.
        return torch.squeeze(t) if t.dim() == 2 else t

    vec_a = _as_vector(tensor_1)
    vec_b = _as_vector(tensor_2)
    numerator = torch.dot(vec_a, vec_b)
    denominator = torch.linalg.norm(vec_a) * torch.linalg.norm(vec_b)
    return (numerator / denominator).detach().cpu().numpy()

def get_sent_score(q_logits: list[torch.Tensor], phrase: str):
    """Score how closely ``phrase`` matches the question's sentiment logits.

    For each checkpoint in ``sent_checkpoints`` the phrase is classified and
    the cosine between its logits and the matching entry of ``q_logits`` is
    computed.  The final score is the mean cosine minus the variance of the
    cosines across checkpoints.

    Args:
        q_logits: Question logits, one tensor per checkpoint, in the same
            order as ``sent_checkpoints``.
        phrase: Candidate response text to score.

    Returns:
        The score as a plain Python float (so it can be JSON-serialized).
    """
    scores = []
    # Compute and compare logits for each model
    for i, checkpoint in enumerate(sent_checkpoints):
        question_logits = q_logits[i]
        # NOTE: model and tokenizer are loaded from the checkpoint on every call.
        # .eval() mirrors get_question_logits so both sides are scored in eval mode.
        sent_model = AutoModelForSequenceClassification.from_pretrained(checkpoint).eval()
        sent_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        inputs = sent_tokenizer(phrase, return_tensors="pt")
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            outputs = sent_model(**inputs)
        scores.append(compute_cosine(outputs.logits, question_logits))

    # Shift the mean score by the variance of the distribution.
    # float(): the NumPy scalar produced here is not JSON-serializable.
    return float(np.mean(scores) - np.var(scores))
    
# Load the filtered questions
with open('./data/questions.json', 'r') as f:
    questions = json.load(f)
# TODO: Output JSON?
output_dict = {}
# DEBUG: limit to single item
questions = questions[0:1]
for i, question in enumerate(tqdm.tqdm(questions)):
    # 'question' is the text fed to the generator; 'text' is what gets stored
    # and used as the sentiment reference.
    prompt = question['question']
    question_text = question['text'].strip()

    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    sample_outputs = model.generate(
        input_ids,
        do_sample=True,
        max_length=len(input_ids[0]) + 16,
        top_k=200,
        top_p=0.95,
        num_return_sequences=5,
        # Same value generate() falls back to (see the logged warning);
        # passing it explicitly silences that warning.
        pad_token_id=tokenizer.eos_token_id,
    )

    # Perform scoring and storing outputs
    output_dict[i] = {'question': question_text, 'responses': []}
    logits = get_question_logits(question_text)

    # NOTE(review): the `- 1` keeps the final prompt character at the start of
    # each response — confirm this offset is intended.
    response_start = len(prompt) - 1
    for sample_output in sample_outputs:
        decoded = tokenizer.decode(sample_output, skip_special_tokens=True)
        # Drop the echoed prompt, then keep only the first generated line.
        out_str = decoded[response_start:].split('\n')[0]
        output_dict[i]['responses'].append({
            'text': out_str,
            # get_sent_score may hand back a NumPy scalar, which json.dump
            # cannot serialize; store a plain float instead.
            'score': float(get_sent_score(logits, out_str)),
            'facet': question['facet'],
            'domain': question['domain'],
            'reverse_score': question['reverse_score'],
        })

with open(f"./data/{model_name}-out.json", 'w') as f:
    json.dump(output_dict, f, indent=4)


  0%|          | 0/1 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
100%|██████████| 1/1 [00:17<00:00, 17.64s/it]


In [7]:
# Debug check: token count of the last prompt encoded by the generation loop
# (relies on `input_ids` still being defined from that loop).
print(input_ids.shape[1])

62


In [10]:
# NOTE(review): a bare print() only emits a blank line, yet the output recorded
# for this cell is 50256 — the cell appears to have been edited after it last
# ran. TODO: restore the inspected expression or delete the cell.
print()

50256
