In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m63.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.28.1


In [24]:
"""The reward function for a conversation is made up of three components:
- r_c = congruence reward: how likely the agent is to have said what the respondent said (negative KL divergence between the respondent's actual next token and the agent's next-token distribution, which reduces to the log-probability the agent assigns to that token)
- r_s = sentiment reward: how positive the sentiment of the respondent was (scored with a pre-existing sentiment model)
- r_a = affection reward: how much the agent likes the respondent (discounted sum of previous rewards)

Here, we build the congruence and sentiment rewards.
"""

import os
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from google.colab import drive
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import AutoTokenizer, AutoModelForSequenceClassification


def get_congruence_reward(comment_ids, response_ids_trunc, agent_model, max_tokens=None):
  """Score how likely the agent model is to have produced the respondent's response.

  The response is replayed one token at a time: the agent model is run on the
  context seen so far, the log-probability it assigns to the token the respondent
  actually produced is recorded, and that token is then appended to the context.
  Against a one-hot target the negative KL divergence equals this log-probability
  (i.e. the negative cross-entropy), so the reward is always <= 0 and values
  closer to 0 mean higher congruence.

  Args:
    comment_ids (torch tensor): IDs of agent's comment, including original query,
      with shape (1, seq_len). Not modified.
    response_ids_trunc (torch tensor): IDs of respondent's response, not including
      original comment or query, with shape (1, response_len).
    agent_model (transformers GPT2LMHeadModel): the agent model; called as
      agent_model(ids) and expected to return an object with a `.logits` tensor.
    max_tokens (int, optional): score at most this many leading response tokens.
      Defaults to None, which scores the whole response.

  Returns:
    float: Congruence reward value (mean per-token log-probability), or NaN if
      there are no response tokens to score.
  """

  response_tokens = response_ids_trunc[0][:max_tokens]
  if response_tokens.numel() == 0:
    # Nothing to average over; return NaN explicitly instead of via np.mean([]).
    return float('nan')

  rewards = []
  context_ids = comment_ids  # torch.cat below builds new tensors, so no clone is needed

  # Inference only: do not build an autograd graph for every forward pass.
  with torch.no_grad():
    for token_id in response_tokens:
      next_token_logits = agent_model(context_ids).logits[0, -1]
      log_probs = F.log_softmax(next_token_logits, dim=0)  # Predicted log-probabilities
      rewards.append(log_probs[token_id].item())
      # Extend the context with the respondent's actual token before the next step.
      context_ids = torch.cat((context_ids, token_id.reshape(1, 1)), dim=1)

  return float(np.mean(rewards))


def get_sentiment_reward(response_text_trunc, sentiment_tokenizer, sentiment_model):
  """Get a scalar reward corresponding to sentiment of respondent's response.

  The reward is P(positive) - P(negative) under the sentiment model, so it lies
  in [-1, 1].

  Args:
    response_text_trunc (str): text of respondent's response, not including
      original comment or query
    sentiment_tokenizer (transformers AutoTokenizer): tokenizer for sentiment model
    sentiment_model (transformers AutoModelForSequenceClassification):
      sentiment model whose three output classes are ordered
      (negative, neutral, positive)

  Returns:
    float: Sentiment reward value
  """

  # Tokenize and move the inputs to whichever device the model lives on
  input_ids = sentiment_tokenizer.encode(response_text_trunc, return_tensors="pt")
  input_ids = input_ids.to(sentiment_model.device)

  # Get sentiment probabilities from model (negative, neutral, or positive);
  # inference only, so skip building an autograd graph
  with torch.no_grad():
    logits = sentiment_model(input_ids).logits
  sentiment_probs = F.softmax(logits, dim=1)[0]

  # Calculate the reward as the positive probability minus the negative probability
  sentiment_reward = (sentiment_probs[2] - sentiment_probs[0]).item()

  return sentiment_reward


# Mount Google Drive and derive the project folder from the same mount point, so the
# path resolves regardless of the notebook's current working directory.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)
project_path = os.path.join(DRIVE_MOUNT_POINT, 'MyDrive/Colab Notebooks/GPT_community/')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [22]:
# Load in prompts (one per line); blank lines are skipped so that an empty prompt
# can never be drawn later
prompts_file = os.path.join(project_path, 'data/brighton_philosophy_prompts.txt')
with open(prompts_file, encoding='utf-8') as file:
    prompts = [line.rstrip() for line in file if line.strip()]

# Use the GPU when one is available, otherwise fall back to CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Create agent and respondent models
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# GPT-2 has no dedicated padding token, so the end-of-sequence token doubles as padding
tokenizer.pad_token = tokenizer.eos_token
pad_token_id = tokenizer.eos_token_id
agent = GPT2LMHeadModel.from_pretrained('Linus4Lyf/Kant_Metaphysics_Of_Morals').to(device)
respondent = GPT2LMHeadModel.from_pretrained('Linus4Lyf/Hume_A_Treatise_Of_Human_Nature').to(device)

# Create sentiment model
sent_tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment")
sent_model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment").to(device)

In [25]:
questioner_name = 'Socrates'
agent_name = 'Kant'
respondent_name = 'Hume'

# Sampling settings shared by both speakers
generation_kwargs = dict(
    do_sample=True,
    temperature=0.9,
    max_new_tokens=200,
    pad_token_id=pad_token_id,
    eos_token_id=pad_token_id,
)
separator = '--------------------------------------------------------------------------------------'

# Get query from questions list
query_text = f"{questioner_name}: " + np.random.choice(prompts)
print(query_text, '\n')
query_text += f"\n{agent_name}: "

# Encode query and get comment from agent. Special tokens (end-of-text) are dropped
# when decoding so they do not end up in the middle of the respondent's prompt.
query_ids = tokenizer.encode(query_text, return_tensors='pt').to(agent.device)
agent_output_ids = agent.generate(query_ids, **generation_kwargs)
comment_text = tokenizer.batch_decode(agent_output_ids, skip_special_tokens=True)[0]
print(separator)
print(comment_text, '\n')
comment_text += f"\n{respondent_name}: "

# Get response from respondent
comment_ids = tokenizer.encode(comment_text, return_tensors='pt').to(respondent.device)
response_ids = respondent.generate(comment_ids, **generation_kwargs)
response_text = tokenizer.batch_decode(response_ids, skip_special_tokens=True)[0]
print(separator)
print(response_text, '\n')

# Remove original query and comment from the response. generate() returns the prompt
# followed by the new tokens, so slicing off the prompt length keeps exactly the tokens
# the respondent produced (re-tokenizing decoded text can yield different token ids).
response_ids_trunc = response_ids[:, comment_ids.shape[1]:]
response_text_trunc = tokenizer.decode(response_ids_trunc[0], skip_special_tokens=True).strip()
print(separator)
print(response_text_trunc, '\n')

# Get congruence and sentiment reward values for response
congruence_reward = get_congruence_reward(comment_ids, response_ids_trunc, agent)
sentiment_reward = get_sentiment_reward(response_text_trunc, sent_tokenizer, sent_model)
print(f"Congruence reward = {congruence_reward}, Sentiment reward = {sentiment_reward}")

Socrates: Are we biological machines? 

--------------------------------------------------------------------------------------
Socrates: Are we biological machines?
Kant: 

*I have never known of a person who could, however, know of any thing which could exist, which would in this respect serve as a precedent, and perhaps as a case of the will. The reason is not that in reality we cannot know anything, for it, however, is a question of feeling, because we act in a certain sense by the reason, and that in order to imagine oneself to be a machine, or even of thinking as the will, we must imagine ourselves to be conscious of another machine which is capable of being conscious of us; and that in this way both our will and our reason must have a very long acquaintance with one another.

It is, however, not the case that we should apprehend as a thing a being capable of being conscious of us that such a thing being as we conceive must possess the intelligence which is necessary to be a machi