Notebook for score normalization.

In [None]:
# Imports
import pandas as pd
import pickle
import numpy as np

In [None]:
# May be needed for numerical stability, with GPT2 especially.
def logsumexp(x):
    """Return log(sum(exp(x))) computed without overflow/underflow.

    The maximum of `x` is subtracted before exponentiating, so the largest
    term becomes exp(0) = 1 and the sum cannot overflow.

    Args:
        x: Array-like of log-scores (NumPy array, list, tuple, ...).

    Returns:
        The log of the summed exponentials. If the maximum of `x` is not
        finite (all entries are -inf, or an entry is +inf or nan), that
        maximum is returned as-is.

    Raises:
        ValueError: If `x` is empty (raised by the max reduction).
    """
    # Accept plain Python sequences as well as arrays.
    x = np.asarray(x)
    c = x.max()
    if not np.isfinite(c):
        # Shifting by a non-finite maximum would compute inf - inf = nan.
        return c
    return c + np.log(np.sum(np.exp(x - c)))

Conditional Probability Calculations

T5 Calculations

In [None]:
# Read the pickled T5 conditional scores from disk into `obj`.
PICKLE_PATH = 't5_conditional_adjacent.pickle'
with open(PICKLE_PATH, mode='rb') as score_file:
    obj = pickle.load(score_file)

In [None]:
# Get the set of prompt templates.
# Each entry is one template string; the number of entries is what the
# following cell reads as `nprompts`.
# `{here}` looks like a placeholder for the prompt word and `<extra_id_0>`
# looks like the slot the model is asked to fill -- presumably the T5 sentinel
# token; confirm against the script that generated the scores.
prompt_templates = [
    "My preferred words are {here} and <extra_id_0>.",
    "My preferred words are {here}, <extra_id_0>, and tree.",
    "She wrote the words {here} and <extra_id_0>.",
    "She wrote the words {here} and <extra_id_0> in her notebook.",
    "She wrote the words {here}, <extra_id_0>, and cabbage.",
    "I wrote the words {here} and <extra_id_0>.",
    "I wrote the words {here} and <extra_id_0> in my notebook.",
    "I wrote the words {here}, <extra_id_0>, and cabbage.",
    "He wrote the words {here} and <extra_id_0>.",
    "He wrote the words {here} and <extra_id_0> in his notebook.",
    "He wrote the words {here}, <extra_id_0>, and cabbage.",
    "We wrote the words {here} and <extra_id_0>.",
    "We wrote the words {here} and <extra_id_0> in our notebook.",
    "We wrote the words {here}, <extra_id_0>, and cabbage.",
    "Mary wrote the words {here} and <extra_id_0>.",
    "Mary wrote the words {here} and <extra_id_0> in her notebook.",
    "Mary wrote the words {here}, <extra_id_0>, and cabbage.",
    "Please spell {here} and <extra_id_0>.",
    "Please spell {here}, <extra_id_0>, and panther.",
    "Please spell {here} and <extra_id_0> correctly.",
    "Say {here} and <extra_id_0>.",
    "Say {here}, <extra_id_0>, and tapestry.",
    "Say {here} and <extra_id_0> again.",
    "The first words on the list were {here} and <extra_id_0>.",
    "The first words on the list were {here}, <extra_id_0>, and oligarchy.",
    "The easiest words on the list were {here} and <extra_id_0>.",
    "The easiest words on the list were {here}, <extra_id_0>, and oligarchy.",
    "The hardest words on the list were {here} and <extra_id_0>.",
    # NOTE(review): this is the only template written with `${here}` instead of
    # `{here}` -- confirm whether the leading `$` is intentional (i.e. matches
    # the prompt that was actually scored) or a typo.
    "The hardest words on the list were ${here}, <extra_id_0>, and oligarchy.",
]

In [None]:
# Number of prompt templates.
nprompts = len(prompt_templates)
# Instances per prompt template. The loop below reads `obj` in consecutive
# runs of this many scores, one run per template.
limit = 16028

# Normalized conditionals, one value per prompt template.
# Key naming is <prompt spelling>_<target spelling>, so `us_uk` is the share
# given to the UK target after a US prompt.
grand_us_us = []
grand_us_uk = []
grand_uk_us = []
grand_uk_uk = []

# Consistency preferences, one value per prompt template: the fraction of
# instances where the target matching the prompt's spelling scored at least
# as high as the other one.
grand_con_us = []
grand_con_uk = []

# Loop over prompts.
# This iterates over `prompt_templates`; the name `extra_prompts` used here
# previously is not defined anywhere in this notebook and raised a NameError
# on a fresh kernel.
for i, xp in enumerate(prompt_templates):
    us_con = []
    uk_con = []
    total_us_us = 0
    total_us_uk = 0
    total_uk_us = 0
    total_uk_uk = 0

    # Scores belonging to the i-th template.
    window = slice(i * limit, (i + 1) * limit)
    for us_us, us_uk, uk_us, uk_uk in zip(obj['us_us'][window],
                                          obj['us_uk'][window],
                                          obj['uk_us'][window],
                                          obj['uk_uk'][window]):

        # Check preferences (done on the raw scores, before normalization).
        us_con.append(1 if us_us >= us_uk else 0)
        uk_con.append(1 if uk_uk >= uk_us else 0)

        # The stored values are exponentiated and then renormalized so the
        # two targets for a given prompt spelling sum to one.
        us_us, us_uk, uk_us, uk_uk = np.exp(us_us), np.exp(us_uk), np.exp(uk_us), np.exp(uk_uk)
        us_norm = us_us + us_uk
        uk_norm = uk_us + uk_uk
        us_us, us_uk = us_us/us_norm, us_uk/us_norm
        uk_us, uk_uk = uk_us/uk_norm, uk_uk/uk_norm
        total_us_us += us_us
        total_us_uk += us_uk
        total_uk_us += uk_us
        total_uk_uk += uk_uk

    # Average the per-instance distributions for this template.
    us_total = total_us_us + total_us_uk
    uk_total = total_uk_us + total_uk_uk
    normed_us_us = total_us_us/us_total
    normed_us_uk = total_us_uk/us_total
    normed_uk_us = total_uk_us/uk_total
    normed_uk_uk = total_uk_uk/uk_total

    # Print some statistics per prompt template
    # (rows: prompt spelling, columns: target spelling).
    print(xp)
    print("-- US UK")
    print("US", normed_us_us, normed_us_uk)
    print("UK", normed_uk_us, normed_uk_uk)

    # Add to grand total.
    grand_us_us.append(normed_us_us)
    grand_us_uk.append(normed_us_uk)
    grand_uk_us.append(normed_uk_us)
    grand_uk_uk.append(normed_uk_uk)

    # Print consistency per prompt.
    print("================")
    print('us_con', 'uk_con')
    print(np.mean(us_con), np.mean(uk_con))
    print()

    # Aggregate
    grand_con_us.append(np.mean(us_con))
    grand_con_uk.append(np.mean(uk_con))

In [None]:
# Report the mean and standard deviation across prompt templates for each
# normalized conditional.
for label, per_prompt in (('us_us', grand_us_us),
                          ('us_uk', grand_us_uk),
                          ('uk_us', grand_uk_us),
                          ('uk_uk', grand_uk_uk)):
    print(label, np.mean(per_prompt), np.std(per_prompt))

# Report the mean consistency rate across prompt templates.
for label, per_prompt in (('us_con', grand_con_us),
                          ('uk_con', grand_con_uk)):
    print(label, np.mean(per_prompt))

GPT2 Calculations

In [None]:
# Read the pickled GPT2 scores from disk into `obj`.
PICKLE_PATH = 'gpt2_adjacent_scores.pickle'
with open(PICKLE_PATH, mode='rb') as score_file:
    obj = pickle.load(score_file)

In [None]:
# Which score to read from each entry
# (0 = to end of target, 1 = to end of sentence (EOS), 2 = full sentence "joint").
tp = 1

# Per-instance consistency flags and running sums of normalized probabilities.
us_con = []
uk_con = []
total_us_us = 0
total_us_uk = 0
total_uk_us = 0
total_uk_uk = 0

score_rows = zip(obj['us_us'], obj['us_uk'], obj['uk_us'], obj['uk_uk'])
for us_us, us_uk, uk_us, uk_uk in score_rows:
    # Keep only the score for the selected regime.
    us_us, us_uk, uk_us, uk_uk = us_us[tp], us_uk[tp], uk_us[tp], uk_uk[tp]

    # Flag whether the target matching the prompt's spelling scored at least
    # as high as the other target (checked on the raw scores).
    us_con.append(1 if us_us >= us_uk else 0)
    uk_con.append(1 if uk_uk >= uk_us else 0)

    # Normalize the two targets of each prompt spelling in log space, then
    # exponentiate, so that each pair sums to one.
    us_pair = np.array([us_us, us_uk])
    us_us, us_uk = np.exp(us_pair - logsumexp(us_pair))

    uk_pair = np.array([uk_us, uk_uk])
    uk_us, uk_uk = np.exp(uk_pair - logsumexp(uk_pair))

    total_us_us += us_us
    total_us_uk += us_uk
    total_uk_us += uk_us
    total_uk_uk += uk_uk

# Turn the running sums into averages per prompt spelling.
total = total_us_us + total_us_uk + total_uk_us + total_uk_uk
us_total = total_us_us + total_us_uk
uk_total = total_uk_us + total_uk_uk
normed_us_us = total_us_us / us_total
normed_us_uk = total_us_uk / us_total
normed_uk_us = total_uk_us / uk_total
normed_uk_uk = total_uk_uk / uk_total

# Print aggregate conditional probabilities and consistency values.
print('us_us', 'us_uk', 'uk_us', 'uk_uk')
print(normed_us_us, normed_us_uk, normed_uk_us, normed_uk_uk)
print('us_con', 'uk_con')
print(np.mean(us_con), np.mean(uk_con))

Log-likelihood Ratio Calculations for T5

In [None]:
# Read the pickled T5 joint scores from disk into `obj`.
PICKLE_PATH = 't5_joint_adjacent.pickle'
with open(PICKLE_PATH, mode='rb') as score_file:
    obj = pickle.load(score_file)


In [None]:
# Accumulate the per-sample statistic and the number of samples seen.
total_llr = 0
total_count = 0
for joint_scores in zip(obj['us_us'], obj['us_uk'], obj['uk_us'], obj['uk_uk']):
    # Normalize the four scores of this sample into a 2x2 joint distribution
    # over (prompt spelling, target spelling).
    log_joint = np.array(joint_scores)
    us_us, us_uk, uk_us, uk_uk = np.exp(log_joint - logsumexp(log_joint))

    # Marginals over the prompt spelling and over the target spelling.
    us_prompt = us_us + us_uk
    uk_prompt = uk_us + uk_uk
    us_target = us_us + uk_us
    uk_target = us_uk + uk_uk

    # Per-sample LLR calculation: for each cell, joint * log(joint / (product
    # of its two marginals)), summed over the four cells.
    cells = (
        (us_us, us_prompt, us_target),
        (us_uk, us_prompt, uk_target),
        (uk_us, uk_prompt, us_target),
        (uk_uk, uk_prompt, uk_target),
    )
    p1, p2, p3, p4 = [
        joint * np.log(joint / (prompt_marginal * target_marginal))
        for joint, prompt_marginal, target_marginal in cells
    ]
    llr = p1 + p2 + p3 + p4

    total_llr += llr
    total_count += 1

# Print aggregate LLR
print('average_llr')
print(total_llr / total_count)

Licensed under the Apache License, Version 2.0