# PiPP wh-effects

In [1]:
__author__ = "Christopher Potts"

## Set-up

In [2]:
import json
import glob
import matplotlib.pyplot as plt
import numpy as np
from minicons import scorer
import os
import pandas as pd
import re

import utils

In [3]:
# Apply the "pipp.mplstyle" Matplotlib style sheet to every figure drawn after this cell.
plt.style.use("pipp.mplstyle")

## Items

In [4]:
# Demonstrate `utils.item` on a single template. The displayed result maps each
# of the four condition names to a (sentence, target word) pair.
# NOTE(review): "*" presumably marks the fronted word and "GAP" its gap site --
# confirm against utils.item.
utils.item("Happy* though we were GAP with the idea, we had to reject it.", 
     embedding="", 
     preposition="though")

{'PiPP (Filler/Gap)': ('Happy though we were with the idea, we had to reject it.',
  'with'),
 'PP (No Filler/No Gap)': ('Though we were happy with the idea, we had to reject it.',
  'happy'),
 'Filler/No Gap': ('Happy though we were happy with the idea, we had to reject it.',
  'happy'),
 'No Filler/Gap': ('Though we were with the idea, we had to reject it.',
  'with')}

## Materials

In [5]:
# Read the experimental materials: `materials` holds one string per line of
# materials.txt, with line terminators removed by splitlines().
with open("materials.txt") as f:
    materials = f.read().splitlines()

## Experiments

In [6]:
def run_item(ex, item_num, model, embedding="", preposition="though"):
    """Score every condition of one item and record its target-token surprisal.

    Parameters
    ----------
    ex : str
        Item template, passed unchanged to `utils.item`.
    item_num : int
        Identifier stored in every record produced for this item.
    model : object
        Scorer exposing `token_score(texts, rank=False)`. Element 0 of the
        returned value must be a sequence of `(token, logprob)` pairs for
        the single text that was passed in.
    embedding, preposition : str
        Forwarded unchanged to `utils.item`.

    Returns
    -------
    list of dict
        One record per condition returned by `utils.item`, with keys
        'fulltext', 'prompt', 'prompt_tokens', 'prompt_scores',
        'condition', 'target_surprisal' and 'item_num' (in that order).

    Raises
    ------
    IndexError
        If no token, after stripping whitespace, equals the target word
        (for example because the tokenizer split the word into several
        pieces). The message names the target, text, condition and item.
    """
    data = []
    conds = utils.item(ex, embedding=embedding, preposition=preposition)
    for typ, (text, target) in conds.items():
        toks, logprobs = zip(*model.token_score([text], rank=False)[0])
        # Positions of every token that is exactly the target word.
        inds = [i for i, tok in enumerate(toks) if tok.strip() == target]
        if not inds:
            # Same exception type as the bare `inds[0]` lookup would raise,
            # but with enough context to locate the offending material.
            raise IndexError(
                f"Target {target!r} does not match any single token of "
                f"{text!r} (condition {typ!r}, item {item_num}); "
                f"tokens were {list(toks)!r}")
        if typ == 'Filler/No Gap':
            # This condition can contain two identical tokens: when the
            # PiPP is sentence-medial its nucleus phrase is not
            # capitalized. The second one always fills the GAP position.
            ti = inds[-1]
        else:
            # In every other condition the target token is sometimes
            # incidentally repeated later in the example, but never
            # earlier, so the first match is the one to measure.
            ti = inds[0]
        data.append({
            'fulltext': text,
            'prompt': text,
            'prompt_tokens': list(toks),
            'prompt_scores': list(logprobs),
            'condition': typ,
            'target_surprisal': convert_to_surprisal(logprobs[ti]),
            'item_num': item_num,
        })
    return data

def convert_to_surprisal(x):
    """Return `x` negated and divided by ln(2).

    When `x` is a natural-log probability this is its surprisal in bits.
    """
    nats_per_bit = np.log(2)
    return -(x / nats_per_bit)

In [7]:
def run_experiment(materials, model_name, embedding="", preposition="though"):
    """Score all materials with one model and write the records to JSON.

    Parameters
    ----------
    materials : iterable of str
        Item templates; each is passed to `run_item` with a 1-based
        item number.
    model_name : str
        Name handed to `scorer.IncrementalLMScorer`. The part after the
        last "/" is used in the output filename.
    embedding, preposition : str
        Forwarded to `run_item`; both also appear in the output filename
        (spaces in `embedding` become underscores).

    Side effects
    ------------
    Writes `results/results-<model>-<preposition>[-<embedding>].json`,
    creating the `results` directory if it does not exist yet.
    """
    emb = f"-{embedding.replace(' ', '_')}" if embedding else ""
    model_nickname = model_name.split("/")[-1]
    output_filename = f"results/results-{model_nickname}-{preposition}{emb}.json"
    # Make sure the output directory exists *before* the expensive scoring
    # pass, so a missing directory cannot discard a finished run.
    os.makedirs(os.path.dirname(output_filename), exist_ok=True)

    model = scorer.IncrementalLMScorer(model_name)
    data = []
    for item_num, m in enumerate(materials, start=1):
        data.extend(
            run_item(m, item_num, model, embedding=embedding, preposition=preposition))
    with open(output_filename, "wt") as f:
        json.dump(data, f, indent=4)

### Models

In [8]:
# Model names to evaluate, listed from smallest to largest.
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated into a single (invalid) model name otherwise.
all_model_names = [
    "EleutherAI/pythia-70m-deduped",
    "EleutherAI/pythia-160m-deduped",
    "EleutherAI/pythia-410m-deduped",
    "EleutherAI/pythia-1b-deduped",
    "EleutherAI/pythia-1.4b-deduped",
    "EleutherAI/pythia-2.8b-deduped",
    "EleutherAI/pythia-6.9b-deduped",
    "EleutherAI/pythia-12b-deduped",
]

# Run one experiment per (model, embedding, preposition) combination;
# each call writes its own results file.
embeddings = ("", "they said that we knew that")
prepositions = ("though", "as", "asas")

for model_name in all_model_names:
    for embedding in embeddings:
        for prep in prepositions:
            run_experiment(
                materials,
                model_name,
                embedding=embedding,
                preposition=prep)

## Visualization

In [9]:
# Condition names in the sequence handed to utils.mean_plot
# (presumably the display order of the conditions -- confirm in utils).
order = [
    'PP (No Filler/No Gap)',
    'Filler/No Gap',
    'No Filler/Gap',
    'PiPP (Filler/Gap)',
]

# glob returns matches in arbitrary order; sort the filenames so the plots
# come out in the same sequence on every run.
for results_filename in sorted(glob.glob(os.path.join("results", "results-pythia*.json"))):
    utils.mean_plot(results_filename, order, xlim=20)