# Using trained PiPP classifiers

In [1]:
__author__ = "Christopher Potts"

In [2]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import utils

## Model loading

In [3]:
# The classifier and its tokenizer are loaded from the same pretrained
# checkpoint identifier, so the identifier is spelled out only once.
PIPP_CHECKPOINT = "cgpotts/pipp-finder-bert-base-cased"

tokenizer = AutoTokenizer.from_pretrained(PIPP_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(PIPP_CHECKPOINT)

## Example usage

In [4]:
# Single-sentence batch; label 1 is the value the assessment loop further
# down treats as the 'PiPP (Filler/Gap)' class.
fronted_example = "Happy though we were with the idea, we rejected it."
utils.model_predict([fronted_example], tokenizer, model)

array([1])

In [5]:
# Same content with the adjective left in place; label 0 is the value the
# assessment loop further down treats as the 'PP (No Filler/No Gap)' class.
canonical_example = "Though we were happy with the idea, we rejected it."
utils.model_predict([canonical_example], tokenizer, model)

array([0])

## Assessments

### Main experimental materials

In [6]:
# Read the experimental items into a list of strings, one entry per line
# (line terminators are stripped by `splitlines`).
# The encoding is pinned so decoding does not depend on the platform's locale
# default. NOTE(review): assumes materials.txt is UTF-8 (ASCII-compatible) —
# confirm if the file was saved with another encoding.
with open("materials.txt", encoding="utf-8") as f:
    materials = f.read().splitlines()

In [7]:
# Score the classifier on every preposition x embedding x condition cell of
# the design, recording how many of the generated examples it labels correctly.
#
# Each condition name is paired with the label counted as correct for it:
# 1 for the PiPP condition, 0 for the PP condition.
CONDITION_LABELS = (
    ('PiPP (Filler/Gap)', 1),
    ('PP (No Filler/No Gap)', 0),
)

data = []

for prep in ("as", "though", "asas"):
    # The empty string is the no-embedding variant.
    for emb in ("", "they said that we knew that"):
        # One generated item per base material. Each item is indexed by
        # condition name below and its first element is used as the example
        # (presumably the sentence text — verify against utils.item).
        testers = [utils.item(s, preposition=prep, embedding=emb) for s in materials]
        for cond, label in CONDITION_LABELS:
            exs = [tester[cond][0] for tester in testers]
            preds = utils.model_predict(exs, tokenizer, model)
            correct = sum(p == label for p in preds)
            total = len(preds)
            data.append({"preposition": prep,
                         "embedding": emb,
                         "condition": cond,
                         "correct": correct,
                         "total": total})

# One row per design cell, in loop order.
main_df = pd.DataFrame(data)

In [8]:
# Per-row accuracy: share of that row's examples that were labelled correctly.
main_df["accuracy"] = main_df["correct"].div(main_df["total"])

In [9]:
# Display the results table: one row per preposition/embedding/condition cell.
main_df

Unnamed: 0,preposition,embedding,condition,correct,total,accuracy
0,as,,PiPP (Filler/Gap),32,33,0.969697
1,as,,PP (No Filler/No Gap),33,33,1.0
2,as,they said that we knew that,PiPP (Filler/Gap),32,33,0.969697
3,as,they said that we knew that,PP (No Filler/No Gap),33,33,1.0
4,though,,PiPP (Filler/Gap),33,33,1.0
5,though,,PP (No Filler/No Gap),33,33,1.0
6,though,they said that we knew that,PiPP (Filler/Gap),33,33,1.0
7,though,they said that we knew that,PP (No Filler/No Gap),33,33,1.0
8,asas,,PiPP (Filler/Gap),33,33,1.0
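9,asas,,PP (No Filler/No Gap),33,33,1.0
10,asas,they said that we knew that,PiPP (Filler/Gap),33,33,1.0
11,asas,they said that we knew that,PP (No Filler/No Gap),33,33,1.0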


In [10]:
# Pooled accuracy: all correct predictions over all predictions. In the
# recorded table every row has the same total, in which case this pooled value
# equals the unweighted mean of the per-row accuracies that the label refers to.
overall_correct = main_df.correct.sum()
overall_total = main_df.total.sum()
f"Macro average accuracy: {overall_correct / overall_total}"

'Macro average accuracy: 0.9949494949494949'

### Stress tests

In [11]:
# Load the stress-test materials; the cells below read its `PiPP` and `PP`
# columns as lists of sentences.
stress_df = pd.read_csv("materials-stress-test.csv")

#### PiPPs

In [12]:
# Stress-test examples from the `PiPP` column, as a plain Python list.
stress_pipp = list(stress_df["PiPP"].values)

In [13]:
# Despite the name, this holds one correctness flag per example: True when the
# model assigned label 1 to a sentence from the PiPP column.
pipp_raw_labels = utils.model_predict(stress_pipp, tokenizer, model)
stress_pipp_preds = [raw_label == 1 for raw_label in pipp_raw_labels]

In [14]:
# Summing the boolean flags counts the correctly classified examples.
pipp_n_correct = sum(stress_pipp_preds)
pipp_n_total = len(stress_pipp_preds)
f"Stress case PiPPs: {pipp_n_correct} of {pipp_n_total} correct"

'Stress case PiPPs: 8 of 9 correct'

In [15]:
# List the PiPP stress sentences whose correctness flag is False.
[
    sentence
    for sentence, is_correct in zip(stress_pipp, stress_pipp_preds)
    if not is_correct
]

['Learn Klingon though they wanted to try to, they did not have time.']

#### PPs

In [16]:
# Stress-test examples from the `PP` column, as a plain Python list.
stress_pp = list(stress_df["PP"].values)

In [17]:
# Despite the name, this holds one correctness flag per example: True when the
# model assigned label 0 to a sentence from the PP column.
pp_raw_labels = utils.model_predict(stress_pp, tokenizer, model)
stress_pp_preds = [raw_label == 0 for raw_label in pp_raw_labels]

In [18]:
# Summing the boolean flags counts the correctly classified examples.
pp_n_correct = sum(stress_pp_preds)
pp_n_total = len(stress_pp_preds)
f"Stress case PPs: {pp_n_correct} of {pp_n_total} correct"

'Stress case PPs: 9 of 9 correct'