In [8]:
import os
import re
import random
import openai
import numpy as np
from typing import Optional, Union

import nltk
from nltk.corpus import wordnet
from nltk.parse.generate import generate
import fasttext

with open(os.path.expanduser('~/.openai_api_key'), 'r') as file:
    openai.api_key = file.read().replace('\n', '')

In [4]:
def make_prompt(
    instance: str, 
    instruction: str,
    few_shot_instances: Optional[dict[str, Union[list, str]]] = None,
    one_output_per_instance: bool = False,
    sample_instances: Optional[int] = None,
    sample_suffixes: Optional[int] = None,
):
    """
    Make a randomized prompt from instructions and few shot examplars

    `instance`: the example we are doing inference for
    `instruction`: a natural language instruction that appears before the examplars
    `few_shot_instances`: a dictionary of input-output examplars
    `one_output_per_instance`: if multiple outputs are provided per input as a list, then
        inputs will be repeated for each output, else, concatenated with "|"
    `subsample_instances`: number of few-show instances to sample
    `subsample_outputs`: number of outputs to sample  
    """
    prompt = ''
    
    if instruction:
        prompt += f"{instruction}\n-------\n"
    if few_shot_instances is not None:
        instance_strings = []
        for input in few_shot_instances:
            outputs = few_shot_instances[input]
            if isinstance(outputs, (list, tuple)):
                k = sample_suffixes or len(outputs)
                outputs = random.sample(outputs, k)
                if not one_output_per_instance:
                    outputs = [" | ".join(outputs)]
            else:
                outputs = [outputs]
            instance_strings.extend(f"{input}->{output}" for output in outputs)

        n = sample_instances or len(few_shot_instances)
        prompt += "\n".join(random.sample(instance_strings, n)) + "\n"
    prompt += instance + ("->" * ("->" not in instance))
    return prompt


def filter_results(results):
    r = sorted(results, key=lambda x: -x[1])
    ret = []
    in_ret = set()
    for x in r:
        if x[0] in in_ret or not x[0]:
            continue
        in_ret.add(x[0])
        ret.append(x)
    return ret


def complete_prompt(prompts, n=5, temperature=0.95, stop='\n', engine="text-davinci-002"):
        response = openai.Completion.create(
            engine=engine,
            prompt=prompts,
            max_tokens=200,
            temperature=temperature,
            n=n,
            stop=stop,
            logprobs=1,
        )

        lines = [choice["text"] for choice in response["choices"]]
        scores = [choice["logprobs"]["token_logprobs"] for choice in response["choices"]]

        return list(zip(lines, [np.sum(s) for s in scores]))


def run_pipeline(instance, n_prompts, n, instruction, few_shot_instances, one_output_per_instance=False):
    prompts  = [
        make_prompt(instance, instruction, few_shot_instances, one_output_per_instance=one_output_per_instance)
        for _ in range(n_prompts)
    ]
    a = complete_prompt(prompts, n)
    return filter_results(a)

## TODO

- Shuffle the examples in the claims-to-examples
- Incorporate COMET or other commonsense reasoner

## Statements to claims

First, name the problematic proposition.

In [7]:
instruction = "Translate the following sentences into a general claim. A claim consists of a subject, relation, and object."

few_shot_instances = {
    "These frogs are really stuck-up.": "FRENCH PEOPLE ARE ARROGANT",
    "The French sure think they're better than everyone.": "FRENCH PEOPLE ARE ARROGANT",
    "She belongs in the kitchen.": "WOMEN SHOULD DO DOMESTIC LABOR",
    #"Housework is for women": "WOMEN SHOULD DO DOMESTIC LABOR",
    "Of course he's good at math, he's Chinese.": "CHINESE PEOPLE ARE SMART",
    # "For a woman, you are unexpectedly briLLiant": "WOMEN ARE INFERIOR TO MEN",
    # "Your recovery should be as painful as possible": "SUFFERING IS GOOD",
}

In [12]:
instance = 'He should stay inside. Since he has cancer, if he goes outside someone could get it.'
run_pipeline(instance, 5, 5, instruction, few_shot_instances)

[('CANCER PATIENTS SHOULD ISOLATE THEMSELVES', -4.57625192519584),
 ('CANCER IS CONTAGIOUS', -6.5119450020183995),
 ('CANCER PATIENTS ARE DANGEROUS', -8.872778258901821),
 ('SICK PEOPLE SHOULD ISOLATE THEMSELVES', -8.96100623065332),
 ('SICK PEOPLE SHOULD STAY INSIDE', -26.5019286841958),
 ('CANCER PATIENTS SHOULD STAY INSIDE', -26.77019490221864)]

In [11]:
instance = 'Israel is not a real country, I guess'
run_pipeline(instance, 5, 5, instruction, few_shot_instances)

[('ISRAEL DOES NOT EXIST', -1.9036380391930001),
 ("I DON'T RECOGNIZE ISRAEL AS A COUNTRY", -3.71910858438752),
 ('ISRAEL DOESNT EXIST', -4.4824917311483),
 ('ISRAEL IS NOT A REAL COUNTRY', -5.6252568607773),
 ("I DON'T RECOGNIZE ISRAEL AS A SOVEREIGN NATION", -6.981897275405039),
 ("ISRAEL ISN'T A REAL COUNTRY", -7.307978121363),
 ('I DO NOT RECOGNIZE THE STATE OF ISRAEL', -8.594018469937799),
 ('THE STATE OF ISRAEL IS ILLEGITIMATE', -9.462079370122941),
 (' ISRAEL IS NOT A REAL COUNTRY', -11.287719924028),
 ("I GUESS ISRAEL ISN'T A REAL COUNTRY", -14.1895206349313),
 ('COUNTRY X IS INFERIOR TO COUNTRY Y', -15.832072524445),
 ('I DO NOT RECOGNIZE ISRAEL AS A SOVEREIGN STATE.', -21.2736913650451),
 ('ISRAELIS ARE NOT LEGITIMATE', -37.454096373667)]

In [36]:
instance = 'What would compel a man to stay home and take care of the children?'
run_pipeline(instance, 5, 5, instruction, few_shot_instances)

[(' MEN SHOULD BE PROVIDERS', -3.597962149696001),
 (' WOMEN SHOULD DO DOMESTIC LABOR', -4.7682485005833986),
 (" MEN SHOULDN'T DO DOMESTIC LABOR", -6.45186814830542),
 (' MEN SHOULD BE THE BREADWINNERS', -8.472006299576101),
 (" MEN CAN'T DO DOMESTIC LABOR", -11.46735652064034),
 (' MEN ARE NOT FIT TO DO DOMESTIC LABOR', -12.5977531976847),
 (' MEN ARE INCAPABLE OF DOMESTIC LABOR', -18.4949678680518),
 (' MEN SHOULD DO DOMESTIC LABOR', -19.84242658464562),
 (' MEN SHOULD NOT STAY HOME AND TAKE CARE OF THE CHILDREN',
  -24.317348044539596),
 (' MEN SHOULD NOT TAKE CARE OF CHILDREN', -28.2556233326477),
 (" MEN SHOULDN'T STAY HOME AND TAKE CARE OF CHILDREN", -33.9430156206002),
 (' MEN SHOULD BE PROVIDERS, NOT CAREGIVERS', -37.953515378788)]

Fine-tuned/trained approach

Use open information extraction?
c.f., Open Extraction of Fine-Grained Political Statements [(Bamman & Smith, EMNLP 2015)](https://aclanthology.org/D15-1008.pdf)

## Inferences on claims

In [14]:
predicates = [
    # infinitive, 2nd pp, 3rd pp
    ("be", "is", "are"),
    ("do", "does", "do"),
    ("want", "wants", "want"),
    ("cause", "causes", "cause"),
]

proposition_grammars = {
    # be; "cancer is contagious", "people are dumb"
    ("be"): """
        CLAIM -> S P O | D S P O | "some" S negP O
        Q -> "not all" | "most"
        M -> "should" | "must"
        S -> "{s}"
        P -> "{p}" | M "{p_inf}"
        negP -> "{p}" "not" |  M "not" "{p_inf}"
        O -> "{o}"
    """,
    # do, want, cause; "vaccines cause autism", "people want to die", "people do bad things"
    ("do", "want", "cause"): """
        CLAIM -> S P O | D S P O | "some" S negP O
        D -> "not all" | "most"
        M -> "should" | "must"
        Q -> "always" | "only"
        S -> "{s}"
        P -> "{p}" | M "{p_inf}" | Q "{p_inf}"
        negP -> "do not" "{p_inf}" | M "not" "{p_inf}"
        O -> "{o}"
    """
}

def generate_inferences(proposition, predicates, grammars):
    """
    Given a proposition in subject, predicate, object form, generate additional
    propositions
    """
    # unpack grammars and predicates
    grammars = {i: g for infs, g in grammars.items() for i in infs}
    predicates = {c: conjs[0] for conjs in predicates for c in conjs[1:]}

    # extract the <S,P,O> triple
    if isinstance(proposition, str): 
        proposition = proposition.lower()

        pred_regex = re.compile(" " + " | ".join(predicates) + " ")
        found_pred = pred_regex.findall(proposition)
        if len(found_pred) != 1:
            raise ValueError("Could not find a valid verb in the proposition")
        s, p, o = [i.strip() for i in re.split(f"({found_pred[0]})", proposition)]
    else:
        s, p, o = [i.lower().strip() for i in proposition]
    
    p_inf = predicates[p]
    filled_grammar_template = grammars[p_inf].format(s=s, p=p, o=o, p_inf=p_inf)
    prop_grammar = nltk.CFG.fromstring(filled_grammar_template)
    return [" ".join(s) for s in generate(prop_grammar)]

In [15]:
generate_inferences("men do silly things", predicates, proposition_grammars)

['men do silly things',
 'men should do silly things',
 'men must do silly things',
 'men always do silly things',
 'men only do silly things',
 'not all men do silly things',
 'not all men should do silly things',
 'not all men must do silly things',
 'not all men always do silly things',
 'not all men only do silly things',
 'most men do silly things',
 'most men should do silly things',
 'most men must do silly things',
 'most men always do silly things',
 'most men only do silly things',
 'some men do not do silly things',
 'some men should not do silly things',
 'some men must not do silly things']

In [17]:
fasttext_model = fasttext.load_model("./fastText/cc.en.300.bin")


In [None]:
fasttext_model = fasttext.load_model("./fastText/cc.en.300.bin")

def find_related_terms() 

In [39]:
instruction = "Generate pragmatic inferences from claims."

# from Are Natural Language Inference Models IMPPRESsive? Learning IMPlicature and PRESupposition (Jeretic et al., ACL 2020)
few_shot_instances_impress = [
    "Jo ate some of the cake. <implicature> -> Jo didn't eat all of the cake",
    "The guest had found John. <presupposition> -> John used to be in an unknown location.",
    "Karen might have figured out why Mitchell thinks about that hospital. <presupposition> -> Mitchell thinks about that hospital.",
    "Catherine is not enourmous. <implicature> -> Catherine is big.",
    "Jeffrey isn't firing Adam. <presupposition> -> Adam was employed.",
]

# hand-craft some problematic presupp/implicature
few_shot_instances_problematic = [
    "Most cancer is contagious. <implicature> -> Not all cancer is contagious.",
    "Why would a woman leave the home? <presupposition> -> Women should do domestic labor.",
    "Why would a man take care of children? <presupposition> -> Men should not do domestic labor.",
    "The French are arrogant. <implicature> -> The French are not humble.",
]

# hand-craft more standard entailment
few_shot_instances_entailment = [
    "Irsael is a country -> Israel should be "
    "PersonX is a woman -> "
]

In [40]:
instance = "What would compel a man to stay home and take care of the children? <presupposition>"
run_pipeline(instance, 2, 5, instruction, few_shot_instances)

[(' MEN SHOULD BE PROVIDERS', -13.490287571710502),
 (' MEN SHOULD BE THE BREADWINNERS, WOMEN SHOULD BE THE HOMEMAKERS',
  -20.472204578911892),
 (" MEN SHOULDN'T STAY HOME AND TAKE CARE OF THE CHILDREN",
  -28.59328669197771)]

## Generation from claims

TODO: invoke speech acts as ways of conveying claims.

Sentences types can be declarative, interrogative, or imperative.

The idea is to generate diverse sentences that entail a given proposition. 

In [6]:
# TODO: diversify examples
# TODO: relations to examples

instruction = "Write diverse sentences that entail the claim."

few_shot_instances = {
    "FRENCH PEOPLE ARE ARROGANT": ["These frogs are really stuck-up.", "The French sure think they\'re better than everyone.", "What a snooty Frenchman. How typical."],
    "WOMEN SHOULD DO DOMESTIC LABOR": ["She belongs in the kitchen.", "Housework is for women.", "All chicks are good for is making sandwiches."],
    "ASIAN PEOPLE ARE GOOD AT MATH": ["That Chinese guy is a math genius!", "Why are Asian people so good at math?", "Why can\'t I be Asian? They\'re all math wizards."],
}

In [31]:
print(make_prompt("THE HOLOCAUST SHOULD BE MOCKED", instruction, few_shot_instances, one_output_per_instance=True))

Write diverse sentences that entail the claim.
-------
WOMEN SHOULD DO DOMESTIC LABOR->All chicks are good for is making sandwiches
FRENCH PEOPLE ARE ARROGANT->These frogs are really stuck-up
FRENCH PEOPLE ARE ARROGANT->What a snooty Frenchman. How typical.
THE HOLOCAUST SHOULD BE MOCKED->


In [7]:
# TODO: use produced examples to seed additional paraphrases

instance = "CANCER SHOULD BE MOCKED"

run_pipeline(instance, 5, 5, instruction, few_shot_instances, one_output_per_instance=True)

# Notes: questions not 

[('Cancer is no laughing matter.', -1.3561204523340002),
 ('Cancer is for wimps.', -3.5563985669491993),
 ('Cancer is a serious disease and should not be joked about.',
  -4.075254802995),
 ('Laughing at cancer is the best way to deal with it.', -4.4535689772905),
 ('Cancer is a serious disease and should not be made fun of.',
  -4.977193618606),
 ('Cancer is a serious disease and should not be mocked.', -7.190054549553),
 ('You think cancer is funny? Ha, ha, very funny.', -7.290074976686801),
 ('Ha, cancer. What a joke.', -8.1853759986713),
 ('People with cancer are weak and deserve to die.', -13.3899295945),
 ('Cancer is a serious disease that people suffer from.', -15.093541473296002),
 ('Cancer is a serious disease and should not be ridiculed.', -15.12879535419),
 ('Cancer is a terrible, deadly disease.', -15.646023817375998),
 ('Cancer is a terrible disease that affects millions of people.',
  -15.942739224303999),
 ('Cancer is funny. Hope you die from it.', -16.0058018489075),
 (

## Tests from instantiations