## Data Loading

In [11]:
import os
import pandas as pd
import pickle

# Load dataset.
# All hop files are combined into one DataFrame further down, with the number
# of hops stored in an additional `num_hops` column.
DATA_DIR = "prontoqa_data/fictional"
HOPS = [1, 3, 5]
# One "<n>hop.json" file per entry in HOPS, all located in DATA_DIR.
# NOTE: the directory must be DATA_DIR; the bare name `dir` is the Python
# builtin and makes os.path.join raise a TypeError on a fresh kernel.
JSON_PATHS = [os.path.join(DATA_DIR, f"{i}hop.json") for i in HOPS]

def load_json_to_df(path, num_hops):
    """Load one hop file into a DataFrame with one row per top-level JSON key.

    The file is read with ``pd.read_json`` and transposed, so every top-level
    key of the JSON object becomes a row and its fields become columns.

    Args:
        path: Location of the JSON file (anything ``pd.read_json`` accepts).
        num_hops: Hop count written to the ``num_hops`` column of every row.

    Returns:
        DataFrame containing the file's fields plus three extra columns:
        ``index`` (the former row label), ``num_hops`` and ``id``
        (``"<index>_<num_hops>"``).
    """
    frame = pd.read_json(path).transpose()
    frame['num_hops'] = num_hops

    # Move the example names out of the index into a regular `index` column.
    frame = frame.reset_index()

    # Vectorised id construction instead of a row-wise apply. astype(str)
    # keeps the concatenation valid even if the labels are not strings.
    frame['id'] = frame['index'].astype(str) + f"_{num_hops}"

    return frame

# Load every hop file with its matching hop count (no special case for the
# first file), then stack them with a single concat.
hop_frames = [
    load_json_to_df(path, num_hops=hops)
    for hops, path in zip(HOPS, JSON_PATHS)
]
print(len(hop_frames[0]))  # number of examples in the first hop file

# Renumber rows 0..N-1 so positional and label-based access agree.
df = pd.concat(hop_frames).reset_index(drop=True)

df.tail(3)


500


Unnamed: 0,index,in_context_example0,in_context_example1,in_context_example2,in_context_example3,in_context_example4,in_context_example5,in_context_example6,in_context_example7,test_example,num_hops,id
1497,example498,{'question': 'Dumpuses are red. Dumpuses are y...,{'question': 'Dumpuses are bright. Dumpuses ar...,{'question': 'Every wumpus is happy. Each wump...,{'question': 'Vumpuses are hot. Every vumpus i...,{'question': 'Every impus is happy. Each impus...,{'question': 'Every numpus is bitter. Every vu...,{'question': 'Numpuses are temperate. Each num...,{'question': 'Jompuses are not small. Every jo...,{'question': 'Jompuses are earthy. Every jompu...,5,example498_5
1498,example499,{'question': 'Each dumpus is red. Dumpuses are...,{'question': 'Each yumpus is bright. Each yump...,{'question': 'Each jompus is not luminous. Jom...,{'question': 'Each vumpus is large. Vumpuses a...,{'question': 'Impuses are opaque. Impuses are ...,{'question': 'Yumpuses are not bright. Every y...,{'question': 'Zumpuses are feisty. Every zumpu...,{'question': 'Each jompus is not mean. Jompuse...,{'question': 'Every wumpus is brown. Every wum...,5,example499_5
1499,example500,{'question': 'Rompuses are temperate. Rompuses...,{'question': 'Dumpuses are not dull. Every zum...,{'question': 'Each wumpus is small. Every tump...,{'question': 'Each impus is spicy. Impuses are...,{'question': 'Every rompus is not large. Every...,{'question': 'Dumpuses are opaque. Dumpuses ar...,{'question': 'Vumpuses are wooden. Every vumpu...,{'question': 'Every rompus is wooden. Rompuses...,{'question': 'Every zumpus is aggressive. Zump...,5,example500_5


In [15]:
def random_sample(df, k=100, random_state=None):
    """Return up to ``k`` rows of ``df`` in random order.

    Args:
        df: DataFrame to sample from.
        k: Maximum number of rows to keep. If ``df`` has fewer than ``k``
            rows, all of them are returned (shuffled).
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            sample can be reproduced. ``None`` (the default) keeps the
            previous unseeded behaviour.

    Returns:
        A new DataFrame with the selected rows; original index labels are kept.
    """
    # Shuffle all rows, then truncate: unlike sample(n=k) this does not raise
    # when k exceeds the number of rows.
    shuffled = df.sample(frac=1, random_state=random_state)
    return shuffled.iloc[:k]

def preprocess(k):
    """Sample up to ``k`` examples from every hop file and stack them.

    Each path in ``JSON_PATHS`` is loaded with its matching entry of ``HOPS``
    as hop count, sampled with ``random_sample`` and appended. After each file
    the running row total is printed.

    Args:
        k: Maximum number of rows sampled per hop file.

    Returns:
        DataFrame with the sampled rows of all hop files, re-indexed 0..N-1.
    """
    sampled_frames = []
    total_rows = 0
    # Every file, including the first, goes through the same path so its hop
    # count always comes from HOPS rather than a hard-coded 1.
    for hops, path in zip(HOPS, JSON_PATHS):
        sampled = random_sample(load_json_to_df(path, num_hops=hops), k)
        sampled_frames.append(sampled)
        total_rows += len(sampled)
        print(f"len(df) after adding {path}: {total_rows}")

    # Single concat at the end instead of re-copying the frame per iteration.
    return pd.concat(sampled_frames).reset_index(drop=True)

# Draw k examples per hop file and pickle the result into DATA_DIR.
k = 100
out_file = os.path.join(DATA_DIR, 'sampled_data.pkl')
df = preprocess(k)

with open(out_file, mode='wb') as pickle_file:
    pickle.dump(df, pickle_file)

len(df) after adding prontoqa_data/fictional/1hop.json: 100
len(df) after adding prontoqa_data/fictional/3hop.json: 200
len(df) after adding prontoqa_data/fictional/5hop.json: 300


In [19]:
# Note: question = facts, query = conclusion, chain of thought = reasoning steps, answer = gold label
# Print the first in-context example of the first and of the last row.
# Positional access keeps this valid when the sample size (and therefore
# len(df)) changes; a fixed label such as 299 only exists for 300 rows.
in_context_example0_dict = df['in_context_example0'].iloc[0]
print(in_context_example0_dict)

in_context_example0_dict = df['in_context_example0'].iloc[-1]
print(in_context_example0_dict)

{'question': 'Every vumpus is opaque. Vumpuses are impuses. Each impus is not bright. Each impus is a rompus. Each rompus is red. Every rompus is a numpus. Numpuses are large. Numpuses are yumpuses. Yumpuses are aggressive. Every yumpus is a zumpus. Every zumpus is floral. Every zumpus is a wumpus. Each jompus is not sweet. Each wumpus is sweet. Wumpuses are dumpuses. Stella is a wumpus.', 'query': 'True or false: Stella is not sweet.', 'chain_of_thought': ['Stella is a wumpus.', 'Each wumpus is sweet.', 'Stella is sweet.'], 'answer': 'False'}
{'question': 'Zumpuses are nervous. Zumpuses are dumpuses. Dumpuses are bright. Each dumpus is a numpus. Each numpus is liquid. Every wumpus is floral. Numpuses are impuses. Each impus is small. Every impus is a vumpus. Every vumpus is not floral. Vumpuses are tumpuses. Every tumpus is cold. Each tumpus is a rompus. Rompuses are not opaque. Rompuses are yumpuses. Yumpuses are aggressive. Every yumpus is a jompus. Sally is a zumpus.', 'query': 'Tr

## DSP

In [None]:
import os

root_path = '.'

# The notebook cache directory has to be exported BEFORE `dsp` is imported:
# the package picks up DSP_NOTEBOOK_CACHEDIR while it is being imported (the
# official DSP intro notebook uses the same order), so setting it afterwards
# has no effect.
os.environ["DSP_NOTEBOOK_CACHEDIR"] = os.path.join(root_path, 'cache')

import dsp  # noqa: E402 -- deliberately imported after the env var is set

# Language model used by every dsp.generate call in this notebook.
openai_key = os.getenv('OPENAI_API_KEY')  # or replace with your API key (optional)
lm = dsp.GPT3(model='text-davinci-003', api_key=openai_key)
dsp.settings.configure(lm=lm)

### Backward Template

In [None]:
# Fields that the helper functions fill in from the dataset.
Facts_And_Rules = dsp.Type(prefix="Facts and rules:", desc="${the facts and rules}")
Query = dsp.Type(prefix="Query:", desc="${the query}")

# Fields that are read back from the completion (`.proof` / `.answer`).
Proof = dsp.Type(
    prefix="Proof:",
    desc="${a step-by-step proof that the query is true or false based only on the facts and rules}",
    format=dsp.format_answers,
)
Answer = dsp.Type(
    prefix="Answer:",
    desc="${the final answer based on the above proof}",
    format=dsp.format_answers,
)

# Prompt template whose instructions ask for backward chaining.
backward_cot_template = dsp.Template(
    instructions="Use backward chaining to reason over the facts and rules to determine whether the query is true or false",
    facts_and_rules=Facts_And_Rules(),
    query=Query(),
    proof=Proof(),
    answer=Answer(),
)

### Forward Template

In [None]:
# Fields that the helper functions fill in from the dataset.
Facts_And_Rules = dsp.Type(prefix="Facts and rules:", desc="${the facts and rules}")
Query = dsp.Type(prefix="Query:", desc="${the query}")

# Fields that are read back from the completion (`.proof` / `.answer`).
Proof = dsp.Type(
    prefix="Proof:",
    desc="${a step-by-step proof that the query is true or false based only on the facts and rules}",
    format=dsp.format_answers,
)
Answer = dsp.Type(
    prefix="Answer:",
    desc="${the final answer based on the above proof}",
    format=dsp.format_answers,
)

# Prompt template whose instructions ask for forward chaining.
forward_cot_template = dsp.Template(
    instructions="Use forward chaining to reason over the facts and rules to determine whether the query is true or false",
    facts_and_rules=Facts_And_Rules(),
    query=Query(),
    proof=Proof(),
    answer=Answer(),
)

### Bidirectional (OR)

In [None]:
# Fields that the helper functions fill in from the dataset.
Facts_And_Rules = dsp.Type(prefix="Facts and rules:", desc="${the facts and rules}")
Query = dsp.Type(prefix="Query:", desc="${the query}")

# Fields that are read back from the completion (`.proof` / `.answer`).
Proof = dsp.Type(
    prefix="Proof:",
    desc="${a step-by-step proof that the query is true or false based only on the facts and rules}",
    format=dsp.format_answers,
)
Answer = dsp.Type(
    prefix="Answer:",
    desc="${the final answer based on the above proof}",
    format=dsp.format_answers,
)

# Prompt template whose instructions allow either chaining direction.
bidirectional_cot_template = dsp.Template(
    instructions="Use forward or backward chaining to reason over the facts and rules to determine whether the query is true or false",
    facts_and_rules=Facts_And_Rules(),
    query=Query(),
    proof=Proof(),
    answer=Answer(),
)

### Helpers

In [None]:
def _build_cot_demos(df, row_pos, reverse_proof, num_demos=8):
    """Turn the in-context examples stored on one row into ``dsp.Example`` demos.

    Reads the columns ``in_context_example0`` .. ``in_context_example{num_demos-1}``
    of the row at position ``row_pos``; each cell is a dict with the keys
    ``question``, ``query``, ``chain_of_thought`` (list of steps) and ``answer``.
    """
    row = df.iloc[row_pos]  # looked up once instead of once per demo
    demos = []
    for demo_idx in range(num_demos):
        demo_dict = row[f'in_context_example{demo_idx}']
        steps = demo_dict['chain_of_thought']
        if reverse_proof:
            steps = steps[::-1]
        demos.append(
            dsp.Example(
                facts_and_rules=demo_dict['question'],
                query=demo_dict['query'],
                proof=' '.join(steps),
                answer=demo_dict['answer'],
            )
        )
    return demos


def get_demos_backward_cot(df, id, num_demos=8):
    """Build demos whose proof lists the chain-of-thought steps in reverse order.

    Args:
        df: DataFrame with ``in_context_example<i>`` columns.
        id: Positional row index (used with ``df.iloc``).
        num_demos: How many ``in_context_example<i>`` columns to read,
            starting at 0. Defaults to 8.

    Returns:
        List of ``dsp.Example`` with ``facts_and_rules``, ``query``, ``proof``
        and ``answer`` set.
    """
    return _build_cot_demos(df, id, reverse_proof=True, num_demos=num_demos)


def get_demos_forward_cot(df, id, num_demos=8):
    """Build demos whose proof lists the chain-of-thought steps in stored order.

    Args:
        df: DataFrame with ``in_context_example<i>`` columns.
        id: Positional row index (used with ``df.iloc``).
        num_demos: How many ``in_context_example<i>`` columns to read,
            starting at 0. Defaults to 8.

    Returns:
        List of ``dsp.Example`` with ``facts_and_rules``, ``query``, ``proof``
        and ``answer`` set.
    """
    return _build_cot_demos(df, id, reverse_proof=False, num_demos=num_demos)

def get_test_example_cot(df, id):
    """Wrap the test example of row ``id`` (positional) as a ``dsp.Example``.

    Only ``facts_and_rules`` (from the ``question`` key) and ``query`` are set;
    no proof or answer is attached.
    """
    test_dict = df.iloc[id]['test_example']
    return dsp.Example(
        facts_and_rules=test_dict['question'],
        query=test_dict['query'],
    )

def get_test_answer_forward_cot(df, id):
    """Return ``(gold_proof, gold_answer)`` for the test example of row ``id``.

    The gold proof is the example's chain-of-thought steps joined with single
    spaces, in their stored order.
    """
    test_dict = df.iloc[id]['test_example']
    gold_answer = test_dict['answer']
    gold_proof = ' '.join(test_dict['chain_of_thought'])
    return gold_proof, gold_answer

def get_test_answer_backward_cot(df, id):
    """Return ``(gold_proof, gold_answer)`` for the test example of row ``id``.

    The gold proof is the example's chain-of-thought steps joined with single
    spaces, in reverse of their stored order.
    """
    test_dict = df.iloc[id]['test_example']
    gold_answer = test_dict['answer']
    reversed_steps = list(reversed(test_dict['chain_of_thought']))
    return ' '.join(reversed_steps), gold_answer


### DSP Program

In [None]:
@dsp.transformation
def generic_dsp(df, id, template, get_demos, get_test_example, k=2, temperature=0.0):
    """Generate a completion for the test example on row ``id`` of ``df``.

    Args:
        df: DataFrame holding the examples.
        id: Row identifier handed to ``get_test_example`` / ``get_demos``.
        template: Template passed to ``dsp.generate``.
        get_demos: Callable ``(df, id)`` returning candidate demos; only
            called when ``k > 0``.
        get_test_example: Callable ``(df, id)`` returning the example to complete.
        k: Number of demos sampled from the candidates; values <= 0 attach an
            empty demo list.
        temperature: Sampling temperature passed to ``dsp.generate``.

    Returns:
        The completions object produced by ``dsp.generate``.
    """
    test_example = get_test_example(df, id)

    # Attach k sampled demos, or none at all when k is not positive.
    test_example.demos = dsp.sample(get_demos(df, id), k=k) if k > 0 else []

    # Run the program using `template`; only the completions are returned.
    generate = dsp.generate(template, temperature=temperature)
    _, completions = generate(test_example, stage="cot")
    return completions

### Example to see format

In [None]:
# Print what forward_cot_template produces for the first row of df,
# with two sampled forward-chaining demos attached.
demos = get_demos_forward_cot(df, 0)
first_test_example = df.iloc[0]['test_example']

ex = dsp.Example(
    facts_and_rules=first_test_example['question'],
    query=first_test_example['query'],
    demos=dsp.sample(demos, k=2),
)

print(forward_cot_template(ex))

### Some more helpers

In [None]:
# Run a couple of forward passes of the LLM
def sample_completion(start, num_total, k, df, template, get_demos, get_test_example, get_test_answer):
    """Run ``generic_dsp`` on consecutive rows of ``df`` and print accuracy.

    For every row the query, predicted proof, gold proof(s), predicted answer
    and gold answer are printed; answer accuracy and exact-match proof accuracy
    are printed at the end.

    Args:
        start: First row index to evaluate.
        num_total: Number of consecutive rows to evaluate.
        k: Number of demos forwarded to ``generic_dsp``.
        df: DataFrame holding the examples.
        template: Template forwarded to ``generic_dsp``.
        get_demos: Callable ``(df, id)`` forwarded to ``generic_dsp``.
        get_test_example: Callable ``(df, id)`` forwarded to ``generic_dsp``.
        get_test_answer: Callable ``(df, id) -> (gold_cot, gold_answer)``, or a
            list/tuple of such callables when several gold proofs are
            acceptable. A predicted proof then counts as correct if it equals
            any of the gold proofs.

    Returns:
        List with the completion returned by ``generic_dsp`` for each row.

    Raises:
        ValueError: If ``get_test_answer`` is an empty list/tuple.
    """
    # Normalise to a list so single- and multi-reference runs share one path.
    multiple_refs = isinstance(get_test_answer, (list, tuple))
    answer_fns = list(get_test_answer) if multiple_refs else [get_test_answer]
    if not answer_fns:
        raise ValueError("get_test_answer must contain at least one function")

    num_correct = 0
    num_correct_cot = 0

    completions = []
    for example_id in range(start, start + num_total):
        print(example_id)
        completion = generic_dsp(df, example_id, template=template, k=k, get_demos=get_demos, get_test_example=get_test_example)
        completions.append(completion)

        predicted_answer = completion.answer
        predicted_cot = completion.proof

        # Collect every acceptable gold proof. The gold answer is taken from
        # the last function, as all of them read the same test example.
        gold_cots = []
        gold_answer = None
        for answer_fn in answer_fns:
            gold_cot, gold_answer = answer_fn(df, example_id)
            gold_cots.append(gold_cot)

        print(f"Query: {completion.query}")
        print(f"Predicted COT: {predicted_cot}")

        if multiple_refs:
            for i, cot in enumerate(gold_cots):
                print(f"Gold COT {i}: {cot}")
        else:
            print(f"Gold COT: {gold_cots[0]}")

        print(f"Predicted answer: {predicted_answer}")
        print(f"Gold answer: {gold_answer}")
        print("="*80)

        if predicted_answer == gold_answer:
            num_correct += 1

        # Exact string match against any of the acceptable gold proofs.
        if predicted_cot in gold_cots:
            num_correct_cot += 1

    if num_total > 0:
        print(f"Accuracy: {num_correct / num_total}")
        print(f"COT Accuracy: {num_correct_cot / num_total}")
    else:
        # Nothing evaluated: avoid dividing by zero.
        print("Accuracy: n/a (no examples evaluated)")
        print("COT Accuracy: n/a (no examples evaluated)")

    return completions

## Preliminary Experiments

### Forward CoT

In [None]:
# Zero-shot forward CoT on `num_total` consecutive rows of df, starting at `start`.
k = 0  # no in-context demos
start = 0  # alt.: 2480 (original note: "5-shot is at the end -- refer to df"; presumably the 5-hop rows -- TODO confirm, the sampled df above has only 300 rows)
num_total = 10

template = forward_cot_template
get_demos = get_demos_forward_cot
get_test_example = get_test_example_cot
get_test_answer = get_test_answer_forward_cot

completions = sample_completion(start, num_total, k, df, template, get_demos, get_test_example, get_test_answer)

### Backward CoT

In [None]:
# Zero-shot backward CoT on `num_total` consecutive rows of df, starting at `start`.
k = 0  # no in-context demos
start = 0  # alt.: 2480
num_total = 10

template = backward_cot_template
get_demos = get_demos_backward_cot
get_test_example = get_test_example_cot
get_test_answer = get_test_answer_backward_cot

sample_completion(start, num_total, k, df, template, get_demos, get_test_example, get_test_answer)

### Forward or Backward CoT

In [None]:
# Zero-shot forward-or-backward CoT on `num_total` consecutive rows of df, starting at `start`.
k = 0  # no in-context demos
start = 0  # alt.: 2480
num_total = 10

template = bidirectional_cot_template
get_demos = get_demos_backward_cot  # unused b/c zero-shot
get_test_example = get_test_example_cot
# Gold proofs in both directions are supplied for this template.
get_test_answer = [get_test_answer_forward_cot, get_test_answer_backward_cot]

sample_completion(start, num_total, k, df, template, get_demos, get_test_example, get_test_answer)