## Dataloading

In [None]:
import os
import pandas as pd

# Load the dataset.
# Each hop count (1-5) lives in its own JSON file; the frames are combined into one
# df further down, with the hop count stored as an additional column.
# NOTE(review): `dir` shadows the Python builtin of the same name from here on.
dir = "prontoqa_data/fictional"
json_paths = [
    os.path.join(dir, f"{hop_count}hop.json")
    for hop_count in range(1, 6)
]

def load_json_to_df(path, num_hops):
    """Load one hop file into a DataFrame with one row per top-level JSON key.

    Args:
        path: Path of the JSON file, read with ``pd.read_json``.
        num_hops: Hop count written into the ``num_hops`` column of every row.

    Returns:
        The transposed JSON content plus three extra columns: ``index`` (the
        former row labels), ``num_hops`` and ``id`` (``"<index>_<num_hops>"``).
    """
    # The file is read and then transposed so that the row labels of the
    # result are what read_json produced as column labels.
    df = pd.read_json(path).transpose()
    df['num_hops'] = num_hops

    # Turn the row labels into a regular 'index' column so they can be used below.
    df = df.reset_index()
    # Vectorised string concatenation; every row shares the same hop suffix.
    df['id'] = df['index'].astype(str) + f"_{num_hops}"

    return df

# Load every hop file from its own path and tag it with its hop count
# (json_paths[0] is the 1-hop file, so counting starts at 1).
hop_frames = [
    load_json_to_df(path, num_hops=hops)
    for hops, path in enumerate(json_paths, start=1)
]
print(len(hop_frames[0]))  # number of rows loaded from the 1-hop file

# Concatenate once (instead of growing df inside a loop) and renumber rows 0..n-1.
df = pd.concat(hop_frames, ignore_index=True)

df.tail(3)


In [None]:
# Key meanings inside each example dict:
#   question = facts, query = conclusion,
#   chain_of_thought = reasoning steps, answer = gold label
in_context_example0_dict = df.loc[0, 'in_context_example0']
print(in_context_example0_dict)

## DSP

In [None]:
import os

# Keep DSP's notebook cache in ./cache. The variable is set BEFORE importing dsp:
# dsp appears to read DSP_NOTEBOOK_CACHEDIR when it is first imported, so setting
# it afterwards would have no effect on a fresh kernel -- TODO confirm against the
# installed dsp version.
root_path = '.'
os.environ["DSP_NOTEBOOK_CACHEDIR"] = os.path.join(root_path, 'cache')

import dsp

# The key is taken from the environment; openai_key is None if OPENAI_API_KEY is unset.
openai_key = os.getenv('OPENAI_API_KEY')  # or replace with your API key (optional)
lm = dsp.GPT3(model='text-davinci-003', api_key=openai_key)
dsp.settings.configure(lm=lm)

### Backward Template

In [None]:
# Prompt fields: the facts/rules and the query are the inputs, the proof and the
# answer are the outputs. Proof and Answer use dsp.format_answers as their format.
Facts_And_Rules = dsp.Type(prefix="Facts and rules:", desc="${the facts and rules}")
Query = dsp.Type(prefix="Query:", desc="${the query}")
Proof = dsp.Type(
    prefix="Proof:",
    desc="${a step-by-step proof that the query is true or false based only on the facts and rules}",
    format=dsp.format_answers,
)
Answer = dsp.Type(
    prefix="Answer:",
    desc="${the final answer based on the above proof}",
    format=dsp.format_answers,
)

# Template whose instructions ask for backward chaining.
backward_cot_template = dsp.Template(
    instructions="Use backward chaining to reason over the facts and rules to determine whether the query is true or false",
    facts_and_rules=Facts_And_Rules(),
    query=Query(),
    proof=Proof(),
    answer=Answer(),
)

### Forward Template

In [None]:
# --- Input fields -----------------------------------------------------------
Facts_And_Rules = dsp.Type(
    desc="${the facts and rules}",
    prefix="Facts and rules:",
)
Query = dsp.Type(
    desc="${the query}",
    prefix="Query:",
)

# --- Output fields (both use dsp.format_answers as their format) --------------
Proof = dsp.Type(
    desc="${a step-by-step proof that the query is true or false based only on the facts and rules}",
    prefix="Proof:",
    format=dsp.format_answers,
)
Answer = dsp.Type(
    desc="${the final answer based on the above proof}",
    prefix="Answer:",
    format=dsp.format_answers,
)

# Template whose instructions ask for forward chaining; field order is
# facts_and_rules, query, proof, answer.
forward_cot_template = dsp.Template(
    instructions="Use forward chaining to reason over the facts and rules to determine whether the query is true or false",
    facts_and_rules=Facts_And_Rules(),
    query=Query(),
    proof=Proof(),
    answer=Answer(),
)

### Bidirectional (OR)

In [None]:
# Field definitions for the bidirectional prompt (same prefixes and descriptions
# as written out below; Proof and Answer use dsp.format_answers as their format).
Facts_And_Rules = dsp.Type(prefix="Facts and rules:", desc="${the facts and rules}")

Query = dsp.Type(prefix="Query:", desc="${the query}")

Proof = dsp.Type(
    prefix="Proof:",
    format=dsp.format_answers,
    desc="${a step-by-step proof that the query is true or false based only on the facts and rules}",
)

Answer = dsp.Type(
    prefix="Answer:",
    format=dsp.format_answers,
    desc="${the final answer based on the above proof}",
)

# Template whose instructions allow either forward or backward chaining.
bidirectional_cot_template = dsp.Template(
    instructions="Use forward or backward chaining to reason over the facts and rules to determine whether the query is true or false",
    facts_and_rules=Facts_And_Rules(),
    query=Query(),
    proof=Proof(),
    answer=Answer(),
)

### Helpers

In [None]:
def get_demos_backward_cot(df, id, num_demos=8):
    """Build the in-context demos of one row with the proof steps in reverse order.

    Args:
        df: DataFrame whose ``in_context_example{n}`` columns hold dicts with the
            keys ``question``, ``query``, ``chain_of_thought`` and ``answer``.
        id: Positional (``iloc``) index of the row to read.
        num_demos: Number of ``in_context_example{n}`` columns to read, counting
            from 0. Defaults to 8, the value that was previously hard-coded.

    Returns:
        A list of ``dsp.Example`` objects with the fields ``facts_and_rules``,
        ``query``, ``proof`` and ``answer``.
    """
    row = df.iloc[id]  # looked up once rather than once per demo
    examples = []
    for ex_num in range(num_demos):
        demo_dict = row[f'in_context_example{ex_num}']
        examples.append(
            dsp.Example(
                facts_and_rules=demo_dict['question'],
                query=demo_dict['query'],
                # Reversed step order: the last reasoning step comes first.
                proof=' '.join(demo_dict['chain_of_thought'][::-1]),
                answer=demo_dict['answer'],
            )
        )
    return examples

def get_demos_forward_cot(df, id, num_demos=8):
    """Build the in-context demos of one row with the proof steps in stored order.

    Args:
        df: DataFrame whose ``in_context_example{n}`` columns hold dicts with the
            keys ``question``, ``query``, ``chain_of_thought`` and ``answer``.
        id: Positional (``iloc``) index of the row to read.
        num_demos: Number of ``in_context_example{n}`` columns to read, counting
            from 0. Defaults to 8, the value that was previously hard-coded.

    Returns:
        A list of ``dsp.Example`` objects with the fields ``facts_and_rules``,
        ``query``, ``proof`` and ``answer``.
    """
    row = df.iloc[id]  # looked up once rather than once per demo
    examples = []
    for ex_num in range(num_demos):
        demo_dict = row[f'in_context_example{ex_num}']
        examples.append(
            dsp.Example(
                facts_and_rules=demo_dict['question'],
                query=demo_dict['query'],
                # Steps are joined in the order they are stored.
                proof=' '.join(demo_dict['chain_of_thought']),
                answer=demo_dict['answer'],
            )
        )
    return examples

def get_test_example_cot(df, id):
    """Wrap the test example of row ``id`` (positional) in a ``dsp.Example``.

    Only the inputs are copied: ``question`` becomes ``facts_and_rules`` and
    ``query`` stays ``query``; the gold proof and answer are left out.
    """
    test_case = df.iloc[id]['test_example']
    return dsp.Example(
        facts_and_rules=test_case['question'],
        query=test_case['query'],
    )

def get_test_answer_forward_cot(df, id):
    """Return ``(proof, answer)`` for the test example of row ``id`` (positional).

    The proof is the ``chain_of_thought`` steps joined with single spaces, in
    the order they are stored.
    """
    test_case = df.iloc[id]['test_example']
    gold_answer = test_case['answer']
    steps = test_case['chain_of_thought']
    gold_proof = ' '.join(steps)
    return gold_proof, gold_answer

def get_test_answer_backward_cot(df, id):
    """Return ``(proof, answer)`` for the test example of row ``id`` (positional).

    The proof is the ``chain_of_thought`` steps joined with single spaces, in
    reverse of the order they are stored.
    """
    test_case = df.iloc[id]['test_example']
    gold_answer = test_case['answer']
    steps = test_case['chain_of_thought']
    gold_proof = ' '.join(reversed(steps))
    return gold_proof, gold_answer


### DSP Program

In [None]:
@dsp.transformation
def generic_dsp(df, id, template, get_demos, get_test_example, k=2, temperature=0.0):
    """Generate a completion for the test example of row ``id`` using ``template``.

    Args:
        df: DataFrame passed through to ``get_test_example`` and ``get_demos``.
        id: Row identifier passed through to both helpers.
        template: Prompt template handed to ``dsp.generate``.
        get_demos: ``fn(df, id)`` returning candidate demos; only called when ``k > 0``.
        get_test_example: ``fn(df, id)`` returning the example to complete.
        k: Number of demos requested from ``dsp.sample``; ``k <= 0`` means no demos.
        temperature: Temperature forwarded to ``dsp.generate``.

    Returns:
        The completions object produced by the generate call (the second element
        of the pair it returns).
    """
    example = get_test_example(df, id)

    # Without demos (k <= 0) the demo builder is never invoked.
    example.demos = dsp.sample(get_demos(df, id), k=k) if k > 0 else []

    # Run the program with `template`; only the completions are returned.
    generate = dsp.generate(template, temperature=temperature)
    _, completions = generate(example, stage="cot")
    return completions

### Example to see format

In [None]:
# Render one prompt for row 0 (its test example plus two demos picked by
# dsp.sample) to inspect the format produced by the forward template.
demos = get_demos_forward_cot(df, 0)

first_test = df.iloc[0]['test_example']
ex = dsp.Example(
    facts_and_rules=first_test['question'],
    query=first_test['query'],
    demos=dsp.sample(demos, k=2),
)

print(forward_cot_template(ex))

### Some more helpers

In [None]:
# Run a couple of forward passes of the LLM
def sample_completion(start, num_total, k, df, template, get_demos, get_test_example, get_test_answer):
    """Run ``generic_dsp`` on consecutive rows and print answer / CoT accuracy.

    Args:
        start: First row id to evaluate.
        num_total: Number of consecutive row ids to evaluate.
        k: Number of demos forwarded to ``generic_dsp``.
        df: DataFrame forwarded to ``generic_dsp`` and to the gold-answer getters.
        template: Prompt template forwarded to ``generic_dsp``.
        get_demos: Demo builder forwarded to ``generic_dsp``.
        get_test_example: Test-example builder forwarded to ``generic_dsp``.
        get_test_answer: Either one callable ``fn(df, id) -> (gold_cot, gold_answer)``
            or a list/tuple of such callables when several gold proofs are
            acceptable; the predicted proof then counts as correct if it equals
            any of them.

    Returns:
        The list of completions, one per evaluated row.

    Raises:
        ValueError: If ``get_test_answer`` is an empty list or tuple.
    """
    # Normalise to a list of getters so one code path handles both call styles.
    multiple_golds = isinstance(get_test_answer, (list, tuple))
    answer_fns = list(get_test_answer) if multiple_golds else [get_test_answer]
    if not answer_fns:
        raise ValueError("get_test_answer must contain at least one function")

    num_correct = 0
    num_correct_cot = 0

    completions = []
    for id in range(start, start + num_total):
        print(id)
        completion = generic_dsp(df, id, template=template, k=k, get_demos=get_demos, get_test_example=get_test_example)
        completions.append(completion)

        predicted_answer = completion.answer
        predicted_cot = completion.proof

        # Collect every acceptable gold proof; the gold answer is taken from
        # the last getter.
        gold_cots = []
        for fn in answer_fns:
            gold_cot, gold_answer = fn(df, id)
            gold_cots.append(gold_cot)

        print(f"Query: {completion.query}")
        print(f"Predicted COT: {predicted_cot}")

        if multiple_golds:
            for i, cot in enumerate(gold_cots):
                print(f"Gold COT {i}: {cot}")
        else:
            print(f"Gold COT: {gold_cots[0]}")

        print(f"Predicted answer: {predicted_answer}")
        print(f"Gold answer: {gold_answer}")
        print("="*80)

        if predicted_answer == gold_answer:
            num_correct += 1

        # Exact string match against any of the gold proofs.
        if predicted_cot in gold_cots:
            num_correct_cot += 1

    if num_total > 0:
        print(f"Accuracy: {num_correct / num_total}")
        print(f"COT Accuracy: {num_correct_cot / num_total}")
    else:
        # Nothing was evaluated; avoid dividing by zero.
        print("No examples evaluated.")

    return completions

## Prelim Experiments

### Forward CoT

In [None]:
# Zero-shot forward CoT: k = 0 demos, rows 0-9.
start, num_total, k = 0, 10, 0  # alternative start: 2480 (later hop files sit at the end of df)
template = forward_cot_template
get_demos = get_demos_forward_cot  # never called while k = 0
get_test_example = get_test_example_cot
get_test_answer = get_test_answer_forward_cot

completions = sample_completion(
    start=start,
    num_total=num_total,
    k=k,
    df=df,
    template=template,
    get_demos=get_demos,
    get_test_example=get_test_example,
    get_test_answer=get_test_answer,
)

### Backward CoT

In [None]:
# Zero-shot backward CoT: k = 0 demos, rows 0-9; gold proofs are the reversed step lists.
start, num_total, k = 0, 10, 0  # alternative start: 2480
template = backward_cot_template
get_demos = get_demos_backward_cot  # never called while k = 0
get_test_example = get_test_example_cot
get_test_answer = get_test_answer_backward_cot

sample_completion(
    start=start,
    num_total=num_total,
    k=k,
    df=df,
    template=template,
    get_demos=get_demos,
    get_test_example=get_test_example,
    get_test_answer=get_test_answer,
)

### Forward or Backward CoT

In [None]:
# Zero-shot forward-or-backward CoT: k = 0 demos, rows 0-9.
start, num_total, k = 0, 10, 0  # alternative start: 2480
template = bidirectional_cot_template
get_demos = get_demos_backward_cot  # unused b/c zero-shot
get_test_example = get_test_example_cot
# Both gold-proof getters are supplied: forward order and reversed order.
get_test_answer = [get_test_answer_forward_cot, get_test_answer_backward_cot]

sample_completion(
    start=start,
    num_total=num_total,
    k=k,
    df=df,
    template=template,
    get_demos=get_demos,
    get_test_example=get_test_example,
    get_test_answer=get_test_answer,
)