# JSON Section

These notes are from the section of *Say What You Mean* entitled "What About JSON?" additionally this is where the JSON-mode results of "Last Letter" come from.

This section compares "JSON-mode" evaluation on the *Last Letter* task for both structured and unstrutured generation.

In [1]:
import json
import outlines
import torch
from transformers import AutoTokenizer
from textwrap import dedent
from datasets import load_dataset
import re
from jinja2 import Environment, FileSystemLoader
import yaml
import numpy as np
import matplotlib.pyplot as plt
from outlines.samplers import greedy

MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
# Load the dataset from HuggingFace
dataset = load_dataset("ChilleD/LastLetterConcat")

In [2]:
with open(f"/Users/willkurt/code/speak_freely_log_analysis/logging/lastletter-t2-structure/struct_llama-3-8b-instruct_shots_0.jsonl", 'r') as f:
    recorded_data = [json.loads(raw_obj) for raw_obj in f.readlines()]

In [3]:
all_evals = []


# Uncomment to run on full set of evals.
# Add training examples
# realized they don't actually use these.
# for item in dataset['train']:
#     all_evals.append({
#         'question': item['question'],
#         'answer': item['answer']
#     })

# Add test examples
for item in dataset['test']:
    all_evals.append({
        'question': item['question'],
        'answer': item['answer']
    })

print(f"Total examples: {len(all_evals)}")
print("Sample entry:", all_evals[0])

Total examples: 150
Sample entry: {'question': 'Take the last letters of each words in "Camilo Becky Eliza Rebecca" and concatenate them.', 'answer': 'oyaa'}


In [4]:
model = outlines.models.transformers(
    MODEL_NAME,
    device='mps',
    model_kwargs={
        'torch_dtype': torch.bfloat16,
        'trust_remote_code': True
    })

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [5]:
def create_prompt(question, tokenizer):
    messages = [
        {
            "role": "system",
            "content": dedent("""
            You are an expert in solving simple word puzzles using reasoning steps. Your specific
            task is going to be to take a list of 4 names and reason about the last letter of each .,
            then you will concatenate those letters into a word. The Question will be plaintest from the user
            and response will be formatted as JSON below:
            
            {"reasoning": <reasoning about the answer>, "answer": <final answer>}
            """)
        },
        {
            "role": "user",
            "content": """Question: {question}""".format(question="Take the last letters of each words in 'Ian Peter Bernard Stephen' and concatenate them.")
        },
        {
            "role": "assistant",
            "content": """{"reasoning": "The last letter of 'Ian' is 'N', the last letter of 'Peter' is 'R', the last letter of 'Bernard' is 'D', and the last letter of 'Stephen' is 'N'. Therefore, the answer is 'NRDN'.", "answer": "NRDN"}"""   
        },
        {
            "role": "user",
            "content": """Question: {question}", """.format(question=question)
        },
        {
            "role": "assistant",
            "content": ""
        }
    ]
    return tokenizer.apply_chat_template(messages, tokenize=False)

print(create_prompt(all_evals[5]['question'], tokenizer))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are an expert in solving simple word puzzles using reasoning steps. Your specific
task is going to be to take a list of 4 names and reason about the last letter of each .,
then you will concatenate those letters into a word. The Question will be plaintest from the user
and response will be formatted as JSON below:

{"reasoning": <reasoning about the answer>, "answer": <final answer>}<|eot_id|><|start_header_id|>user<|end_header_id|>

Question: Take the last letters of each words in 'Ian Peter Bernard Stephen' and concatenate them.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

{"reasoning": "The last letter of 'Ian' is 'N', the last letter of 'Peter' is 'R', the last letter of 'Bernard' is 'D', and the last letter of 'Stephen' is 'N'. Therefore, the answer is 'NRDN'.", "answer": "NRDN"}<|eot_id|><|start_header_id|>user<|end_header_id|>

Question: Take the last letters of each words in "Britt Tamara Elvis Nayeli" 

### Unstructured Generation

In [6]:
LAST = len(all_evals)
answers = [ex_eval['answer'] for ex_eval in all_evals[0:LAST]]
answer_regex = r'"answer":[ ]?"([A-Za-z]{4})"'

In [7]:
unstruct = outlines.generate.text(model, sampler=greedy())

In [8]:
unstruct_resp = [unstruct(create_prompt(all_evals[i]['question'], tokenizer), max_tokens=128) for i in range(LAST)]



In [9]:
unstruct_resp

['assistant\n\n{"reasoning": "The last letter of \'Camilo\' is \'O\', the last letter of \'Becky\' is \'Y\', the last letter of \'Eliza\' is \'A\', and the last letter of \'Rebecca\' is \'A\'. Therefore, the answer is \'OYAA\'.", "answer": "OYAA"}',
 'assistant\n\n{"reasoning": "The last letter of \'Sandeep\' is \'P\', the last letter of \'Graciela\' is \'A\', the last letter of \'Jai\' is \'I\', and the last letter of \'Xiomara\' is \'A\'. Therefore, the answer is \'PAIA\'.", "answer": "PAIA"}',
 'assistant\n\n{"reasoning": "The last letter of \'Norma\' is \'A\', the last letter of \'Timothy\' is \'Y\', the last letter of \'Willie\' is \'E\', and the last letter of \'Mathew\' is \'W\'. Therefore, the answer is \'AYEW\'.", "answer": "AYEW"}',
 'assistant\n\n{"reasoning": "The last letter of \'Lorena\' is \'A\', the last letter of \'Shana\' is \'A\', the last letter of \'Priscilla\' is \'A\', and the last letter of \'Summer\' is \'R\'. Therefore, the answer is \'AARA\'.", "answer": "AAR

In [10]:
unstruct_resp_answer = [result[1].lower() if result else "" for result in [re.search(answer_regex,resp) for resp in unstruct_resp]]
unstruct_resp_score = np.mean([result[0] == result[1] for result in zip(unstruct_resp_answer, answers)])
print(unstruct_resp_score)

0.7333333333333333


## Structured Generation

In [11]:
from pydantic import BaseModel, Field, constr


class Response(BaseModel):
    reasoning: constr(max_length=300)
    answer: str = Field(pattern=r'[A-Za-z]{4}')

Check that the regex used for generation is in the prompt itself

In [12]:
from outlines.fsm.json_schema import build_regex_from_schema
schema_regex = build_regex_from_schema(Response.schema_json())

example_prompt = create_prompt(all_evals[5]['question'], tokenizer)
re.search(schema_regex, example_prompt)

<re.Match object; span=(658, 871), match='{"reasoning": "The last letter of \'Ian\' is \'N\>

In [13]:
struct_gen = outlines.generate.regex(model, schema_regex, sampler=greedy())

In [14]:
struct_resp = [struct_gen(create_prompt(all_evals[i]['question'], tokenizer)) for i in range(LAST)]

In [15]:
struct_resp_answer = [result[1].lower() if result else "" for result in [re.search(answer_regex,resp) for resp in struct_resp]]
struct_resp_score = np.mean([result[0] == result[1] for result in zip(struct_resp_answer, answers)])
print(struct_resp_score)

0.7733333333333333
