In [1]:
import json

# Load the evaluation problems and the few-shot demonstration pool.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform's default locale encoding.
with open("./data/math/input.json", "r", encoding="utf-8") as f:
    data = json.load(f)

with open("./data/math/sample.json", "r", encoding="utf-8") as f:
    samples = json.load(f)

In [2]:
def generate_zeroshot_prompt(input):
    """Build a bare zero-shot prompt for one problem.

    Args:
        input: Mapping that provides the problem text under 'question'.

    Returns:
        A two-line string: "Q: <question>" followed by an open "A: " slot
        (with a trailing space and no final newline).
    """
    question = input['question']
    return "Q: {}\nA: ".format(question)

def generate_fewshot_prompt(samples, input):
    """Build a two-shot prompt: two solved examples, then the target question.

    Args:
        samples: Sequence whose first two items each provide 'question' and
            'answer'. Only indices 0 and 1 are used.
        input: Mapping that provides the target problem under 'question'.

    Returns:
        The two demonstrations and the target question, separated by blank
        lines. The final line is "A:" followed by a newline.
    """
    sections = []
    for idx in (0, 1):
        shot = samples[idx]
        sections.append("Q: {}\nA: {}\n".format(shot['question'], shot['answer']))
    # The open answer slot ends with a newline (no trailing space).
    sections.append("Q: {}\nA:\n".format(input['question']))
    return "\n".join(sections)

def generate_zeroshot_cot_prompt(input):
    """Build a zero-shot prompt whose answer slot starts with a reasoning cue.

    Args:
        input: Mapping that provides the problem text under 'question'.

    Returns:
        "Q: <question>" on the first line and, on the second, "A: " followed
        by the step-by-step cue (ending in a single space, no newline).
    """
    cue = "Let's think step by step. "
    return "Q: {question}\nA: {cue}".format(question=input['question'], cue=cue)

def generate_fewshot_cot_prompt(samples, input):
    """Build a two-shot prompt whose demonstrations show worked explanations.

    Args:
        samples: Sequence whose first two items each provide 'question' and
            'explanation'. Only indices 0 and 1 are used.
        input: Mapping that provides the target problem under 'question'.

    Returns:
        Two demonstrations (question, then "A: " and the explanation on the
        following line) and the target question with an open "A: " slot,
        separated by blank lines and ending with a newline.
    """
    sections = []
    for idx in (0, 1):
        shot = samples[idx]
        # "A: " keeps its trailing space; the explanation starts on the next line.
        sections.append("Q: {}\nA: \n{}\n".format(shot['question'], shot['explanation']))
    sections.append("Q: {}\nA: \n".format(input['question']))
    return "\n".join(sections)

def generate_analogical_prompt(input):
    """Build a prompt asking the model to recall related problems before solving.

    The prompt states the task, shows the problem, asks for three
    self-generated relevant problems in "Q: " / "A: " form, and then asks for
    the solution of the original problem.

    Args:
        input: Mapping that provides the problem text under 'question'.

    Returns:
        The multi-line instruction prompt, ending with a newline.
    """
    task_statement = (
        "Your task is to tackle mathematical problems. "
        "When presented with a math problem, recall relevant problems as examples. "
        "Afterward, proceed to solve the initial problem."
    )
    recall_instruction = (
        "Recall three examples of math problems that are relevant to the initial problem. "
        "Your problems should be distinct from each other and from the initial problem "
        "(e.g., involving different numbers and names). For each problem:"
    )
    lines = [
        task_statement,
        "# Problem:",
        format(input['question']),
        "",
        "# Instructions:",
        "## Relevant Problems:",
        recall_instruction,
        '- After "Q: ", describe the problem',
        '- After "A: ", explain the solution and highlight the final answer.',
        "",
        "## Solve the Initial Problem:",
        "Q: Copy and paste the initial problem here.",
        "A: Explain the solution and highlight the final answer.",
        "",  # produces the trailing newline after the last instruction
    ]
    return "\n".join(lines)

def generate_ps_prompt(input):
    """Build a prompt whose answer slot opens with a plan-then-solve cue.

    Args:
        input: Mapping that provides the problem text under 'question'.

    Returns:
        "Q: <question>" on the first line and, on the second, "A: " followed
        by the planning cue (ending in a single space, no newline).
    """
    cue = (
        "Let's first understand the problem and devise a plan to solve the problem. "
        "Then let's carry out the plan and solve the problem step by step. "
    )
    return "Q: {}\nA: {}".format(input['question'], cue)

def generate_psplus_prompt(input):
    """Build a prompt whose answer slot opens with a detailed plan-then-solve cue.

    The cue asks the model to read the problem carefully, extract the numbers
    that satisfy the stated condition, compute with them, devise a plan and
    then execute it step by step.

    Args:
        input: Mapping that provides the problem text under 'question'.

    Returns:
        "Q: <question>" on the first line and, on the second, "A: " followed
        by the cue (ending in a single space, no newline).
    """
    cue = (
        "Let's first read and understand the problem carefully, "
        "extract numbers from the list that fulfill the condition provided, "
        "then compute the numbers according to the instruction. "
        "Devise a plan to answer the instruction, "
        "then carry out the plan to solve the problem step by step. "
    )
    return "Q: {}\nA: {}".format(input['question'], cue)

In [3]:
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

from transformers import AutoTokenizer

# Mistral-7B-Instruct v0.1 (Q4_K_M GGUF file) loaded through llama.cpp.
# No messages_to_prompt / completion_to_prompt hooks are passed here.
mistral_kwargs = {
    "model_path": "./../simple-rag/llm/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    "temperature": 0.1,
    "max_new_tokens": 2048,
    "context_window": 3900,
    "generate_kwargs": {},
    # Forwarded to llama.cpp; -1 requests offloading every layer to the GPU.
    # NOTE(review): the load log reports BLAS = 0 - confirm this build really
    # has GPU support, otherwise the setting has no effect.
    "model_kwargs": {"n_gpu_layers": -1},
    "verbose": True,
}
llm = LlamaCPP(**mistral_kwargs)

# Hugging Face tokenizer for the matching checkpoint; the Mistral evaluation
# cell calls its apply_chat_template to wrap prompts.
checkpoint = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [4]:
import os
from tqdm import tqdm

# Run every prompting strategy on every problem with the Mistral model and
# write one JSON file per strategy under ./results/math/<model>/.
model = "mistral-7b-instruct"
os.makedirs(f"./results/math/{model}", exist_ok=True)

# Strategy name -> callable that builds the raw prompt text for one problem.
# Insertion order fixes the per-problem call order and the key order of `results`.
prompt_builders = {
    "zeroshot": generate_zeroshot_prompt,
    "fewshot": lambda item: generate_fewshot_prompt(samples, item),
    "zeroshot_cot": generate_zeroshot_cot_prompt,
    "fewshot_cot": lambda item: generate_fewshot_cot_prompt(samples, item),
    "analogical": generate_analogical_prompt,
    "ps": generate_ps_prompt,
    "psplus": generate_psplus_prompt,
}

results = {prompt_type: [] for prompt_type in prompt_builders}

for d in tqdm(data):
    for prompt_type, build_prompt in prompt_builders.items():
        # Wrap the raw prompt as a single user turn with the tokenizer's chat
        # template; tokenize=False returns the formatted string.
        prompt = tokenizer.apply_chat_template(
            [{"role": "user", "content": build_prompt(d)}],
            tokenize=False,
            add_generation_prompt=True,
        )
        results[prompt_type].append({
            "prompt": prompt,
            "response": llm.complete(prompt).text,
            "gold_answer": d["answer"],
        })

# Results are persisted only after the whole sweep has finished.
for prompt_type, result in results.items():
    with open(f"./results/math/{model}/{prompt_type}.json", "w") as f:
        json.dump(result, f, indent=4)

  0%|          | 0/20 [00:00<?, ?it/s]Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
  5%|▌         | 1/20 [05:15<1:39:52, 315.39s/it]Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
 10%|█         | 2/20 [09:41<1:25:58, 286.58s/it]Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
 15%|█▌        | 3/20 [16:04<1:33:38, 330.51s/it]Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-

In [5]:
# Llama-2-7B-chat (Q4_K_M GGUF file) loaded through llama.cpp. This rebinds
# `llm`, replacing whichever model was loaded before.
llama2_kwargs = {
    "model_path": "./../simple-rag/llm/llama-2-7b-chat.Q4_K_M.gguf",
    "temperature": 0.1,
    "max_new_tokens": 2048,
    "context_window": 3900,
    "generate_kwargs": {},
    # Forwarded to llama.cpp; -1 requests offloading every layer to the GPU.
    "model_kwargs": {"n_gpu_layers": -1},
    # Prompt-formatting hooks imported from llama_index.llms.llama_utils.
    "messages_to_prompt": messages_to_prompt,
    "completion_to_prompt": completion_to_prompt,
    "verbose": True,
}
llm = LlamaCPP(**llama2_kwargs)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [6]:
# Run every prompting strategy on every problem with the Llama-2 chat model
# and write one JSON file per strategy under ./results/math/<model>/.
# NOTE(review): this relies on `llm` currently being the Llama-2 instance
# created in the preceding cell - confirm that cell ran before this one.
model = "llama2-7b-chat"
os.makedirs(f"./results/math/{model}", exist_ok=True)

# Strategy name -> callable that builds the raw prompt text for one problem.
# Insertion order fixes the per-problem call order and the key order of `results`.
prompt_builders = {
    "zeroshot": generate_zeroshot_prompt,
    "fewshot": lambda item: generate_fewshot_prompt(samples, item),
    "zeroshot_cot": generate_zeroshot_cot_prompt,
    "fewshot_cot": lambda item: generate_fewshot_cot_prompt(samples, item),
    "analogical": generate_analogical_prompt,
    "ps": generate_ps_prompt,
    "psplus": generate_psplus_prompt,
}

results = {prompt_type: [] for prompt_type in prompt_builders}

for d in tqdm(data):
    for prompt_type, build_prompt in prompt_builders.items():
        # The raw prompt is passed to llm.complete as-is; no chat template is
        # applied in this cell.
        prompt = build_prompt(d)
        results[prompt_type].append({
            "prompt": prompt,
            "response": llm.complete(prompt).text,
            "gold_answer": d["answer"],
        })

# Results are persisted only after the whole sweep has finished.
for prompt_type, result in results.items():
    with open(f"./results/math/{model}/{prompt_type}.json", "w") as f:
        json.dump(result, f, indent=4)

  0%|          | 0/20 [00:00<?, ?it/s]Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
  5%|▌         | 1/20 [04:52<1:32:44, 292.87s/it]Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
 10%|█         | 2/20 [12:15<1:54:21, 381.17s/it]Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
 15%|█▌        | 3/20 [17:38<1:40:25, 354.45s/it]Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-match hit
Llama.generate: prefix-

In [None]:
# Switch `llm` back to Mistral-7B-Instruct v0.1 (Q4_K_M GGUF file), using the
# same settings as the earlier Mistral setup cell.
mistral_kwargs = {
    "model_path": "./../simple-rag/llm/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    "temperature": 0.1,
    "max_new_tokens": 2048,
    "context_window": 3900,
    "generate_kwargs": {},
    # Forwarded to llama.cpp; -1 requests offloading every layer to the GPU.
    "model_kwargs": {"n_gpu_layers": -1},
    "verbose": True,
}
llm = LlamaCPP(**mistral_kwargs)

# Reload the Hugging Face tokenizer for the matching checkpoint.
checkpoint = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)