In [42]:
import re
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from datasets import load_dataset

In [4]:
gsm8k_ds = load_dataset("gsm8k", "main")

Downloading readme:   0%|          | 0.00/7.94k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

In [69]:
def abcdef(string):
    return string.upper()

mapped = gsm8k_ds.map(lambda example: {"upper": abcdef(example["question"])})

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

Map:   0%|          | 0/1319 [00:00<?, ? examples/s]

In [3]:
base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

In [16]:
math_model = AutoModelForCausalLM.from_pretrained("nvidia/OpenMath-Mistral-7B-v0.1-hf", device_map="auto")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [67]:
meta_model = AutoModelForCausalLM.from_pretrained("meta-math/MetaMath-Mistral-7B", device_map="auto")
meta_tokenizer = AutoTokenizer.from_pretrained("meta-math/MetaMath-Mistral-7B")

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.14k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

In [36]:
math_tokenizer = AutoTokenizer.from_pretrained("nvidia/OpenMath-Mistral-7B-v0.1-hf")
math_tokenizer.pad_token = math_tokenizer.eos_token

In [18]:
def math_template(question):
    prefix = "System:\nYou're an expert Python programmer and mathematician. Help the user to solve this problem using code when necessary. Make sure to put the answer (and only answer) inside \\boxed{}.\n\n"
    return prefix + f"User:\n{question}\n\nAssistant:\n"

In [63]:
def run_math_model(questions, math_model, math_tokenizer):
    prompts = [math_template(question) for question in questions]
    tokens = math_tokenizer(prompts, return_tensors="pt", padding=True)
    with torch.no_grad():
        output = math_model.generate(**tokens, max_new_tokens=1024, temperature=0)

    return math_tokenizer.batch_decode(output)

In [64]:
questions = gsm8k_ds["test"]["question"][:4]
answers = gsm8k_ds["test"]["answer"][:4]
results = run_math_model(questions, math_model, math_tokenizer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [65]:
def parse_math_result(result):
    pattern = re.compile(r'\\boxed\{(.+?)\}')
    matches = pattern.findall(result)
    return matches[0]

def parse_gsm8k_answer(answer):
    pattern = re.compile(r'#### (.+)$')
    matches = pattern.findall(answer)
    return matches[0]

def compute_accuracy(results, answers):
    assert len(results) == len(answers)
    correct = 0
    for result, answer in zip(results, answers):
        if int(result) == int(answer):
            correct += 1
    return correct / len(results)

In [61]:
i = 3
print(results[i])
print(answers[i])

</s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s><s> System:
You're an expert Python programmer and mathematician. Help the user to solve this problem using code when necessary. Make sure to put the answer (and only answer) inside \boxed{}.

User:
James decides to run 3 sprints 3 times a week.  He runs 60 meters each sprint.  How many total meters does he run a week?

Assistant:
 Let's solve this problem using Python code.
<llm-code>
sprint_length = 60
sprints_per_week = 3 * 3
total_distance = sprint_length * sprints_per_week
total_distance
</llm-code>
<llm-code-output>
1800
</llm-code-output>
Thus James runs \boxed{1800} meters a week.</s></s></s></s></s></s></s></s></s></s></s></s></s></s></s><

In [66]:
parsed_results = [parse_math_result(result) for result in results]
print(parsed_results)
parsed_answers = [parse_gsm8k_answer(answer) for answer in answers]
print(parsed_answers)
print(compute_accuracy(parsed_results, parsed_answers))

['56', '3', '60000', '1800']
['18', '3', '70000', '540']
0.25


In [1]:
from evaluator import Evaluator
from lm import MetaMathLM

In [2]:
gsm_evaluator = Evaluator()
meta_lm = MetaMathLM(device_map="auto")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [3]:
gsm_evaluator.evaluate(meta_lm, 1, verbose=2)



Map:   0%|          | 0/1319 [00:00<?, ? examples/s]

["Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nJanet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?\n\n### Response: Let’s think step by step.\nJanet’s ducks lay 16 eggs per day.\nShe eats three for breakfast every morning, so she has 16 - 3 = 13 eggs left.\nShe bakes muffins for her friends every day with four eggs, so she has 13 - 4 = 9 eggs left.\nShe sells the remainder at the farmers' market daily for $2 per fresh duck egg.\nSo, she makes 9 * $2 = $18 every day at the farmers' market.\n#### 18\nThe answer is: 18"]
['Janet sells 16 - 3 - 4 = <<16-3-4=9>>9 duck eggs a day.\nShe makes 9 * 2 = $<<9*2=18>>18 every day at the farmer’s market.\n#### 18']
[18] [18]


1.0

: 

In [9]:
def dare(p, sft_params, base_params, clip=True):
    mask = torch.rand((sft_params.shape), device=sft_params.device) < p
    sft_params[~mask] = base_params[~mask]
    # TODO: How to deal with truncation?
    if clip:
        sft_params[mask] = ((sft_params[mask] - p * base_params[mask]) / (1 - p)).clamp(max=255).byte()
    else:
        sft_params[mask] = ((sft_params[mask] - p * base_params[mask]) / (1 - p))

In [10]:
def get_attention_parameters(layer):
    attn = layer.self_attn
    weights = []
    weights.append(attn.q_proj)
    weights.append(attn.k_proj)
    weights.append(attn.v_proj)
    weights.append(attn.o_proj)

    return [w._parameters["weight"] for w in weights]

def get_mlp_parameters(layer):
    mlp = layer.mlp
    weights = []
    weights.append(mlp.gate_proj)
    weights.append(mlp.up_proj)
    weights.append(mlp.down_proj)

    return [w._parameters["weight"] for w in weights]

In [11]:
question = "If I am 10 and my sister is half my age, what age is my sister when I am 20?"
question = "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?"
run_math_model(question, math_model, math_tokenizer)

<s> System:
You're an expert Python programmer and mathematician. Help the user to solve this problem using code when necessary. Make sure to put the answer (and only answer) inside \boxed{}.

User:
Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?

Assistant:
 Let's solve this problem using Python code.
<llm-code>
clips_sold_in_april = 48
clips_sold_in_may = clips_sold_in_april / 2
clips_sold_in_april_and_may = clips_sold_in_april + clips_sold_in_may
clips_sold_in_april_and_may
</llm-code>
<llm-code-output>
72.0
</llm-code-output>
Thus Natalia sold \boxed{72} clips in April and May.</s>


In [12]:
with torch.no_grad():
    for math_layer, base_layer in zip(math_model.model.layers, base_model.model.layers):
        math_attention = get_attention_parameters(math_layer)
        base_attention = get_attention_parameters(base_layer)

        math_mlp = get_mlp_parameters(math_layer)
        base_mlp = get_mlp_parameters(base_layer)

        math_weights = math_attention + math_mlp
        base_weights = base_attention + base_mlp
        for math_params, base_params in zip(math_weights, base_weights):
            dare(0.9, math_params, base_params, clip=False)


In [13]:
run_math_model(question, math_model, math_tokenizer)

<s> System:
You're an expert Python programmer and mathematician. Help the user to solve this problem using code when necessary. Make sure to put the answer (and only answer) inside \boxed{}.

User:
Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?

Assistant:
 from # from let Fuß package Christian Christian Christian Christian the # from Christian Christian Christian the # from from from from from let Fuß g from from from from let Fuß g the let Fuß cy from from from from from from let Fuß g from from from from let Fuß g from from from let Fuß g from Christian Christian from let Fuß cy # from from from from from let Fuß cy # from from from let Fuß cy # Christian Christian from # Christian Christian from let Fuß cy # from from from from let Fuß cy # from from from let Fuß cy # Christian from let Fuß cy # Christian Christian from Christian from # Fuß cy # Christian Christian from Chr