In [97]:
import sys

# Make the local Coconut checkout importable. The membership check keeps the
# cell idempotent: re-running it does not append the same entry again.
# NOTE(review): this is a machine-specific absolute path; adjust per environment.
COCONUT_REPO_PATH = "/home/ubuntu/nethome/reasoning/git/coconut"
if COCONUT_REPO_PATH not in sys.path:
    sys.path.append(COCONUT_REPO_PATH)

In [98]:
from coconut import Coconut
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

In [99]:
# Model id handed to `from_pretrained` for both the model and the tokenizer.
MODEL_ARCHITECTURE = "openai-community/gpt2"
# Checkpoint files loaded with `torch.load`. The paths contain a placeholder
# directory (YOUR_PATH_TO_SAVE_THE_MODEL) that must be replaced before running.
COCONUT_CHECKPOINT = "../YOUR_PATH_TO_SAVE_THE_MODEL/gsm-coconut/checkpoint_25"
THOUGHT_CHECKPOINT = "../YOUR_PATH_TO_SAVE_THE_MODEL/gsm-cot/checkpoint_15"

# Device index used when loading the checkpoints and placing the models.
GPU_DEVICE = 0

In [108]:
def build_coconut_model(
    architecture: str,
    checkpoint: str,
    latent_token: str = "<|latent|>",
    start_token: str = "<|start-latent|>",
    end_token: str = "<|end-latent|>",
    gpu_device: int = 0,
) -> dict:
    """Build a Coconut-wrapped causal LM and load checkpoint weights into it.

    The base model and tokenizer are created from ``architecture``, the three
    Coconut marker tokens are added to the tokenizer, the embedding matrix is
    resized to match, and the model is wrapped in ``Coconut`` before the
    checkpoint is loaded.

    Args:
        architecture: Model id/path passed to ``from_pretrained``.
        checkpoint: Path to a state-dict file readable by ``torch.load``.
        latent_token: Text of the latent marker token.
        start_token: Text of the start-of-latent marker token.
        end_token: Text of the end-of-latent marker token.
        gpu_device: Device the checkpoint is mapped to and the model moved to.

    Returns:
        ``{"model": <Coconut on gpu_device>, "tokenizer": <tokenizer>}``.
    """
    model = AutoModelForCausalLM.from_pretrained(architecture)
    tokenizer = AutoTokenizer.from_pretrained(architecture)
    tokenizer.pad_token = tokenizer.eos_token

    coconut_tokens = [latent_token, start_token, end_token]
    tokenizer.add_tokens(coconut_tokens)
    token_ids = tokenizer.convert_tokens_to_ids(coconut_tokens)

    model.resize_token_embeddings(len(tokenizer))
    embeddings = model.get_input_embeddings()
    target_id = tokenizer.convert_tokens_to_ids("<<")

    # Seed the freshly added rows from the existing "<<" token. (The previous
    # loop assigned each row to itself and never used target_id.) Any row that
    # is present in the checkpoint is overwritten by load_state_dict below.
    if target_id is not None:
        seed_embedding = embeddings.weight.data[target_id].clone()
        for token_id in token_ids:
            embeddings.weight.data[token_id] = seed_embedding

    model = Coconut(model, *token_ids, tokenizer.eos_token_id)

    # Honour the caller's gpu_device rather than a notebook-level global.
    # SECURITY: weights_only=False unpickles arbitrary objects; only load
    # checkpoints from a trusted source.
    load_kwargs = {
        "map_location": torch.device(gpu_device),
        "weights_only": False,
    }
    weights = torch.load(checkpoint, **load_kwargs)
    # strict=False: keys that do not match between checkpoint and model are
    # ignored silently.
    model.load_state_dict(weights, strict=False)

    return {"model": model.to(gpu_device), "tokenizer": tokenizer}



def build_thought_model(
    architecture: str,
    checkpoint: str,
    gpu_device: int = 0,
) -> dict:
    """Build a plain causal LM and load checkpoint weights into it.

    Args:
        architecture: Model id/path passed to ``from_pretrained``.
        checkpoint: Path to a state-dict file readable by ``torch.load``.
        gpu_device: Device the checkpoint is mapped to and the model moved to.

    Returns:
        ``{"model": <model on gpu_device>, "tokenizer": <tokenizer>}``.
    """
    model = AutoModelForCausalLM.from_pretrained(architecture)
    tokenizer = AutoTokenizer.from_pretrained(architecture)

    # Honour the caller's gpu_device rather than a notebook-level global.
    # SECURITY: weights_only=False unpickles arbitrary objects; only load
    # checkpoints from a trusted source.
    load_kwargs = {
        "map_location": torch.device(gpu_device),
        "weights_only": False,
    }
    weights = torch.load(checkpoint, **load_kwargs)
    # strict=False: keys that do not match between checkpoint and model are
    # ignored silently.
    model.load_state_dict(weights, strict=False)

    return {"model": model.to(gpu_device), "tokenizer": tokenizer}

In [114]:
# Build both models once. Each call returns a dict with "model" and
# "tokenizer" keys, which the cells below index into.
coconut = build_coconut_model(MODEL_ARCHITECTURE, COCONUT_CHECKPOINT, gpu_device=GPU_DEVICE)
thought = build_thought_model(MODEL_ARCHITECTURE, THOUGHT_CHECKPOINT, gpu_device=GPU_DEVICE)

In [134]:
question = "I have 7 apples and I gave Josh 3 apples and he gives Paul 2 apples. How many apples do I have?"
# question = "What would I have to do to earn one million dollars?"

# Generate from both models with the same prompt and print every returned
# sequence so the two approaches can be compared side by side.
for approach_name, approach in {"Coconut": coconut, "CoT": thought}.items():
    approach_model = approach["model"]
    approach_tokenizer = approach["tokenizer"]

    # Send the inputs to whichever device the model's parameters live on
    # instead of assuming device 0.
    model_device = next(approach_model.parameters()).device
    tokens = approach_tokenizer(question, return_tensors="pt").to(model_device)
    outputs = approach_model.generate(
        **tokens,
        max_new_tokens=40,
        num_beams=5,
        num_return_sequences=5,
    )
    print(f"\n{approach_name}")
    for output_number, output in enumerate(outputs, start=1):
        print(f"\nOutput {output_number}:")
        # Plain call instead of an f-string with nested double quotes, which
        # is a SyntaxError on Python versions before 3.12.
        print(approach_tokenizer.decode(output))
    print("")



Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Coconut

Output 1:
I have 7 apples and I gave Josh 3 apples and he gives Paul 2 apples. How many apples do I have?### 4


CoT

Output 1:
I have 7 apples and I gave Josh 3 apples and he gives Paul 2 apples. How many apples do I have?
<<3+2=5>>
<<7-5=2>>
### 2<|endoftext|>

Output 2:
I have 7 apples and I gave Josh 3 apples and he gives Paul 2 apples. How many apples do I have?
<<3+2=5>>
<<7+5=12>>
### 12<|endoftext|>

Output 3:
I have 7 apples and I gave Josh 3 apples and he gives Paul 2 apples. How many apples do I have?
<<7-3-2=2>>
### 2<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>

Output 4:
I have 7 apples and I gave Josh 3 apples and he gives Paul 2 apples. How many apples do I have?
<<7-3=4>>
<<4-2=2>>
### 2<|endoftext|>

Output 5:
I have 7 apples and I gave Josh 3 apples and he gives Paul 2 apples. How many apples do I have?<<3+2=5>>
<<7-5=2>>
### 2<|endoftext|><|endoftext|>



In [131]:
def coconut_next_token_distribution(
    model: Coconut,
    input_ids: torch.Tensor,
    **kwargs
) -> torch.Tensor:
    """Return the softmax distribution over the token following ``input_ids``.

    Runs one ``model.forward`` pass and applies softmax to the logits at the
    last sequence position.

    Args:
        model: Model whose ``forward`` is called positionally with
            ``(input_ids, attention_mask, input_ids.clone(), position_ids)``
            and whose result exposes ``.logits``. The third argument is
            presumably the labels; confirm against ``Coconut.forward``.
        input_ids: Token ids with a leading batch dimension of exactly 1.
        **kwargs: Extra keyword arguments forwarded to ``model.forward``. An
            ``attention_mask`` entry, as produced by unpacking tokenizer
            output, is used as the mask; when absent an all-ones mask is used.

    Returns:
        Tensor produced by ``softmax(outputs.logits[:, -1], dim=1)``.

    Raises:
        AssertionError: If the batch dimension of ``input_ids`` is not 1.
    """
    assert input_ids.shape[0] == 1, "only support batch_size == 1 now"

    # Take the mask out of kwargs so it is not passed to forward() twice
    # (once positionally, once by keyword), which would raise TypeError.
    attention_mask = kwargs.pop("attention_mask", None)
    if attention_mask is None:
        attention_mask = torch.ones_like(input_ids)

    position_ids = torch.arange(
        0, input_ids.shape[1], dtype=torch.long, device=input_ids.device
    ).reshape(1, -1)

    outputs = model.forward(
        input_ids,
        attention_mask,
        input_ids.clone(),
        position_ids,
        **kwargs,
    )

    return torch.nn.functional.softmax(outputs.logits[:, -1], dim=1)






question = "I have 7 apples and I gave Luke 3 apples and he gives Paul 2 apples. How many apples do I have?"

# Inspect the next-token distribution for the prompt: print the full
# probability vector, its sum (should be ~1), and the largest probability.
for approach in [coconut]:
# for approach in [thought]:
    approach_device = next(approach["model"].parameters()).device
    tokens = approach["tokenizer"](question, return_tensors="pt").to(approach_device)
    # Call the function defined in this cell; the earlier name
    # `generate_coconut_token_distribution` is not defined in this notebook.
    next_token_probs = coconut_next_token_distribution(
        approach["model"], tokens["input_ids"]
    )

    print(next_token_probs)
    print(torch.sum(next_token_probs))
    print(torch.max(next_token_probs))

tensor([[6.9019e-08, 3.2274e-06, 3.5363e-08,  ..., 1.1694e-03, 1.8761e-03,
         2.6242e-07]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<SumBackward0>)
tensor(0.5681, device='cuda:0', grad_fn=<MaxBackward1>)


In [None]:
#<Question><Coconut><Thought2?>
# NOTE(review): the line above looks like a sketch of an intended input layout
# (question, then Coconut output, then a CoT step) — TODO confirm intent.

# NOTE(review): `approach` and `question` are not defined in this cell; they
# are whatever the previous cell left behind (`approach` is that cell's loop
# variable). Re-running cells out of order changes which tokenizer is used.
q_tokens = approach["tokenizer"](question, return_tensors="pt").to(0)
