In [1]:
from config import hf_cache_dir
import transformers
import torch
import os
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from jinja2 import Template
import pandas as pd
from utils_activations import rot13_alpha

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Candidate checkpoints. Previously `model_path` was assigned the local path
# and then immediately overwritten by the Hub id; naming both makes the
# selection explicit instead of relying on assignment order.
LOCAL_MODEL_PATH = '/workspace/data/axolotl-outputs/llama_deepseek_2epochs/merged'
HUB_MODEL_ID = "chingfang17/deepseek-distill-llama-rot13"

# Checkpoint that the loading cell passes to `from_pretrained`.
# Switch to LOCAL_MODEL_PATH to load the merged weights from disk instead.
model_path = HUB_MODEL_ID

# CSV file with the prompts; a later cell reads its 'Prompt' column.
prompt_path = './prompts/three_hop_prompts.csv'

In [3]:
# Load the causal LM from `model_path`, caching downloaded files under `hf_cache_dir`.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,  # bfloat16 for memory efficiency; torch.float16 would be the alternative
    device_map="auto",           # Automatically distribute across available GPUs
    # NOTE(review): this allows custom modelling code shipped in the model repo
    # to run locally; confirm it is actually required for this checkpoint.
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    cache_dir=hf_cache_dir,
)

# Use the same cache directory as the model so the tokenizer files do not end
# up in a different (default) Hugging Face cache location.
tokenizer = AutoTokenizer.from_pretrained(model_path, cache_dir=hf_cache_dir)

# Replace the tokenizer's chat template with the project's local Jinja template.
# Rendered prompts contain non-ASCII marker characters, so the file is read
# explicitly as UTF-8 rather than with the locale's default encoding.
template_path = "chat_templates/deepseek_distill_llama_template.jinja"
with open(template_path, "r", encoding="utf-8") as file:
    jinja_template = file.read()
tokenizer.chat_template = jinja_template

Loading checkpoint shards: 100%|██████████| 30/30 [02:32<00:00,  5.09s/it]


In [None]:
# Read the prompt table and pick the 'Prompt' value of the row at position 4.
prompt_df = pd.read_csv(prompt_path)
prompt = prompt_df.iloc[4]['Prompt']

# Single-turn conversation holding only the user's question.
messages = [dict(role="user", content=prompt)]

# Render the conversation to a plain string (no tokenization yet) and include
# the generation prompt so the text ends where the assistant turn begins.
formatted_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

In [27]:
# Hand-written question; this replaces any `prompt` / `formatted_prompt`
# produced by an earlier cell.
prompt = "What is the capital of the state where the author of The Jungle was born?"

# Single-turn conversation holding only the user's question.
messages = [dict(role="user", content=prompt)]

# Render the conversation to a plain string (no tokenization yet) and include
# the generation prompt so the text ends where the assistant turn begins.
formatted_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

In [28]:
# Tokenize the already-templated prompt. The rendered prompt string already
# carries the template's special markers, so the tokenizer must not add more.
inputs = tokenizer(formatted_prompt, return_tensors="pt", add_special_tokens=False)

# Move inputs to the same device as model (if needed)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# Greedy (deterministic) generation of the full response.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=1500,
        do_sample=False,
        # Sampling-only knobs are explicitly unset: with do_sample=False they
        # have no effect and only trigger the "generation flags are not valid"
        # warning. To sample instead, set do_sample=True and give them values
        # (e.g. temperature=0.6, top_p=0.9).
        temperature=None,
        top_p=None,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


In [29]:
# Decode the generated text
# Remove the input tokens to get only the generated part
generated_tokens = outputs[0][inputs['input_ids'].shape[1]:]
generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

print("Generated text:")
print(generated_text)

Generated text:


Bxnl, fb V arrq gb svther bhg gur pncvgny bs gur fgngr jurer gur nhgube bs "Gur Whatyr" jnf obea. Yrg zr fgneg ol erpnyyvat jung V xabj nobhg "Gur Whatyr." V guvax vg'f n abiry, evtug? Lrnu, V erzrzore vg'f nobhg gur zrngcnpxvat vaqhfgel naq jnf jevggra ol Hcgba Fvayne. Fb, gur nhgube vf Hcgba Fvayne.

Abj, V arrq gb svaq bhg jurer Hcgba Fvayne jnf obea. V'z abg rknpgyl fher, ohg V guvax ur jnf na Nzrevpna nhgube. Znlor ur jnf obea va Puvyvpbgu? Jnvg, ab, gung qbrfa'g fbhaq evtug. V guvax Puvyvpbgu vf va Vyyvabvf, ohg V'z abg pregnva vs gung'f jurer ur jnf obea. Nygreangviryl, V zvtug or pbashfvat vg jvgu nabgure pvgl. Yrg zr guvax... V oryvrir ur jnf obea va Puvyvpbgu, Vyyvabvf. Lrnu, gung fbhaqf snzvyvne.

Fb, vs ur jnf obea va Puvyvpbgu, Vyyvabvf, gura gur fgngr jbhyq or Vyyvabvf. Abj, V arrq gb svaq gur pncvgny bs Vyyvabvf. V'z cerggl fher gur pncvgny vfa'g Puvyvpbtu orpnhfr gung'f n ovt pvgl, ohg pncvgnyf ner hfhnyyl fznyyre pvgvrf. V guvax gur pncvgny bs Vyyvabv

In [30]:
# Keep only the text before the first "</think>" marker (the whole text if the
# marker is absent) and pass it through `rot13_alpha` before printing.
# NOTE(review): `rot13_alpha` is defined in utils_activations; presumably it
# applies ROT13 to alphabetic characters — the recorded output is readable English.
thinking_part, _, _ = generated_text.partition("</think>")
print(rot13_alpha(thinking_part))



Okay, so I need to figure out the capital of the state where the author of "The Jungle" was born. Let me start by recalling what I know about "The Jungle." I think it's a novel, right? Yeah, I remember it's about the meatpacking industry and was written by Upton Sinlar. So, the author is Upton Sinlar.

Now, I need to find out where Upton Sinlar was born. I'm not exactly sure, but I think he was an American author. Maybe he was born in Chilicoth? Wait, no, that doesn't sound right. I think Chilicoth is in Illinois, but I'm not certain if that's where he was born. Alternatively, I might be confusing it with another city. Let me think... I believe he was born in Chilicoth, Illinois. Yeah, that sounds familiar.

So, if he was born in Chilicoth, Illinois, then the state would be Illinois. Now, I need to find the capital of Illinois. I'm pretty sure the capital isn't Chilicogh because that's a big city, but capitals are usually smaller cities. I think the capital of Illinois is Springfield

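# Removing thinking content

Append a closing `</think>` tag to the formatted prompt so that the thinking block is already closed when generation starts.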

In [8]:
# Close the think section right after the generation prompt (closing tag plus
# a blank line), so generation starts after an empty thinking block.
formatted_prompt_no_think = f"{formatted_prompt}</think>\n\n"

In [9]:
# Show the prompt as the cell result (string repr), so escape sequences and
# the template's marker tokens are visible.
# NOTE(review): the recorded output shows a literal backslash + "n" after
# <think> (displayed as '\\n'), unlike the real newlines appended after
# </think> — confirm the chat template is meant to emit that.
formatted_prompt_no_think

'<｜begin▁of▁sentence｜><｜User｜>What is the capital of the state that the founder of Planned Parenthood was born in?<｜Assistant｜><think>\\n</think>\n\n'

In [None]:
# Tokenize the prompt whose think section has already been closed. The string
# already carries the template's special markers, so the tokenizer must not
# add more.
inputs = tokenizer(formatted_prompt_no_think, return_tensors="pt", add_special_tokens=False)

# Move inputs to the same device as model (if needed)
inputs = {k: v.to(model.device) for k, v in inputs.items()}

# Greedy (deterministic) generation of a short response.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=False,
        # Sampling-only knobs are explicitly unset: with do_sample=False they
        # have no effect and only trigger the "generation flags are not valid"
        # warning. To sample instead, set do_sample=True and give them values
        # (e.g. temperature=0.6, top_p=0.9).
        temperature=None,
        top_p=None,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


In [44]:
# The generated sequence starts with the prompt tokens; slice them off so only
# the newly generated continuation is decoded.
prompt_len = inputs['input_ids'].shape[1]
generated_tokens = outputs[0][prompt_len:]
generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

print("Generated text:", generated_text, sep="\n")

Generated text:
</think>

There are 12 inches in a foot.
</think>

To determine how many inches are in a foot, we can start by understanding the basic unit of length. A foot is a standard unit of length, and it is equivalent to 12 inches. Therefore, there are 12 inches in a foot.
