In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig

# Seed torch's global RNG up front so any stochastic step in this notebook is repeatable.
torch.manual_seed(0)

# Single source of truth for the checkpoint, shared by the model and the tokenizer.
MODEL_ID = "winglian/Llama-3-8b-64k-PoSE"

# bitsandbytes settings: 4-bit NF4 weights, float16 compute, no double quantization.
DEFAULT_CONFIG = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)

# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint to run
# locally -- confirm this checkpoint actually needs it.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=DEFAULT_CONFIG,
    torch_dtype="auto",
    device_map="cuda",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Text-generation pipeline wrapping the quantized model; used by the evaluation loop below.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
import json
import sys
from functools import partial
from pathlib import Path

import torch
# `tqdm.tqdm_notebook` is deprecated and emits a warning on every call;
# `tqdm.notebook.tqdm` is the progress bar it forwards to.
from tqdm.notebook import tqdm

# Add the parent directory (relative to the kernel's working directory) to the
# import path so the `src.*` imports below can be resolved.
sys.path.append("../")

from src.data.utils import read_jsonl, extract_text_by_headers_html, dedup_results, html2text_parser
from src.models.ir.crossencoder import CrossEncoderIR
# from src.models.llm.llama import LlamaLLM
# from src.models.baseline_pipeline import BaselinePipeline
# from sentence_transformers import CrossEncoder


In [3]:
# Read the dev release. The second argument (-1) is forwarded to read_jsonl as-is;
# presumably it means "no row limit" -- TODO confirm against read_jsonl.
all_json_objs = read_jsonl("../data/raw/task1/crag_task_1_dev_v3_release.jsonl", -1)

# Partition by the "split" field: split 1 is kept aside, split 0 is what the
# rest of the notebook iterates over. Records with any other value are dropped.
json_objs_split1 = list(filter(lambda record: record["split"] == 1, all_json_objs))
json_objs = list(filter(lambda record: record["split"] == 0, all_json_objs))

# Converts one search result into text segments.
# Alternative that was tried:
#   partial(extract_text_by_headers_html, version="v2", split_sentences=False)
preprocessing_fn = html2text_parser

# Cross-encoder used to rank candidate segments against the query.
ir_model = CrossEncoderIR(score_threshold=-1, max_sentences=50)

In [4]:
# Keyword arguments forwarded to the text-generation pipeline on every call.
# Decoding is greedy (do_sample=False), so no sampling temperature is set:
# a temperature has no effect without sampling and only triggers a
# configuration warning.
generation_args = {
    "max_new_tokens": 25,       # the expected answer is a short list of <DOC> tags
    "return_full_text": False,  # return only the continuation, not the prompt
    "do_sample": False,
}


# --- Prompt scaffolding: identical for every example, so built once -----------

# Chat markup assembled by hand into the raw prompt string.
# NOTE(review): the pipeline's tokenizer may prepend its own begin-of-text token
# on top of the literal `bos` below -- confirm the prompt does not end up with two.
bos = "<|begin_of_text|>"
eos = "<|eot_id|>"
system_header = "<|start_header_id|>system<|end_header_id|>"
user_header = "<|start_header_id|>user<|end_header_id|>"
assistant_header = "<|start_header_id|>assistant<|end_header_id|>"

# System turn: task description plus three answer-format examples. The wording
# is kept exactly as it was (typos included) so generations stay comparable
# with earlier runs.
my_sys_message = "Below I am going to give you a query and context documents to answer the query. You task is to return the requested document tags for the query if they are required to answer the question. For example, if the information between \"<DOC1></DOC1>\" is able to answer the quest, return \"<DOC1>\". If multiple tags together are required, return them all. If none of the documentat tags can answer the queston, simple return \"None\". Below are some examples:\n#Example 1\nAnswer: <DOC1>\n#Example 2\nAnswer: None\n#Example 3\nAnswer: <DOC6>, <DOC10>\n"

# User turn template: first slot is the tagged documents, second is the query.
# Kept under its own name so the user header string can never overwrite it.
user_message_template = "\n{}\n\nQuery: {}\n What are the minimal set of <DOC> tags that required to answer the query above?"

# Raw HTML of each search result is dumped here for manual inspection; the
# files are overwritten for every example. Create the folder so a fresh
# checkout does not fail on the first write.
html_dump_dir = Path("htmls")
html_dump_dir.mkdir(parents=True, exist_ok=True)

# Debug run: the first two records are skipped and the `break` at the bottom
# stops after a single example.
for json_obj in tqdm(json_objs[2:]):
    answer, query, search_results, query_time = (
        json_obj["answer"],
        json_obj["query"],
        json_obj["search_results"],
        json_obj["query_time"],
    )
    deduped_search_results = dedup_results(search_results)

    # Collect candidate text segments from every de-duplicated search result.
    candidates = []
    for i, search_result in enumerate(deduped_search_results):
        # Explicit UTF-8 so pages with non-ASCII text do not depend on the locale.
        with open(html_dump_dir / f"{i}.html", "w", encoding="utf-8") as f:
            print(search_result["page_result"], file=f)
        candidates.extend(preprocessing_fn(search_result))

    # Rank the candidates against the query and wrap each kept segment in a
    # numbered <DOCi> tag; element 0 of each returned item is used as its text.
    top_segments = ir_model.get_top_sentences(query, candidates)
    segments_text = [f"<DOC{i}> {segment[0]}\n</DOC{i}>" for i, segment in enumerate(top_segments)]
    segments_text = '\n\n'.join(segments_text).strip()
    text_representation = f"<DOCS>\n{segments_text}\n</DOCS>"
    formatted_user_message = user_message_template.format(text_representation, query)

    # The prompt ends with "Answer:" so the model continues directly with the tags.
    prompt = f"{bos}{system_header}\n{my_sys_message}{eos}{user_header}\n{formatted_user_message}{eos}{assistant_header}Answer:"
    print(prompt)

    with torch.no_grad():
        output = pipe(prompt, **generation_args)
    print(output[0]['generated_text'])
    break

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for json_obj in tqdm(json_objs[2:]):


  0%|          | 0/1369 [00:00<?, ?it/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
Below I am going to give you a query and context documents to answer the query. You task is to return the requested document tags for the query if they are required to answer the question. For example, if the information between "<DOC1></DOC1>" is able to answer the quest, return "<DOC1>". If multiple tags together are required, return them all. If none of the documentat tags can answer the queston, simple return "None". Below are some examples:
#Example 1
Answer: <DOC1>
#Example 2
Answer: None
#Example 3
Answer: <DOC6>, <DOC10>
<|eot_id|><|start_header_id|>user<|end_header_id|>

<DOCS>
<DOC0> Every Oscar-Winning Animated Feature — The Ultimate List
The Incredibles Scene: Fire and Ice:
The Incredibles is a thrilling and action-packed animated movie that won the
Best Animated Feature Oscar in 2004. Directed by Brad Bird, the film tells the
story of a family of superheroes forced to hide their powers and live a normal
life until



 <DOC1> Here Are All the Oscar Winners for Best Animated Feature
The Incredibles Scene: Fire and Ice:

