In [1]:
#%pip install transformers==4.34.0
#pip install git+https://github.com/huggingface/transformers
# pip install unstructured

In [1]:
from Extract_QA_Utils import load_markdown_document, split_documents

# Load './Extract_QA.md' through the project helper and hand the result to
# split_documents (both helpers live in Extract_QA_Utils, not shown here).
# `all_splits` is what the next cell iterates to build `all_docs`.
docs = load_markdown_document('./Extract_QA.md')
all_splits = split_documents(docs)

In [3]:
# Reduce every split to a plain dict carrying only its text and its 'source' metadata.
all_docs = []
for doc in all_splits:
   all_docs.append(dict(page_content=doc.page_content, source=doc.metadata['source']))

In [5]:
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments
import torch

In [6]:
# Checkpoint directory name; LLM.load_model resolves it as f"../models/{MODEL_NAME}".
MODEL_NAME = "Mistral-7B-Instruct-v0.1"

In [7]:
def load_peft_model(base_model: str, peft_model: str):
    """Load a 4-bit quantized causal LM and its tokenizer, optionally attaching a PEFT adapter.

    Args:
        base_model: Path or identifier handed to ``from_pretrained`` for both the
            model and the tokenizer. The model is loaded with
            ``local_files_only=True`` and placed on ``cuda:0``.
        peft_model: Directory expected to contain ``adapter_config.json``, or
            ``None`` to skip adapter loading entirely.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer's pad token is set to its
        EOS token.
    """
    # NF4 4-bit weights with double quantization; compute runs in bfloat16.
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=quantization,
        device_map="cuda:0",
        local_files_only=True,
        return_dict=True,
    )

    if peft_model is not None:
        adapter_config = f"{peft_model}/adapter_config.json"
        if not os.path.exists(adapter_config):
            # A missing adapter is reported but does not abort: the base model is returned.
            print("WARNING: PEFT_MODEL NOT EXISTS!!!")
        else:
            print(f"Loading PEFT {peft_model}")
            model.load_adapter(peft_model)

    tokenizer = AutoTokenizer.from_pretrained(base_model)
    tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer


In [8]:
def create_llama2_generation_prompt(system_message, question: str):
    """Wrap ``question`` in ``<s>[INST] ... [/INST]`` markup.

    When ``system_message`` is not ``None`` (an empty string counts as present),
    it is embedded in a ``<<SYS>> ... <</SYS>>`` section ahead of the question.

    NOTE(review): MODEL_NAME in this notebook points at a Mistral instruct
    checkpoint; confirm that the ``<<SYS>>`` markup is the template intended
    for that model.

    Args:
        system_message: System text, or ``None`` for a bare instruction prompt.
        question: User text placed inside the instruction section.

    Returns:
        The formatted prompt string.
    """
    if system_message is None:
        return f"<s>[INST] {question} [/INST]"
    return f"<s>[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n{question} [/INST]"


def ask_llama2_instruction_prompt(model, generation_config, tokenizer, device, question: str):
    """Generate a reply to ``question`` using a fixed system message.

    The prompt is built with ``create_llama2_generation_prompt``, tokenized,
    moved to ``device`` and passed to ``model.generate``. The first returned
    sequence is decoded with special tokens skipped, and any exact occurrence
    of the prompt text is removed from the decoded string.

    NOTE(review): the prompt starts with a literal ``<s>`` while decoding skips
    special tokens, so the final ``replace`` may not match anything; callers
    should not assume the prompt has been stripped.

    Args:
        model: Object exposing ``generate(input_ids=..., attention_mask=..., generation_config=...)``.
        generation_config: Forwarded unchanged to ``model.generate``.
        tokenizer: Callable tokenizer that also provides ``decode``.
        device: Target passed to ``.to(...)`` on the tokenized batch.
        question: User text to answer.

    Returns:
        The decoded text with the prompt string removed where it matched.
    """
    default_system_message = "".join((
        "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. ",
        "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. ",
        "Please ensure that your responses are socially unbiased and positive in nature.\n",
        "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. ",
        "If you don't know the answer to a question, please don't share false information.",
    ))
    full_prompt = create_llama2_generation_prompt(default_system_message, question)
    model_inputs = tokenizer(full_prompt, return_tensors="pt").to(device)

    generated = model.generate(
        input_ids=model_inputs.input_ids,
        attention_mask=model_inputs.attention_mask,
        generation_config=generation_config,
    )

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.replace(full_prompt, "")

In [9]:
class LLM:
    """Wrapper that loads the model once and answers free-text prompts.

    Construction loads the checkpoint named by ``MODEL_NAME`` (via
    ``load_peft_model`` with no adapter) and stores the model, tokenizer and a
    tuned generation config on the instance.
    """

    # Marker that closes the instruction section of the prompt; text after it
    # is treated as the model's reply.
    _INST_END = '[/INST]'

    def __init__(self):
        model, tokenizer, generation_config = self.load_model()
        self.model = model
        self.tokenizer = tokenizer
        self.generation_config = generation_config
        self.device = 'cuda'

    def load_model(self):
        """Load the base model and tokenizer and configure sampling.

        Returns:
            A ``(model, tokenizer, generation_config)`` tuple, where
            ``generation_config`` is the model's own config object mutated in
            place with the sampling settings below.
        """
        base_model = f"../models/{MODEL_NAME}"
        model, tokenizer = load_peft_model(base_model, None)
        generation_config = model.generation_config
        generation_config.max_new_tokens = 3048
        generation_config.temperature = 0.7
        generation_config.do_sample = True
        # top_p is a cumulative-probability threshold, so 2 was out of range.
        # NOTE(review): 1.0 is assumed to keep nucleus filtering disabled, as
        # the old value presumably did; confirm whether 0.2 was intended.
        generation_config.top_p = 1.0
        generation_config.num_return_sequences = 1
        generation_config.pad_token_id = tokenizer.eos_token_id
        generation_config.eos_token_id = tokenizer.eos_token_id
        return model, tokenizer, generation_config

    def ask(self, user_input: str):
        """Send ``user_input`` to the model and return the text after ``[/INST]``.

        Args:
            user_input: Prompt text placed in the instruction section.

        Returns:
            Everything after the first ``[/INST]`` marker in the generated
            text. If the marker is absent, the text is returned unchanged
            instead of being cut at a meaningless offset.
        """
        answer = ask_llama2_instruction_prompt(model=self.model,
                                               generation_config=self.generation_config,
                                               tokenizer=self.tokenizer,
                                               device=self.device,
                                               question=user_input)

        idx = answer.find(self._INST_END)
        if idx == -1:
            # str.find returns -1 on a miss; slicing from -1 + 7 would drop
            # the first six characters of a perfectly good answer.
            return answer
        return answer[idx + len(self._INST_END):]


In [16]:
# Template for question/answer generation; `{content}` is filled in by generate_qa.
# NOTE(review): the example answer "You muse stardy" looks like a typo, and the
# instructions both require a "Question" prefix on "every question number" and
# forbid giving the question number. Left untouched because this text is sent
# to the model verbatim.
PROMPT = """{content}
----------
According to the above content, please generate several questions,
the answer of which must contain all or part of the content above.
The total content of the generated questions and answers must cover at least 80% of the above content.
Please do not generate duplicate questions.
Every question number must be prefixed with the keyword "Question".
When generating questions, do not give me the question number.
Response example:
   Question: How to play
   Answer: You muse stardy
   Question: Hi
   Answer: Hello """

In [7]:
def read_file(file: str):
    """Return the full text of ``file``, decoded as UTF-8.

    Args:
        file: Path of the file to read.

    Returns:
        The file's contents as a single string.
    """
    with open(file, mode='r', encoding='utf-8') as source:
        text = source.read()
    return text

# Text of the whole source document.
# NOTE(review): `content` is rebound further down (to the first chunk, and again
# inside the final loop), so this value only matters for cells run before those.
content = read_file('./Extract_QA.txt')

In [10]:
# Notebook-level LLM instance used by every generation helper below.
# Construction calls LLM.load_model, so this cell is where the model is loaded.
llm = LLM()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
def generate_qa(content):
   """Fill the notebook-level PROMPT template with ``content`` and return the LLM's reply.

   Args:
      content: Text substituted for ``{content}`` in PROMPT.

   Returns:
      Whatever ``llm.ask`` returns for the rendered prompt.
   """
   return llm.ask(PROMPT.format(content=content))

In [32]:
# Alternative template that asks for a summary and for questions/answers derived
# from it in a single request; `{content}` is the only placeholder.
# NOTE(review): the only visible use is the `prompt = PROMPT1.format(...)` cell.
PROMPT1 = """{content}
----------
According to the above content, 
Please summarize while retaining essential metadata and information from the original text.
Generate questions and answers from the summary; the answers must be present in the original content above.
Every question number must be prefixed with the keyword "Question".
"""

In [33]:
# Render PROMPT1 for the current `content`.
# NOTE(review): `prompt` is not referenced by any later cell visible here; this
# may be a leftover experiment.
prompt = PROMPT1.format(content=content)

In [12]:
def summarize(content):
   """Ask the notebook-level ``llm`` for a summary of ``content``.

   The instruction text is appended below ``content`` after a dashed separator;
   the indentation inside the template is part of the text sent to the model.

   Args:
      content: Text to summarize.

   Returns:
      The reply returned by ``llm.ask``.
   """
   request = """{content}
   ----------
   According to the above content, 
   Please summarize while retaining essential metadata and information from the original text.
   """.format(content=content)
   return llm.ask(request)

In [13]:
def append_to_file(text, mode='a', path='./Extract_QA-Results.md'):
   """Write ``text`` followed by a line break to the results file.

   Args:
      text: String to write.
      mode: File mode passed to ``open``; ``'a'`` appends, ``'w'`` truncates first.
      path: Destination file. Defaults to the notebook's results file.
   """
   with open(path, mode, encoding='utf-8') as f:
      f.write(text)
      # The handle is in text mode, which already translates '\n' to the
      # platform line ending; writing '\r\n' here produced '\r\r\n' on Windows
      # and a stray carriage return elsewhere.
      f.write('\n')

In [31]:
# Use only the first chunk for the step-by-step cells that follow.
content = all_docs[0]['page_content']

In [None]:
# Summary of the current `content`, as returned by the model.
summarize_content = summarize(content)

In [23]:
# The heading needs a space after '#' to render as a markdown heading; this also
# matches the "# summarize" heading written by generate_all_qa.
append_to_file("# summarize")
append_to_file(summarize_content)

In [28]:
# Generate questions/answers from the summary text rather than from the raw chunk.
qa_for_summarize = generate_qa(summarize_content)

In [29]:
# Persist the summary-based QA under its own heading in the results file.
append_to_file("# qa for summarize")
append_to_file(qa_for_summarize)

In [14]:
def extract_topics(content):
   """Ask the notebook-level ``llm`` to list topics found in ``content``.

   The prompt requests one topic per line without numbering; the reply is
   returned as a single string exactly as ``llm.ask`` produced it.

   Args:
      content: Text to extract topics from.

   Returns:
      The reply returned by ``llm.ask``.
   """
   request = """{content}
   ----------
   According to the above content, 
   Extract several NLP Topics, list them, with each Topic on a separate line.
   Do not number each line.
   """.format(content=content)
   return llm.ask(request)

In [95]:
# Extract topics for the current chunk and show the raw reply for inspection.
topics = extract_topics(content)
print(topics)

 Live dealer baccarat game
Objective of the game
Player's hand vs. banker's hand
Betting on a tie
Betting on the player's hand
Betting on the banker's hand
Betting on a tie
Game presented with a live person dealing the cards
Theoretical return to player of the game
Average return to the player of the game over a long period of time


In [15]:
def generate_qa_from_topics(content, topics):
   """Ask the notebook-level ``llm`` for questions and answers about ``topics``.

   Both ``content`` and ``topics`` are substituted into the prompt; the prompt
   requires answers to come from ``content`` and asks for 'Question:' /
   'Answer:' prefixes.

   Args:
      content: Source text the answers must be drawn from.
      topics: Topic list text inserted verbatim into the prompt.

   Returns:
      The reply returned by ``llm.ask``.
   """
   request = """{content}
   ----------
   According to the above content, 
   Generate questions based on these TOPICS; the answers must be present in the above content.
   {topics}
   Just provide the questions and answers,
   Each question should be prefaced with 'Question:' and each answer with 'Answer:'
   """.format(content=content, topics=topics)
   return llm.ask(request)

In [97]:
# Topic-driven QA for the current chunk, using the topics extracted above.
qa_for_topics = generate_qa_from_topics(content, topics)

In [98]:
# Persist the topic-driven QA under its own heading in the results file.
append_to_file("# qa for topics")
append_to_file(qa_for_topics)

In [16]:
def generate_category(content):
   """Ask the notebook-level ``llm`` for QA pairs about contrasting entities in ``content``.

   The instruction lines in the template are deliberately left unindented
   (unlike the separator line); the template text is sent to the model as-is.

   Args:
      content: Source text to analyse.

   Returns:
      The reply returned by ``llm.ask``.
   """
   request = """{content}
   ----------
Extract all contrasting NLP entities from the content of the article. 
List out relevant questions and answers highlighting the differences for each contrasting entity.
Just provide the questions and answers,
Each question should be prefaced with 'Question:' and each answer with 'Answer:'
   """.format(content=content)
   return llm.ask(request)


In [71]:
# Contrast-oriented QA for the current chunk.
qa_category = generate_category(content)

In [None]:
# Show the raw model reply for inspection before it is written out.
print(qa_category)

In [None]:
# Persist the contrast-oriented QA under its own heading in the results file.
append_to_file("# qa for category")
append_to_file(qa_category)

In [17]:
def generate_all_qa(content):
   """Run the summary, topic-QA and category-QA steps for ``content`` and append each result to the results file.

   Each section is written as a heading line followed by the model's reply; a
   dashed separator line closes the chunk. Nothing is returned.

   Args:
      content: Text of one chunk to process.
   """
   def _emit(heading, body):
      # One results-file section: heading line first, then the generated text.
      append_to_file(heading)
      append_to_file(body)

   _emit("# summarize", summarize(content))

   # Topics are extracted first and then fed into the topic-driven QA prompt.
   topic_text = extract_topics(content)
   _emit("# qa for topics", generate_qa_from_topics(content, topic_text))

   _emit("# qa for category", generate_category(content))
   append_to_file("----------")
   

In [4]:
# Sanity check: number of chunk records the final loop will process.
print(f"{len(all_docs)=}")

len(all_docs)=9


In [18]:
# Run the full pipeline on every chunk; each call appends its sections to the
# results file and makes several model calls.
# NOTE(review): this rebinds the notebook-level `content` on every iteration.
for doc in all_docs:
   content = doc['page_content']
   generate_all_qa(content)
