### https://mp.weixin.qq.com/s/qNcfNSjnz_lAdm3wd1EulA

### 环境：Mistrall

In [1]:
import os
import torch
from transformers import (
  AutoTokenizer,
  AutoModelForCausalLM,
  BitsAndBytesConfig,
  pipeline
)

from transformers import BitsAndBytesConfig

from langchain.text_splitter import CharacterTextSplitter

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
import transformers
import nest_asyncio

nest_asyncio.apply()
from tqdm.notebook import tqdm
from langchain_community.document_loaders import PyPDFLoader

In [2]:
# Load Tokenizer
model_name='/home/asus/文档/AIModel/Mistral-7B-Instruct-v0.2'

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
print("Tokenizer loaded !!")
## bitsandbytes parameters
# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)
# Load Model
model = AutoModelForCausalLM.from_pretrained(
        model_name,
            quantization_config=bnb_config,
        )

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Tokenizer loaded !!


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


In [3]:
def get_vanilla_response(question, tokenizer, model):
  inputs = tokenizer.encode_plus(f"[INST] {question} [/INST]", return_tensors="pt")['input_ids'].to('cuda')

  generated_ids = model.generate(inputs,
                                max_new_tokens=1000,
                                do_sample=True)
  output = tokenizer.batch_decode(generated_ids)
  return output[0]

question = "What changes do I need in my personality to make friends easily?"
print(get_vanilla_response(question = question, tokenizer = tokenizer, model = model))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<s> [INST] What changes do I need in my personality to make friends easily? [/INST] Making friends is a complex process that depends on various factors, including your personality, social skills, and environmental factors. Here are some personality traits that are commonly associated with being able to make friends easily:

1. Openness: Be open to new experiences and ideas. This can help you connect with a diverse range of people and show that you are interested in getting to know them as individuals.
2. Extrovertedness: Being outgoing and sociable can help you meet new people and engage in conversations. This doesn't mean being the life of the party, but rather being friendly, approachable, and interested in other people.
3. Agreeableness: Being friendly, kind, and compassionate can help you build strong relationships. This doesn't mean being a doormat, but rather being understanding and empathetic towards others.
4. Conscientiousness: Being reliable and dependable can help build trus

In [4]:
# Create a text generation pipeline
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=10000,
)

mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [5]:
# !pip install --upgrade jupyter ipywidgets -i https://pypi.tuna.tsinghua.edu.cn/simple

In [6]:
# add Book paths from Google Drive
pdf_paths = ['./data/Dan Ariely - Predictably Irrational_ The Hidden Forces That Shape Our Decisions-HarperCollins (2008).pdf',
             './data/Daniel Goleman - Emotional Intelligence_ Why it Can Matter More Than IQ-Bloomsbury (2009).pdf',
             './data/Daniel Kahneman-Thinking_Fast and Slow.pdf',
             './data/Robert B. Cialdini - Influence_ The Psychology of Persuasion (Collins Business Essentials) (2007).pdf']
# './Elliot Aronson - The Social Animal. Tenth Edition-Worth Publishers (2007).pdf', 这本书没有
# ‘./Richard H. Thaler, Prof. Cass R. Sunstein - Nudge_ Improving Decisions About Health, Wealth, and Happiness-Yale University Press (2008).pdf', 这本书没有
docs = []
for pdf_path in tqdm(pdf_paths):
  loader = PyPDFLoader(pdf_path)
  pages = loader.load_and_split()
  docs = docs+pages[8:-10]

  0%|          | 0/4 [00:00<?, ?it/s]

In [7]:
# Load chunked documents into the FAISS index
db = FAISS.from_documents(docs,HuggingFaceEmbeddings(model_name='./bge-base-en-v1.5'))
retriever = db.as_retriever()

In [8]:
# Create prompt template
prompt_template = """
### [INST] Instruction: Answer the question based on your human psychology knowledge, you can also use this auxillary knowledge to help:

{context}

### QUESTION:
{question} [/INST]
 """

# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

  warn_deprecated(


In [9]:
# testing on the same question for a comparison
question = "What changes do I need in my personality to make friends easily?"
op = rag_chain.invoke(question)
print(op['text'])




### [INST] Instruction: Answer the question based on your human psychology knowledge, you can also use this auxillary knowledge to help:

[Document(page_content='Taken\ttogether,\tthese\tskills\tare\tthe\tstuff\tof\tinterpersonal\tpolish,\tthe\tnecessary\ningredients\tfor\tcharm,\tsocial\tsuccess,\teven\tcharisma.\tThose\twho\tare\tadept\tin\nsocial\tintelligence\tcan\tconnect\twith\tpeople\tquite\tsmoothly,\tbe\tastute\tin\treading\ntheir\treactions\tand\tfeelings,\tlead\tand\torganize,\tand\thandle\tthe\tdisputes\tthat\tare\nbound\tto\tflare\tup\tin\tany\thuman\tactivity.\tThey\tare\tthe\tnatural\tleaders,\tthe\tpeople\nwho\tcan\texpress\tthe\tunspoken\tcollective\tsentiment\tand\tarticulate\tit\tso\tas\tto\nguide\ta\tgroup\ttoward\tits\tgoals.\tThey\tare\tthe\tkind\tof\tpeople\tothers\tlike\tto\tbe\twith\nbecause\tthey\tare\temotionally\tnourishing—they\tleave\tother\tpeople\tin\ta\tgood\nmood,\tand\tevoke\tthe\tcomment,\t"What\ta\tpleasure\tto\tbe\taround\tsomeone\tlike\nthat."\nT