In [16]:
import os
import torch
from transformers import (
  AutoTokenizer, 
  AutoModelForCausalLM, 
  BitsAndBytesConfig,
  pipeline)
import transformers


In [17]:
from transformers import BitsAndBytesConfig

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

import nest_asyncio

#################################################################
# Tokenizer

#################################################################

In [18]:
# 本地模型路径
local_model_path = "/disk2/elvys/Mistral-7B-Instruct-v0.2"
model_config = transformers.AutoConfig.from_pretrained(
    local_model_path,
)
tokenizer = AutoTokenizer.from_pretrained(local_model_path, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [19]:
#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

Your GPU supports bfloat16: accelerate training with bf16=True


In [20]:
model = AutoModelForCausalLM.from_pretrained(
    local_model_path,
    torch_dtype=torch.float16,
    attn_implementation="flash_attention_2",
    device_map="auto"
)

Loading checkpoint shards: 100%|██████████| 6/6 [00:07<00:00,  1.20s/it]


In [21]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 7241732096
all model parameters: 7241732096
percentage of trainable model parameters: 100.00%


In [22]:
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,
)

mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [23]:
# 编码输入并生成注意力掩码
inputs_not_chat = tokenizer.encode_plus("""[INST] 
why CFA is programe is valuable[/INST]"""
                                        , return_tensors="pt", padding=True)

# 将输入张量移动到GPU
input_ids = inputs_not_chat['input_ids'].to('cuda')
attention_mask = inputs_not_chat['attention_mask'].to('cuda')

# 输出检查
print("Input IDs:", input_ids)
print("Attention Mask:", attention_mask)

# 调整生成参数并设置 torch.no_grad()
try:
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids, 
            attention_mask=attention_mask, 
            max_new_tokens=1000, 
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            temperature=0.5,  # 调整temperature参数
        )
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    print(decoded)
except RuntimeError as e:
    print(f"An error occurred during generation: {e}")

Input IDs: tensor([[    1,   733, 16289, 28793, 28705,    13, 27014,   334,  3120,   349,
           430,   820,   433,   349, 12302, 28792, 28748, 16289, 28793]],
       device='cuda:0')
Attention Mask: tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')


['[INST] \nwhy CFA is programe is valuable[/INST] The Chartered Financial Analyst (CFA) program is considered valuable for several reasons:\n\n1. Global Recognition: The CFA designation is recognized and respected by employers around the world. It demonstrates a high level of knowledge and expertise in the field of investment management and financial analysis.\n2. Comprehensive Curriculum: The CFA program covers a broad range of topics in finance, including ethical and professional standards, financial analysis, portfolio management, economics, and behavioral finance.\n3. Practical Skills: The CFA program emphasizes practical skills and real-world applications of financial concepts. Candidates are required to pass three levels of exams, each of which tests their ability to apply financial theory to real-world situations.\n4. Networking Opportunities: The CFA Institute and local CFA societies offer numerous networking opportunities for CFA charterholders. These networks can provide valu

In [24]:
import nest_asyncio
nest_asyncio.apply()


In [25]:
from langchain_community.document_loaders import PyPDFLoader
loaders = [
    # Duplicate documents on purpose - messy data
    PyPDFLoader("/home/yzhao/projects/RD/LLM/document/Trading Classic Chart Patterns.pdf"),
]
loader = loaders[0]
pages = loader.load_and_split()

ValueError: File path /home/yzhao/projects/RD/LangChain/document/A Complete Guide to the Futures Market Technical Analysis, Trading Systems, Fundamental Analysis, Options, Spreads, and... (Jack D. Schwager) (Z-Library).pdf is not a valid file or url

In [None]:
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1500,
    chunk_overlap = 150
)
docs = []
for loader in loaders:
    docs.extend(loader.load())
chunked_documents = text_splitter.split_documents(docs)
len(chunked_documents)

NameError: name 'loaders' is not defined

In [None]:
# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents, 
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

retriever = db.as_retriever()

NameError: name 'chunked_documents' is not defined

In [None]:
# Create prompt template,此处可以进行promt engineering
prompt_template = """
### [INST] Instruction: You are a technical analyst.If the problem is not related to the Finance aera, just print 'this is not my aera',otherwise answer the question based on your technical indicator problem. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
 """

# Create prompt from prompt template 
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain 
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

rag_chain = ( 
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

print(rag_chain.invoke("what is CCI")['text'])




### [INST] Instruction: You are a technical analyst.If the problem is not related to the Finance aera, just print 'this is not my aera',otherwise answer the question based on your technical indicator problem. Here is context to help:

[Document(page_content='convergence-divergence (MACD), the price (or moving average) oscillator, the commodity chan-\nnel index (CCI), and the money flow index (MFI). (Note: There is little consistency in the technical indicator lexicon, especially with regard to more generic indicators. T erms such as momentum, rate of change, and price oscillator sometimes refer to different calculations in different sources. The names used here are widely applied, but may conflict with other sources. The calculations, not the names, are what are important.)\nFigure 11.6 compares five popular indicators: momentum, the “fast” stochastic oscillator, CCI,', metadata={'source': '/home/yzhao/projects/RD/LangChain/document/A Complete Guide to the Futures Market Technical Ana

In [None]:
# 关闭模型
model.cpu()  # 将模型移到CPU，释放GPU内存
del model  # 删除模型对象
torch.cuda.empty_cache()  # 清空GPU缓存