In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
import torch
from pydantic import BaseModel
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA, LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms.base import LLM
from typing import Optional, List, Dict, Any

# Fix PyTorch's global RNG seed so every run starts from the same random state.
torch.manual_seed(1234)

<torch._C.Generator at 0x7f9d84855710>

In [2]:
class QwenRunnable(LLM, BaseModel):
    """LangChain ``LLM`` adapter around a Qwen chat model.

    Every prompt is sent as a single, history-free turn through
    ``model.chat`` so the wrapper can be plugged into LangChain chains.
    """

    # Chat model object; must expose ``chat(tokenizer, query=..., history=...)``.
    model: Any
    # Tokenizer handed to ``model.chat`` on every call.
    tokenizer: Any
    # Device label stored alongside the model; this class never reads it.
    device: str = "cuda:0"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Run one chat turn and return the model's reply.

        Args:
            prompt: Text sent to the model as the user query.
            stop: Optional stop sequences. The reply is cut at the earliest
                occurrence of any of them (the marker itself is dropped).
            run_manager: Callback manager supplied by LangChain; accepted for
                signature compatibility and not used here.
            **kwargs: Extra generation arguments forwarded by LangChain;
                accepted so the call does not fail, and otherwise ignored.

        Returns:
            The reply text, truncated at the first stop sequence if one occurs.
        """
        response, _ = self.model.chat(self.tokenizer, query=prompt, history=None)

        if stop:
            # ``model.chat`` has no notion of stop sequences, so they are
            # enforced after the fact by trimming the finished reply.
            hits = [response.find(marker) for marker in stop if marker]
            hits = [pos for pos in hits if pos != -1]
            if hits:
                response = response[: min(hits)]
        return response

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to label this LLM implementation."""
        return "qwen"

class Qwen:
    """Convenience wrapper around a Qwen chat model and its LangChain helpers.

    Typical use: construct, call ``load_model()``, optionally call
    ``load_retriever()``, then use the ``generate_*`` / ``run_*`` methods.
    Methods that need a component that has not been loaded yet raise
    ``RuntimeError`` naming the loader to call.
    """

    def __init__(self, model_path: str, device: str = "cuda:0"):
        """Store configuration only; nothing is loaded until ``load_model()``.

        Args:
            model_path: Path or identifier passed to ``from_pretrained``.
            device: Device string the model is moved to in ``load_model()``.
        """
        self.model_path = model_path
        self.device = device
        self.tokenizer = None
        self.model = None
        self.retriever = None
        self.llm_runnable = None

    def _require_model(self) -> None:
        """Raise ``RuntimeError`` unless both model and tokenizer are set."""
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("Model is not loaded; call load_model() first.")

    def _require_llm(self) -> None:
        """Raise ``RuntimeError`` unless the LangChain wrapper has been built."""
        if self.llm_runnable is None:
            raise RuntimeError("LLM wrapper is not initialised; call load_model() first.")

    def load_model(self):
        """Load tokenizer, model and generation config, then build the LLM wrapper.

        The model is moved to ``self.device`` and switched to eval mode.
        ``trust_remote_code=True`` executes code shipped with the checkpoint,
        so only point ``model_path`` at a source you trust.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(self.model_path, trust_remote_code=True)
        self.model.to(self.device)
        self.model.eval()
        self.model.generation_config = GenerationConfig.from_pretrained(self.model_path, trust_remote_code=True)
        self.llm_runnable = QwenRunnable(model=self.model, tokenizer=self.tokenizer, device=self.device)

    def load_retriever(
        self,
        doc_path: str,
        embedding_model_path: str,
        embedding_device: str = "cuda:0",
        chunk_size: int = 100,
        chunk_overlap: int = 0,
    ):
        """Index a UTF-8 text file in Chroma and keep a retriever over it.

        Args:
            doc_path: Path of the text file to index.
            embedding_model_path: Model name/path for ``HuggingFaceEmbeddings``.
            embedding_device: Device the embedding model runs on.
            chunk_size: Target chunk size given to ``CharacterTextSplitter``.
                Individual chunks may still come out longer; the splitter
                logs a warning when that happens.
            chunk_overlap: Overlap between consecutive chunks.
        """
        # Load documents
        loader = TextLoader(doc_path, encoding="utf-8")
        documents = loader.load()

        # Split documents into chunks
        text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        docs = text_splitter.split_documents(documents)

        # Create the embedding function
        model_kwargs = {'device': embedding_device}
        embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_path, model_kwargs=model_kwargs)

        # Load into Chroma
        db = Chroma.from_documents(docs, embedding_function)
        self.retriever = db.as_retriever()

    def generate_response(self, prompt: str, history: Optional[list] = None):
        """Send ``prompt`` to the chat model and return only the reply text.

        Args:
            prompt: User query.
            history: Prior conversation passed straight to ``model.chat``;
                the updated history returned by the model is discarded.

        Raises:
            RuntimeError: If ``load_model()`` has not been called.
        """
        self._require_model()
        response, _ = self.model.chat(self.tokenizer, query=prompt, history=history)
        return response

    def generate_image_caption(self, image_path: str, prompt: str):
        """Ask the model about an image and return the reply text.

        Args:
            image_path: Image reference placed in the ``'image'`` entry.
            prompt: Instruction placed in the ``'text'`` entry.

        Raises:
            RuntimeError: If ``load_model()`` has not been called.
        """
        self._require_model()
        query = self.tokenizer.from_list_format([
            {'image': image_path},
            {'text': prompt}
        ])
        response, _ = self.model.chat(self.tokenizer, query=query, history=None)
        return response

    def run_qa(self, query: str):
        """Answer ``query`` with a "stuff" RetrievalQA chain over the retriever.

        Raises:
            RuntimeError: If ``load_model()`` or ``load_retriever()`` has not
                been called.
        """
        self._require_llm()
        if self.retriever is None:
            raise RuntimeError("Retriever is not loaded; call load_retriever() first.")
        qa = RetrievalQA.from_chain_type(llm=self.llm_runnable, chain_type="stuff", retriever=self.retriever)
        return qa.run(query)

    def run_chain(self, env: str):
        """Run the fixed product-review prompt with ``env`` as the product name.

        Raises:
            RuntimeError: If ``load_model()`` has not been called.
        """
        self._require_llm()
        prompt = PromptTemplate(
            input_variables=["env"],
            template="对于产品{env}，有哪些评价?",
        )
        chain = LLMChain(llm=self.llm_runnable, prompt=prompt)
        return chain.run(env)

# Local checkpoint directory holding the Qwen-VL-Chat weights
model_path = "/data1/dxw_data/llm/Qwen-VL-Chat"
# GPU that hosts the chat model (e.g. 'cuda:0', 'cuda:1')
device = 'cuda:0'

# Build the wrapper, then load tokenizer and weights onto the chosen device
qwen_model = Qwen(model_path=model_path, device=device)
qwen_model.load_model()

2024-06-15 09:23:31.295774: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-06-15 09:23:31.418476: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-15 09:23:31.998410: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64
2024-06-15 09:23:31.998490: W tensorflow/compiler/xla/stream_exec

Loading checkpoint shards:   0%|          | 0/10 [00:00<?, ?it/s]

In [3]:
# --- Retrieval setup: index the comment file with a Chinese embedding model ---
doc_path = "/data1/dxw_data/llm/mkt_llm/starbuck/starbuck_comments_1.txt"
embedding_model_path = "/data1/dxw_data/llm/text2vec-large-chinese"
qwen_model.load_retriever(
    doc_path=doc_path,
    embedding_model_path=embedding_model_path,
    embedding_device='cuda:6',
)

# --- Retrieval QA: ask a question answered from the indexed document ---
query = "根据文档内容,请说明有哪些这些用户评论分为哪些主题"
qa_response = qwen_model.run_qa(query=query)
print(qa_response)

# --- Plain LLM chain: fixed prompt template, no retrieval involved ---
chain_response = qwen_model.run_chain(env="御蝶坊的老婆饼")
print(chain_response)

# --- Image captioning: pick one image from the folder by its numeric index ---
image_folder = "/data1/dxw_data/llm/mkt-englishwords/data"
image_index = 0  # Set to the index of the image to caption
image_path = "{}/{}.png".format(image_folder, image_index)
caption_prompt = "Generate some descriptions of the clothes in this image, following the social media style of tone"
caption_response = qwen_model.generate_image_caption(image_path=image_path, prompt=caption_prompt)
print(caption_response)

Created a chunk of size 127, which is longer than the specified 100
Created a chunk of size 119, which is longer than the specified 100
Created a chunk of size 101, which is longer than the specified 100
Created a chunk of size 229, which is longer than the specified 100
Created a chunk of size 105, which is longer than the specified 100
Created a chunk of size 152, which is longer than the specified 100
Created a chunk of size 101, which is longer than the specified 100
Created a chunk of size 115, which is longer than the specified 100
Created a chunk of size 182, which is longer than the specified 100
Created a chunk of size 104, which is longer than the specified 100
Created a chunk of size 185, which is longer than the specified 100
Created a chunk of size 199, which is longer than the specified 100
Created a chunk of size 104, which is longer than the specified 100
Created a chunk of size 404, which is longer than the specified 100
No sentence-transformers model found with name /

根据文档内容，这些用户评论可以分为以下主题：
1. 老婆饼：有用户表示御蝶坊的老婆饼是最有名的，味道好，但是也有用户表示购买的老婆饼口味发酸，怀疑是之前未卖完的重新装袋打日期卖的。
2. 价格：有用户表示御蝶坊的价格越来越贵。
3. 面包：有用户表示御蝶坊的面包新鲜，用料实料，是孩子放心的早餐选择，也推荐购买。
4. 蛋挞：有用户表示御蝶坊的蛋挞好吃，特别是海盐蛋糕和肉松小贝，但是也有用户表示蛋挞容易腻。
5. 服务：有用户表示御蝶坊的服务好，环境干净，服务热情。


  warn_deprecated(


作为一个AI语言模型，我无法直接获取御蝶坊老婆饼的评价，因为御蝶坊是一家位于台湾的食品公司，而我无法获取台湾以外的实时信息。不过，我可以告诉你，老婆饼是一种传统的中国糕点，通常由糯米粉、糖、花生和芝麻等材料制成。这种糕点在中国非常受欢迎，因为它们口感酥脆，味道甜美。如果你有机会尝试御蝶坊的老婆饼，你可以根据自己的口味来评价它们。
This is a beautiful, pastel-colored outfit! The pink skirt is the perfect length and material for summer. I love how it flows when I walk. The top is a nice fitted t-shirt that shows off my figure. The color is so pretty against my skin. I finished the look with some cute accessories, including a simple purse and some trendy slides. I feel so confident and put-together in this whole outfit.
