In [1]:
import os

import FlagEmbedding
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS
from openai import OpenAI

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Local checkpoint of the bge-m3 embedding model used for query encoding.
model_path = "../shared_models/BAAI/bge-m3"
model_kwargs = {"device": "cuda"}  # NOTE(review): presumably needs a CUDA GPU; use "cpu" otherwise
encode_kwargs = {"normalize_embeddings": True}

embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_path,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
    # bge-m3 is an instruction-free model. Without this override the wrapper
    # prepends its default English retrieval instruction to every query,
    # which the LangChain BGE docs say to disable for bge-m3.
    query_instruction="",
)

# SECURITY: allow_dangerous_deserialization=True lets load_local unpickle the
# saved docstore. Pickle can execute arbitrary code, so only load an index
# that you built yourself or otherwise fully trust.
vector_store = FAISS.load_local(
    "../arxiv_vector_store_cs",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Smoke test: retrieve the three nearest documents for a sample question.
query = "What is the latest research on AI?"
docs = vector_store.similarity_search(query, k=3)
print(docs)


[Document(metadata={'id': '1311.5998'}, page_content='Title: A brief network analysis of Artificial Intelligence publication\nAuthor: Yunpeng Li, Jie Liu, Yong Deng\nAbstract:   In this paper, we present an illustration to the history of Artificial\nIntelligence(AI) with a statistical analysis of publish since 1940. We\ncollected and mined through the IEEE publish data base to analysis the\ngeological and chronological variance of the activeness of research in AI. The\nconnections between different institutes are showed. The result shows that the\nleading community of AI research are mainly in the USA, China, the Europe and\nJapan. The key institutes, authors and the research hotspots are revealed. It\nis found that the research institutes in the fields like Data Mining, Computer\nVision, Pattern Recognition and some other fields of Machine Learning are quite\nconsistent, implying a strong interaction between the community of each field.\nIt is also showed that the research of Electron

In [3]:
# OpenAI-compatible endpoint of the hosted LLM.
# Alternative provider endpoint: "https://api.minimax.chat/v1"
base_url = "https://api.lingyiwanwu.com/v1"
remote_model_name = "yi-lightning"

# SECURITY: never commit credentials to a notebook. The key is read from the
# environment instead; any key that was previously hard-coded in this cell
# must be treated as leaked and rotated with the provider.
api_key = os.environ.get("YI_API_KEY")
if not api_key:
    raise RuntimeError(
        "YI_API_KEY is not set; export it (or set os.environ['YI_API_KEY']) "
        "before running this cell."
    )

# Shared OpenAI client used by the helper functions defined below.
client = OpenAI(api_key=api_key, base_url=base_url)

def rephrase_query(client, query, model=None):
    """Condense a user question into one keyword / key phrase for arXiv retrieval.

    Args:
        client: OpenAI-compatible client exposing ``chat.completions.create``.
        query: The user's natural-language question.
        model: Optional model name. When omitted, the notebook-level
            ``remote_model_name`` is used.

    Returns:
        The key phrase produced by the model, with surrounding whitespace and
        quotation marks removed. If the model returns no usable text, the
        stripped original ``query`` is returned so retrieval can still proceed.
    """
    response = client.chat.completions.create(
        model=model if model is not None else remote_model_name,
        messages=[
            {
                "role": "system",
                "content": "You are a key information extractor and always respond with only one most relevant keyword or key phrase from the input for use in a search engine query. Focus on essential terms and maintain the original wording."
            },
            {"role": "user", "content": query}
        ]
    )
    # `content` is optional in the chat-completions schema; guard against None
    # instead of crashing on `.strip()`.
    raw = response.choices[0].message.content or ""
    # Models frequently wrap the phrase in quotes; strip them so they are not
    # embedded verbatim into the similarity-search query.
    keyword = raw.strip().strip("\"'`\u201c\u201d\u2018\u2019").strip()
    return keyword or query.strip()

# System prompt shared by the blocking and streaming answer functions.
_RAG_SYSTEM_PROMPT = "You are an academic assistant. Please answer the question based on the provided literature fragments. When answering, please cite the source of the information."


def _build_rag_messages(query, chunks, metadata=None):
    """Build the system + user chat messages for a RAG answer request.

    Each chunk is prefixed with its label: ``metadata[i]`` when available,
    otherwise the positional fallback ``"Source <i+1>"``.
    """
    labels = metadata or []
    context = []
    for i, chunk in enumerate(chunks):
        source = labels[i] if i < len(labels) else f"Source {i+1}"
        context.append(f"{source}: {chunk}")
    context_str = "\n\n".join(context)
    return [
        {"role": "system", "content": _RAG_SYSTEM_PROMPT},
        {"role": "user", "content": f"Question: {query}\n\nContext:\n{context_str}"},
    ]


def process_rag_results(client, query, chunks, metadata=None, model=None):
    """Answer ``query`` from the retrieved ``chunks`` in a single blocking call.

    Args:
        client: OpenAI-compatible client exposing ``chat.completions.create``.
        query: The user's question.
        chunks: Retrieved text fragments to use as context.
        metadata: Optional per-chunk source labels, aligned with ``chunks``.
        model: Optional model name. When omitted, the notebook-level
            ``remote_model_name`` is used.

    Returns:
        The content of the first completion choice.
    """
    response = client.chat.completions.create(
        model=model if model is not None else remote_model_name,
        messages=_build_rag_messages(query, chunks, metadata),
    )
    return response.choices[0].message.content


def process_rag_results_stream(client, query, chunks, metadata=None, model=None):
    """Answer ``query`` from the retrieved ``chunks``, streaming tokens to stdout.

    Args:
        client: OpenAI-compatible client exposing ``chat.completions.create``.
        query: The user's question.
        chunks: Retrieved text fragments to use as context.
        metadata: Optional per-chunk source labels, aligned with ``chunks``.
        model: Optional model name. When omitted, the notebook-level
            ``remote_model_name`` is used.

    Returns:
        The full answer text, i.e. the concatenation of all streamed pieces.
    """
    response = client.chat.completions.create(
        model=model if model is not None else remote_model_name,
        messages=_build_rag_messages(query, chunks, metadata),
        stream=True,  # enable streaming output
    )

    pieces = []
    for event in response:
        # A stream chunk may carry an empty `choices` list (e.g. a trailing
        # usage-only chunk); skip it rather than fail on `choices[0]`.
        if not event.choices:
            continue
        content = event.choices[0].delta.content
        if content is None:
            continue
        print(content, end="", flush=True)
        pieces.append(content)
    print()  # terminate the streamed line
    return "".join(pieces)

In [1]:
from FlagEmbedding import LayerWiseFlagLLMReranker

# Instantiate the layer-wise LLM reranker from the local checkpoint directory,
# with fp16 enabled and remote (repo-provided) model code allowed.
reranker = LayerWiseFlagLLMReranker(
    "../shared_models/BAAI/bge-reranker-v2-minicpm-layerwise",
    use_fp16=True,
    trust_remote_code=True,
)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards:  33%|███▎      | 1/3 [00:00<00:00,  7.74it/s]


ValueError: Loading ../shared_models/BAAI/bge-reranker-v2-minicpm-layerwise requires you to execute the configuration file in that repo on your local machine. Make sure you have read the code there to avoid malicious use, then set the option `trust_remote_code=True` to remove this error.

In [6]:
import time

import numpy as np

query = "Can watermarked LLMs be identified by crafted prompts?"
rephrased_query = rephrase_query(client, query)
print(rephrased_query)

# Time the vector-store lookup; perf_counter is the monotonic clock intended
# for measuring short intervals.
start_time = time.perf_counter()
results = vector_store.similarity_search_with_score(
    rephrased_query,
    k=10
)
end_time = time.perf_counter()
print(f"Time taken for similarity search: {end_time - start_time:.4f} seconds")

# Build [[query, passage], ...] pairs for the reranker.
# similarity_search_with_score yields (Document, score) tuples, so the tuple
# must be unpacked before reading `page_content`.
rerank_data = [[rephrased_query, doc.page_content] for doc, _ in results]
scores = reranker.compute_score(rerank_data)
# Indices of the 5 highest reranker scores, best first.
top_indices = np.argsort(scores)[-5:][::-1]
top_results = [results[i] for i in top_indices]

# Prepare the chunks and their source labels for the answer prompt.
# NOTE: `score` here is the vector-store score attached to each hit, not the
# reranker score computed above.
chunks = []
metadata = []
for doc, score in top_results:
    chunks.append(doc.page_content)
    metadata.append(f"Score: {score:.4f}")

answer = process_rag_results_stream(client, query, chunks, metadata)


"watermarked LLMs identified by crafted prompts"
Time taken for similarity search: 0.4483 seconds
Yes, watermarked Large Language Models (LLMs) can be identified by crafted prompts, according to the literature. The paper by Aiwei Liu and colleagues (2024) introduces an identification algorithm called Water-Probe that detects watermarks through well-designed prompts to the LLM. Their research shows that watermarked LLMs exhibit consistent biases under the same watermark key, leading to similar differences across prompts under different watermark keys. This makes it possible to identify watermarked LLMs using specifically crafted prompts. The experiments conducted by the authors demonstrate that almost all mainstream watermarking algorithms can be easily identified with their well-designed prompts, while maintaining a minimal false positive rate for non-watermarked LLMs (Liu et al., 2024).

Furthermore, the work by Leonard Tang and colleagues (2023) also supports this finding by introduc