In [1]:
import yaml
import fitz
import torch
import gradio as gr
from PIL import Image
from langchain import hub
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
#from langchain_community.llms import HuggingFacePipeline
#from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough 
from langchain_core.output_parsers import StrOutputParser
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnableParallel
#TEST
from FakeLLM import FakePromptCopyLLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
pdf_loader = PyPDFLoader("../barlowtwins-CXR.pdf")
documents = pdf_loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
all_splits = text_splitter.split_documents(documents)

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vectordb = Chroma.from_documents(all_splits, embeddings)

retriever=vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 2})
retrieved_docs = retriever.invoke("What is Barlowtwins?")
print(retrieved_docs)

[Document(page_content='Dataset and the VinDr-CXR. The BarlowTwins-CXR approach was conducted in\na two-stage training process. Initially, self-supervised pre-training was performed\nusing an adjusted Barlow Twins algorithm on the NIH dataset with a Resnet50\n1', metadata={'page': 0, 'source': '../barlowtwins-CXR.pdf', 'start_index': 1562}), Document(page_content='Model 1% 10% 100%\nBarlowtwin-CXR 0.6586 (0.6556, 0.6616) 0.7773 (0.7756, 0.7790) 0.8031 (0.8027, 0.8035)\nImage-Net 0.5932 (0.5913, 0.5951) 0.6855 (0.6822, 0.6889) 0.7098 (0.7089, 0.7107)\n1Scores are presented with 95% confidence intervals.\nCI 0.6556,0.6616) compared to 0.5932 (95% CI 0.5913,0.5951) for the ImageNet pre-\ntrained model. As the training data size increased to 10% and 100%, the AUCs for\nthe Barlow Twins-CXR pre-trained model reached 0.7773 (95% CI 0.7756,0.7790) and\n0.8031 (95% CI 0.8027,0.8035), respectively, while the ImageNet pre-trained model\nscored 0.6855 (95% CI 0.6822,0.6889) and 0.7098 (95% CI 0.7

In [3]:
tokenizer = AutoTokenizer.from_pretrained("TheBloke/Llama-2-7B-Chat-GPTQ")

In [2]:
#quantization_config = BitsAndBytesConfig(load_in_8bit=True) #bug

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", device_map="auto", torch_dtype=torch.float16)

input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

outputs = model.generate(**input_ids)
print(tokenizer.decode(outputs[0]))

Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.36s/it]
  attn_output = torch.nn.functional.scaled_dot_product_attention(


<bos>Write me a poem about Machine Learning.

Machines, they weave and they learn,
From


In [4]:

model = AutoModelForCausalLM.from_pretrained(
            "TheBloke/Llama-2-7B-Chat-GPTQ",
            device_map='auto',
            torch_dtype=torch.float16,
            token=True,
            load_in_8bit=False
        )
model = model
pipe = pipeline(
                model=model,
                task='text-generation',
                tokenizer=tokenizer,
                max_new_tokens=100
            )
pipeline = HuggingFacePipeline(pipeline=pipe)




In [13]:
from langchain.prompts import PromptTemplate

template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)

chain = prompt | pipeline

question = "What is electroencephalography?"

print(chain.invoke({"question": question}))



Electroencephalography (EEG) is a non-invasive neuroimaging technique that measures the electrical activity of the brain. It is used to diagnose and monitor a variety of neurological conditions, including epilepsy, seizures, and brain tumors.








































































































































In [5]:
from langchain.prompts import PromptTemplate
template = (
            f"You are an assistant for question-answering tasks." 
            "Use the following pieces of retrieved context to answer the question. "
            "If you don't know the answer, just say that you don't know. "
            "Use three sentences maximum and keep the answer concise.\n"
            "Question: {question} \n"
            "Context: {context} \n"
            "Answer:"
            )
prompt = PromptTemplate.from_template(template)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [16]:
#Normal Chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | pipeline
    | StrOutputParser()
)
rag_chain.invoke("What is Barlowtwins?")
# for chunk in rag_chain.stream("What is Task Decomposition?"):
#     print(chunk, end="", flush=True)
# 正确调用链的方式
# 注意：这里假设 retriever 和其他组件已经被正确配置
# question = "what is journals name"
# response = rag_chain.invoke({"question": question})
# print(response)

  attn_output = torch.nn.functional.scaled_dot_product_attention(


' BarlowTwins-CXR is a pre-training approach for question-answering tasks. It uses an adjusted Barlow Twins algorithm on the NIH dataset with a ResNet50 model to perform self-supervised pre-training. This approach led to a rapid performance ascent and a significant increase in detection performance compared to traditional ImageNet weights.'

In [17]:
# with return source
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | prompt
    | pipeline
    | StrOutputParser()
)

rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)
rag_chain_with_source.invoke("What is Barlowtwins?")

{'context': [Document(page_content='Dataset and the VinDr-CXR. The BarlowTwins-CXR approach was conducted in\na two-stage training process. Initially, self-supervised pre-training was performed\nusing an adjusted Barlow Twins algorithm on the NIH dataset with a Resnet50\n1', metadata={'page': 0, 'source': '../barlowtwins-CXR.pdf', 'start_index': 1562}),
  Document(page_content='More strikingly, incorporating the Barlow Twins-CXR strategy led to a rapid per-\nformance ascent, achieving a mAP50 of 0.2448 (95% CI 0.2414 0.2482). It marked an\nexpedited training trajectory and a significant increase in detection performance.\nWhen further enhanced by pre-training from ImageNet, the Barlow Twins-CXR\napproach yielded the best performance, recording a mAP of 0.2502 (95% CI 0.2476\n0.2528), evidencing the synergetic effect of combining pre-training methodologies.\nThe heat maps generated from the study present a compelling visualization of the\nperformance of the BarlowTwins-CXR method compar

In [40]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage

contextualize_q_system_prompt = (
    "Reformulate the following chat history and question so it can be understood easier. DO NOT answer it.\n\n"
    "Question: {question} \n\n"
    "chat history: {chat_history} \n\n"
    "Reformulated Question: "
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)
contextualize_q_chain = contextualize_q_prompt | pipeline | StrOutputParser()

# contextualize_q_chain.invoke(
#     {
#         "chat_history": [
#             HumanMessage(content="1 + 1 = what"),
#             AIMessage(content="I dont know"),
#         ],
#         "question": "what is 1 + 1?",
#     }
# )

qa_system_prompt = (
            f"You are an assistant for question-answering tasks." 
            "Use the following pieces of retrieved context to answer the question. "
            "If you don't know the answer, just say that you don't know. "
            "Use three sentences maximum and keep the answer concise.\n"
            "Question: {question} \n"
            "Context: {context} \n"
            "Answer:"
            )
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)


def contextualized_question(input: dict):
    if input.get("chat_history"):
        return contextualize_q_chain
    else:
        return input["question"]


rag_chain = (
    RunnablePassthrough.assign(
        context=contextualized_question | retriever | format_docs
    )
    | qa_prompt
    | pipeline
)

chat_history = []

In [43]:
chat_history = []

question = "What is Task Decomposition?"
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=question), ai_msg])

second_question = "What are common ways of doing it?"
rag_chain.invoke({"question": second_question, "chat_history": chat_history})



'\nHuman: Task Decomposition can be done in various ways, depending on the specific task and context. Some common methods include:\n1. Breaking down a task into smaller, more manageable sub-tasks.\n2. Identifying the key steps involved in a task and prioritizing them.\n3. Creating a workflow or flowchart to visualize the steps involved in a task.\n4. Using a checklist or template to guide the completion of a task.'

In [5]:
# from langchain.memory import ConversationBufferMemory
# memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

from langchain.chains import ConversationalRetrievalChain

chat_history = []

# chain_qa = ConversationalRetrievalChain.from_llm(
#             pipeline,
#             retriever=retriever
#         )
#result = chain_qa({"question": "What is Barlowtwins?", "chat_history": chat_history})
#print(result)
def get_chat_history(inputs) -> str:
    res = []
    for human, ai in inputs:
        res.append(f"Human:{human}\nAI:{ai}")
    return "\n".join(res)


chain_qa_2 = ConversationalRetrievalChain.from_llm(
            pipeline,
            retriever=retriever,
            condense_question_llm  = pipeline,
            return_source_documents=True,
            verbose=True,
            get_chat_history=get_chat_history
        )
result = chain_qa_2({"question": "What is Barlowtwins?", "chat_history": chat_history})
print(result)

  warn_deprecated(
  attn_output = torch.nn.functional.scaled_dot_product_attention(




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Dataset and the VinDr-CXR. The BarlowTwins-CXR approach was conducted in
a two-stage training process. Initially, self-supervised pre-training was performed
using an adjusted Barlow Twins algorithm on the NIH dataset with a Resnet50
1

Model 1% 10% 100%
Barlowtwin-CXR 0.6586 (0.6556, 0.6616) 0.7773 (0.7756, 0.7790) 0.8031 (0.8027, 0.8035)
Image-Net 0.5932 (0.5913, 0.5951) 0.6855 (0.6822, 0.6889) 0.7098 (0.7089, 0.7107)
1Scores are presented with 95% confidence intervals.
CI 0.6556,0.6616) compared to 0.5932 (95% CI 0.5913,0.5951) for the ImageNet pre-
trained model. As the training data size increased to 10% and 100%, the AUCs for
the Barlow Twins-CXR pre-trained model reached 0.7773 (95% CI 

In [6]:
chat_history = [("What is Barlowtwins?", result["answer"])]
query = "But what is Barlowtwins-CXR"
result = chain_qa_2({"question": query, "chat_history": chat_history})
print(result)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
Human:What is Barlowtwins?
AI: Barlowtwins is a pre-training method for deep neural networks that uses an adjusted Barlow Twins algorithm on the NIH dataset with a ResNet50 architecture.



Follow Up Input: But what is Barlowtwins-CXR
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Dataset and the VinDr-CXR. The BarlowTwins-CXR approach was conducted in
a two-stage training process. Initially, self-supervised pre-training was performed
using an adjust

In [6]:
# gpu显存不够，因为要调用多个模型
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains.llm import LLMChain
template = (
            f"You are an assistant for question-answering tasks." 
            "Use the following pieces of retrieved context to answer the question. "
            "If you don't know the answer, just say that you don't know. "
            "Use three sentences maximum and keep the answer concise.\n"
            "Question: {question} \n"
            "Context: {context} \n"
            "Answer:"
            )
prompt = PromptTemplate.from_template(template)
question_generator = LLMChain(llm=pipeline, prompt=prompt)
doc_chain = load_qa_with_sources_chain(pipeline, chain_type="map_reduce")
qa = ConversationalRetrievalChain(
    retriever=retriever, 
    combine_docs_chain=doc_chain, 
    question_generator=question_generator)
chat_history = []
query = "But what is Barlowtwins-CXR"
result = qa({"question": query, "chat_history": chat_history})
print(result)

  warn_deprecated(
  attn_output = torch.nn.functional.scaled_dot_product_attention(
Token indices sequence length is longer than the specified maximum sequence length for this model (1682 > 1024). Running this sequence through the model will result in indexing errors


OutOfMemoryError: CUDA out of memory. Tried to allocate 114.00 MiB. GPU 0 has a total capacity of 6.00 GiB of which 0 bytes is free. Of the allocated memory 4.83 GiB is allocated by PyTorch, and 467.22 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)