In [2]:
import json
from langchain_community.document_loaders import PyPDFLoader

### Load from SQLite Database

In [38]:
import sqlite3
from contextlib import closing

# Load every row of the `papers` table into `paper_metadata` as plain dicts.
#
# `with sqlite3.connect(...)` on its own only wraps a transaction (commit or
# rollback on exit); it does NOT close the connection. `closing(...)` makes
# sure the database handle is released when the block ends. No commit is
# needed because the cell only runs a SELECT.
with closing(sqlite3.connect("data/papers.db")) as conn:
    conn.row_factory = sqlite3.Row  # rows expose columns by name, so dict(row) works
    c = conn.cursor()

    # Select all columns from the papers table and fetch every row
    c.execute("SELECT * FROM papers")
    all_rows = c.fetchall()

    # Convert the fetched rows to a list of dictionaries while the
    # connection is still open
    paper_metadata = [dict(row) for row in all_rows]

# Report whether the query returned anything
if paper_metadata:
    print("Fetched records.")
else:
    print("No records found.")

Fetched records.


In [39]:
# Sanity check: print how many records were loaded, then display the first one.
print(len(paper_metadata))
paper_metadata[0]

10


{'url': 'https://paperswithcode.com/paper/transparent-image-layer-diffusion-using',
 'title': 'Transparent Image Layer Diffusion using Latent Transparency',
 'arxiv_link': 'https://arxiv.org/pdf/2402.17113v2.pdf',
 'published': '2024-02-28',
 'authors': 'Lvmin Zhang, Maneesh Agrawala',
 'summary': 'We present LayerDiffusion, an approach enabling large-scale pretrained latent\ndiffusion models to generate transparent images. The method allows generation\nof single transparent images or of multiple transparent layers. The method\nlearns a "latent transparency" that encodes alpha channel transparency into the\nlatent manifold of a pretrained latent diffusion model. It preserves the\nproduction-ready quality of the large diffusion model by regulating the added\ntransparency as a latent offset with minimal changes to the original latent\ndistribution of the pretrained model. In this way, any latent diffusion model\ncan be converted into a transparent image generator by finetuning it with th

### Load From JSON

In [3]:
# Path of the JSON file holding the paper metadata
filename = "data/paper_metadata.json"

# Read the paper metadata from the file. The encoding is given explicitly so
# the result does not depend on the platform's default text encoding.
with open(filename, "r", encoding="utf-8") as file:
    paper_metadata = json.load(file)

In [4]:
# Sanity check: print how many records were loaded, then display the first one.
print(len(paper_metadata))
paper_metadata[0]

10


{'url': 'https://paperswithcode.com/paper/transparent-image-layer-diffusion-using',
 'title': 'Transparent Image Layer Diffusion using Latent Transparency',
 'arxiv_link': 'https://arxiv.org/pdf/2402.17113v2.pdf',
 'published': '2024-02-28',
 'authors': 'Lvmin Zhang, Maneesh Agrawala',
 'summary': 'We present LayerDiffusion, an approach enabling large-scale pretrained latent\ndiffusion models to generate transparent images. The method allows generation\nof single transparent images or of multiple transparent layers. The method\nlearns a "latent transparency" that encodes alpha channel transparency into the\nlatent manifold of a pretrained latent diffusion model. It preserves the\nproduction-ready quality of the large diffusion model by regulating the added\ntransparency as a latent offset with minimal changes to the original latent\ndistribution of the pretrained model. In this way, any latent diffusion model\ncan be converted into a transparent image generator by finetuning it with th

## Indexing - Load & Split

In [5]:
# Download each paper's PDF and collect its pages as documents, tagging every
# page with the paper-level metadata so it travels with the text.
docs = []
for paper in paper_metadata:
    loader = PyPDFLoader(paper["arxiv_link"])
    # Use load() rather than load_and_split(): load_and_split() already chunks
    # the text with a default splitter, and the documents are split again
    # afterwards with explicit chunk settings, so the text would be split twice.
    pages = loader.load()
    for page in pages:
        page.metadata.update(
            title=paper["title"],
            published=paper["published"],
            authors=paper["authors"],
            summary=paper["summary"],
        )
    docs.extend(pages)

In [6]:
# Display the metadata of the first loaded document to confirm the paper fields were attached.
docs[0].metadata

{'source': 'https://arxiv.org/pdf/2402.17113v2.pdf',
 'page': 0,
 'title': 'Transparent Image Layer Diffusion using Latent Transparency',
 'published': '2024-02-28',
 'authors': 'Lvmin Zhang, Maneesh Agrawala',
 'summary': 'We present LayerDiffusion, an approach enabling large-scale pretrained latent\ndiffusion models to generate transparent images. The method allows generation\nof single transparent images or of multiple transparent layers. The method\nlearns a "latent transparency" that encodes alpha channel transparency into the\nlatent manifold of a pretrained latent diffusion model. It preserves the\nproduction-ready quality of the large diffusion model by regulating the added\ntransparency as a latent offset with minimal changes to the original latent\ndistribution of the pretrained model. In this way, any latent diffusion model\ncan be converted into a transparent image generator by finetuning it with the\nadjusted latent space. We train the model with 1M transparent image layer

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Break the loaded documents into overlapping chunks for embedding:
# chunk_size=1000 with chunk_overlap=200 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

## Indexing - Store
### Vectorization & Embedding

Estimated embedding cost: count the tokens in all chunks before calling the embeddings API.

In [8]:
import tiktoken

# Tokenizer used to count tokens in each chunk
encoder = tiktoken.get_encoding('cl100k_base')

# Token count of every chunk in `splits`
tokens_per_docs = []
for chunk in splits:
    tokens_per_docs.append(len(encoder.encode(chunk.page_content)))

# Estimated cost = (total tokens / 1000) * price per 1000 tokens
# NOTE(review): the rate below is hard-coded; presumably USD for the default
# embedding model -- confirm against current pricing.
cost_per_1000_tokens = 0.0001
total_tokens = sum(tokens_per_docs)
cost = (total_tokens / 1000) * cost_per_1000_tokens
print(f"Total Embedding Cost= ~${round(cost,3)}")

Total Embedding Cost= ~$0.034


In [9]:
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

import os

# Embed and store the texts.
# Supplying a persist directory will store the embeddings on disk.
# (The variable name keeps its original spelling so existing references still work.)
persist_direcory = 'data/vectordb'

embeddings = OpenAIEmbeddings()

# Calling Chroma.from_documents against an already populated directory embeds
# every chunk again (paid API calls) and adds the chunks to the stored
# collection a second time, so retrieval starts returning duplicates.
# Reuse the stored index when one exists; delete the directory to force a
# rebuild after the source papers or chunk settings change.
if os.path.isdir(persist_direcory) and os.listdir(persist_direcory):
    vectordb = Chroma(persist_directory=persist_direcory, embedding_function=embeddings)
else:
    vectordb = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory=persist_direcory)
    vectordb.persist()

### Pull specific paper from vector database

In [47]:
# results = vectordb.similarity_search(query= "Transparent Image Layer Diffusion using Latent Transparency")
# for result in results:
#     print(result.metadata)

## Retrieval & Generate

In [10]:
from langchain_openai import ChatOpenAI
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import dotenv

# Load environment variables via python-dotenv; the cell displays the call's return value.
# NOTE(review): presumably this supplies the OpenAI API key used by the OpenAI clients -- confirm.
dotenv.load_dotenv()

True

In [11]:
from langchain_core.prompts import PromptTemplate

# Custom question-answering prompt. `{context}` and `{question}` are the two
# template variables filled in when the prompt is formatted.
# The first sentence now ends with a period; previously it ran straight into
# the next instruction ("...tasks Use the following...").
template = """You are an assistant for question-answering tasks. Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)


In [28]:
# Components shared by the RAG chains: retriever, prompt and chat model.
# The retriever is configured with k=10
# (alternative filter: search_kwargs={"score_threshold": 0.5}).
retriever = vectordb.as_retriever(
    search_kwargs={"k": 10},
)
# Prompt pulled from the LangChain hub
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)


def format_docs(docs):
    """Join the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in order, separated by a blank line
        (two newlines). An empty input gives an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

## Without sources

In [26]:
# rag_chain = (
#     {"context": retriever | format_docs, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )

# rag_chain = (
#     {"context": retriever | format_docs, "question": RunnablePassthrough()}
#     | custom_rag_prompt
#     | llm
#     | StrOutputParser()
# )

In [24]:
# for chunk in rag_chain.stream("Describe the currently trending research."):
#     print(chunk, end="", flush=True)

### With sources:

In [29]:
from langchain_core.runnables import RunnableParallel

# Answer-generation sub-chain: replaces the retrieved documents under
# "context" with their formatted text, then runs prompt -> model -> string output.
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=lambda inputs: format_docs(inputs["context"]))
    | prompt
    | llm
    | StrOutputParser()
)

# Retrieval step: fetch documents for the question and pass the question
# through unchanged, so the final result keeps the retrieved documents
# alongside the generated answer.
retrieve_step = RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
rag_chain_with_source = retrieve_step.assign(answer=rag_chain_from_docs)

response = rag_chain_with_source.invoke(
    "Summarize the paper titled Transparent Image Layer Diffusion using Latent Transparency"
)

In [30]:
# Print the metadata of every document returned under the response's "context" key.
for source_doc in response['context']:
    print(source_doc.metadata)

{'authors': 'Lvmin Zhang, Maneesh Agrawala', 'page': 0, 'published': '2024-02-28', 'source': 'https://arxiv.org/pdf/2402.17113v2.pdf', 'summary': 'We present LayerDiffusion, an approach enabling large-scale pretrained latent\ndiffusion models to generate transparent images. The method allows generation\nof single transparent images or of multiple transparent layers. The method\nlearns a "latent transparency" that encodes alpha channel transparency into the\nlatent manifold of a pretrained latent diffusion model. It preserves the\nproduction-ready quality of the large diffusion model by regulating the added\ntransparency as a latent offset with minimal changes to the original latent\ndistribution of the pretrained model. In this way, any latent diffusion model\ncan be converted into a transparent image generator by finetuning it with the\nadjusted latent space. We train the model with 1M transparent image layer pairs\ncollected using a human-in-the-loop collection scheme. We show that l

In [31]:
# Display the generated answer stored under the response's "answer" key.
response['answer']

'The paper introduces "latent transparency" to enable large-scale pretrained latent diffusion models to generate transparent images and layers. It encodes alpha channel transparency into the latent distribution of Stable Diffusion while preserving high-quality output. The method was trained with 1M transparent image layer pairs and user studies showed preference for the generated transparent content over traditional methods.'

## Chat history

In [32]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [33]:
# System instruction for the question-rewriting step: turn a follow-up
# question into a standalone one using the chat history, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Prompt layout: system instruction, prior messages, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

# Rewriting chain: prompt -> model -> plain string
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

In [34]:
from langchain_core.messages import AIMessage, HumanMessage

In [35]:
# System instruction for answering from retrieved context. The instruction
# sentences form a single line, followed by one newline and the `{context}`
# placeholder.
qa_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n{context}"
)

# Prompt layout: system instruction with context, prior messages, then the question.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])


def contextualized_question(input: dict):
    """Choose what to use as the retrieval query for one chain input.

    Args:
        input: Mapping with a "question" entry and an optional "chat_history"
            entry.

    Returns:
        ``input["question"]`` when "chat_history" is missing or empty;
        otherwise the module-level ``contextualize_q_chain``.
    """
    has_history = bool(input.get("chat_history"))
    if not has_history:
        return input["question"]
    return contextualize_q_chain


# Conversational RAG chain. The context is built by resolving the query via
# contextualized_question, retrieving documents for it, and formatting them
# into one string; the result is added to the input under "context" before
# the QA prompt and the model run. No output parser is applied at the end.
context_chain = contextualized_question | retriever | format_docs
rag_chain = RunnablePassthrough.assign(context=context_chain) | qa_prompt | llm

In [36]:
# Two-turn demo: the first exchange is recorded in the history so the second
# question ("this research") can be asked against it.
chat_history = []

question = "Summarize the paper titled Transparent Image Layer Diffusion using Latent Transparency"
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})

# Record the first turn: the user's question, then the model's reply
chat_history.append(HumanMessage(content=question))
chat_history.append(ai_msg)

second_question = "What are the notable methodologies in this research?"
rag_chain.invoke({"question": second_question, "chat_history": chat_history})

AIMessage(content='The research introduces the concept of "latent transparency," which allows latent diffusion models to generate transparent images and multiple transparent layers without altering the original latent distribution. This methodology enables encoding alpha channel transparency into the latent space. The approach maintains high-quality results and is preferred over traditional methods like matting, showing comparable quality to commercial assets.')