In [21]:
import requests
from bs4 import BeautifulSoup
import fitz  # PyMuPDF

# URL of the arXiv listing page to scrape. The captured run of this cell
# reported "showing first 1 of 115 entries" for this query string.
webpage_url = 'https://arxiv.org/list/cs.CV/recent?show=1'

# Fetch the listing page. The timeout keeps a stalled connection from hanging
# the kernel, and raise_for_status() stops an HTTP error page from being
# silently parsed as if it were a listing.
response = requests.get(webpage_url, timeout=30)
response.raise_for_status()
html_content = response.text

# Parse HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

# Page-level fallback date: the text of the first <h3> on the page.
date_element = soup.find('h3')
publication_date = date_element.get_text(strip=True) if date_element else 'Unknown date'

# Collected paper records: title, author list, absolute PDF URL and date.
base_url = 'https://arxiv.org'
papers = []

# Each <dl> holds the entries listed under one heading.
entries = soup.find_all('dl')

for entry in entries:
    # Use the closest <h3> before this <dl> so that entries under a later
    # date heading are not all stamped with the first heading on the page.
    entry_heading = entry.find_previous('h3')
    entry_date = entry_heading.get_text(strip=True) if entry_heading else publication_date

    # Pair each <dt> with its <dd> instead of zipping three independent
    # find_all() lists: with independent lists, one entry without a PDF link
    # shifts every later title/author onto the wrong URL.
    # NOTE(review): assumes the PDF link sits in <dt> and title/authors in
    # the matching <dd> -- confirm against the live page markup.
    for link_cell, meta_cell in zip(entry.find_all('dt'), entry.find_all('dd')):
        title = meta_cell.find('div', class_='list-title mathjax')
        author = meta_cell.find('div', class_='list-authors')
        pdf_link = link_cell.find('a', title='Download PDF')
        if title is None or author is None or pdf_link is None:
            # Incomplete entry (e.g. no PDF available): skip it.
            continue

        # Strip only the leading "Title:" descriptor; a blanket replace()
        # would also remove the same text from inside a real title.
        paper_title = title.get_text(strip=True)
        if paper_title.startswith('Title:'):
            paper_title = paper_title[len('Title:'):]
        paper_title = paper_title.strip()

        paper_authors = [a.get_text(strip=True) for a in author.find_all('a')]
        paper_pdf_url = base_url + pdf_link['href']
        papers.append({
            'title': paper_title,
            'authors': paper_authors,
            'pdf_url': paper_pdf_url,
            'date': entry_date
        })

def extract_text_from_pdf(paper):
    """Download a paper's PDF and return its text behind a metadata header.

    Args:
        paper: Mapping with 'title', 'date', 'authors' (iterable of str) and
            'pdf_url' keys.

    Returns:
        str: Header lines (publication date, title, authors) followed by the
        text of each PDF page. If the download or parsing fails, whatever was
        gathered so far is followed by an "Error processing PDF ..." message;
        the failure is reported in the text rather than raised.

    Raises:
        KeyError: If `paper` lacks one of the header keys (the header is
            built outside the try block).
    """
    parts = [
        f"publication date of {paper['title']}: {paper['date']}\nTitle: {paper['title']}\nAuthors of {paper['title']}: {', '.join(paper['authors'])}\n\n"
    ]
    try:
        # The whole body is needed in memory for fitz, so no streaming; the
        # timeout bounds a stalled download and raise_for_status() keeps an
        # HTML error page from being handed to the PDF parser.
        response = requests.get(paper['pdf_url'], timeout=60)
        response.raise_for_status()

        # The context manager closes the document even if a page fails.
        with fitz.open(stream=response.content, filetype="pdf") as document:
            for page in document:
                parts.append(page.get_text())

    except Exception as e:
        # Deliberate best-effort: one bad PDF must not abort the whole run.
        parts.append(f"Error processing PDF at {paper['pdf_url']}: {e}")

    return "".join(parts)

# Extract text from each PDF and print it along with title, authors, and date.
# Every paper's text is kept: previously only the loop variable from the last
# iteration survived, so later cells saw just the final paper (and the name
# was undefined when `papers` was empty).
paper_texts = []
for paper in papers:
    paper_text = extract_text_from_pdf(paper)
    print(paper_text)
    print("\n" + "="*80 + "\n")
    paper_texts.append(paper_text)

# `extracted_text` is the name the text-splitting cell reads. With a single
# paper it is exactly that paper's text; with several it is all of them
# joined by blank lines.
extracted_text = "\n\n".join(paper_texts)


publication date of InternLM-XComposer-2.5: A Versatile Large Vision Language Model Supporting Long-Contextual Input and Output: Thu, 4 Jul 2024 (showing first 1 of 115 entries )
Title: InternLM-XComposer-2.5: A Versatile Large Vision Language Model Supporting Long-Contextual Input and Output
Authors of InternLM-XComposer-2.5: A Versatile Large Vision Language Model Supporting Long-Contextual Input and Output: Pan Zhang, Xiaoyi Dong, Yuhang Zang, Yuhang Cao, Rui Qian, Lin Chen, Qipeng Guo, Haodong Duan, Bin Wang, Linke Ouyang, Songyang Zhang, Wenwei Zhang, Yining Li, Yang Gao, Peng Sun, Xinyue Zhang, Wei Li, Jingwen Li, Wenhai Wang, Hang Yan, Conghui He, Xingcheng Zhang, Kai Chen, Jifeng Dai, Yu Qiao, Dahua Lin, Jiaqi Wang

InternLM-XComposer-2.5: A Versatile Large Vision Language Model
Supporting Long-Contextual Input and Output
Pan Zhang∗1, Xiaoyi Dong∗1,2, Yuhang Zang∗1, Yuhang Cao1, Rui Qian1,2, Lin Chen1, Qipeng Guo1,
Haodong Duan1, Bin Wang1, Linke Ouyang1, Songyang Zhang1, Wenwe

In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Splitter configuration: 1000-character chunks that overlap by 200 characters,
# so text cut at a chunk boundary also appears in the neighbouring chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Texts to index; `extracted_text` is produced by the PDF extraction cell.
extracted_texts = [extracted_text]

# Flatten the per-text chunk lists into one list of chunk strings.
documents = []
for source_text in extracted_texts:
    documents += text_splitter.split_text(source_text)


In [23]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
# Embedding model, loaded through the HuggingFace wrapper, used to vectorise
# the text chunks.
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)



In [24]:
from langchain_community.vectorstores import FAISS
# Embed every chunk and index the resulting vectors in a FAISS store.
vector = FAISS.from_texts(texts=documents, embedding=embeddings)
# Expose the store through the retriever interface used by the chains.
retriever = vector.as_retriever()

In [25]:
from langchain_community.llms import Ollama
# Language model accessed through Ollama; "llama3" is the model requested.
ollama_model_name = "llama3"
llm = Ollama(model=ollama_model_name)

In [26]:
from langchain_core.output_parsers import StrOutputParser
# Initialize the output parser for string outputs.
# NOTE(review): none of the cells visible here read `output_parser`; the
# question chain constructs its own StrOutputParser() and the RAG chain ends
# at the LLM. Confirm whether this instance is still needed.
output_parser = StrOutputParser()

In [27]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# System message for the question-rewriting step: asks the model to turn a
# follow-up question into a standalone one without answering it.
instruction_to_system = """
Given a chat history and the latest user question 
which might reference context in the chat history, formulate a standalone question 
which can be understood without the chat history. Do NOT answer the question, 
just reformulate it if needed and otherwise return it as is.
"""

# Message layout: system instruction, then the prior turns, then the newest
# user question.
question_maker_messages = [
    ("system", instruction_to_system),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]
question_maker_prompt = ChatPromptTemplate.from_messages(question_maker_messages)

# Rewriting pipeline: prompt -> LLM -> plain string.
question_chain = question_maker_prompt | llm | StrOutputParser()

In [28]:
# System prompt for the question-answering assistant.
# The instruction sentences are joined into one paragraph by the trailing
# backslashes. The retrieved context now starts on its own paragraph:
# previously a trailing backslash glued "{context}" directly onto the final
# sentence ("...your answer.{context}") with no separator at all.
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, provide a summary of the context. Do not generate your answer.

{context}"""

# Prompt layout: system instructions plus context, then the prior turns, then
# the user's question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

In [29]:
# Decide whether the question must be rewritten using the chat history
def contextualized_question(input: dict):
    """Choose what is handed to the retriever for this turn.

    Returns ``question_chain`` when ``input`` carries a non-empty
    "chat_history", and the raw ``input["question"]`` value otherwise.
    """
    has_history = bool(input.get("chat_history"))
    return question_chain if has_history else input["question"]

In [30]:
from langchain_core.runnables import RunnablePassthrough
# Create a retriever chain to fetch relevant context for the question
def format_docs(docs):
    """Join the text of the retrieved documents into one prompt-ready string."""
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever output is formatted before it reaches the prompt; without
# this step the "{context}" slot is filled with the repr of a list of
# Document objects rather than their text.
retriever_chain = RunnablePassthrough.assign(
        context=contextualized_question | retriever | format_docs
    )

In [31]:
# Retrieval-Augmented Generation pipeline: attach retrieved context to the
# input, render the QA prompt, then generate the answer with the LLM.
rag_chain = retriever_chain | qa_prompt | llm

In [32]:
chat_history = []
question = "publication date of InternLM-XComposer-2.5: A Versatile Large Vision Language Model Supporting Long-Contextual Input and Output"
from langchain_core.messages import AIMessage, HumanMessage

# Invoke the RAG chain with the question and the history so far.
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})
# Record the turn. The answer is wrapped in AIMessage: a bare string placed
# in the history is treated as a human message when the prompt is rendered,
# which would attribute the model's answer to the user on later turns.
chat_history.extend([HumanMessage(content=question), AIMessage(content=ai_msg)])
print(ai_msg)

According to the provided context, the publication date of InternLM-XComposer-2.5: A Versatile Large Vision Language Model Supporting Long-Contextual Input and Output is:

Thu, 4 Jul 2024


In [33]:
# Terse follow-up question; the next cell sends it with the accumulated
# chat_history.
question = "authors "

In [34]:
# Ask the follow-up question with the accumulated history.
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})
# Store the answer as an AIMessage so it is not replayed as a human turn
# (a bare string in the history is treated as a human message).
chat_history.extend([HumanMessage(content=question), AIMessage(content=ai_msg)])
print(ai_msg)

Based on the provided context, the answer to your question is:

The publication date of InternLM-XComposer-2.5: A Versatile Large Vision Language Model Supporting Long-Contextual Input and Output is Thu, 4 Jul 2024.

Additionally, according to the context, the authors of this document are:

Pan Zhang, Xiaoyi Dong, Yuhang Zang, Yuhang Cao, Rui Qian, Lin Chen, Qipeng Guo, Haodong Duan, Bin Wang, Linke Ouyang, Songyang Zhang, Wenwei Zhang, Yining Li, Yang Gao, Peng Sun, Xinyue Zhang, Wei Li, Jingwen Li, Wenhai Wang, Hang Yan, Conghui He, Xingcheng Zhang, Kai Chen, Jifeng Dai, Yu Qiao, Dahua Lin, Jiaqi Wang


In [35]:
question = "great summarize this paper"
# Ask for a summary, again passing the accumulated history.
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})
# Store the answer as an AIMessage so it is not replayed as a human turn
# (a bare string in the history is treated as a human message).
chat_history.extend([HumanMessage(content=question), AIMessage(content=ai_msg)])
print(ai_msg)

Based on the provided context, I can summarize this paper as follows:

The paper introduces InternLM-XComposer-2.5 (IXC-2.5), a versatile large vision language model that supports long-contextual input and output. The authors highlight two advantages of IXC-2.5: its versatility in supporting various tasks related to comprehension and composition, such as free-form text-image conversation, OCR, video understanding, article composition with illustrations, and webpage crafting; and its ability to handle long-term human-AI interaction and content creation through its long-contextual capabilities.

The paper also presents the evaluation results of IXC-2.5 on 28 benchmarks, outperforming existing open-source state-of-the-art models on 16 benchmarks. Additionally, it compares IXC-2.5's performance to GPT-4V and Gemini Pro on 16 key tasks, showing that IXC-2.5 matches or surpasses these models in many cases.

The authors conclude by emphasizing the potential of open-source LVLMs like IXC-2.5 t

In [37]:
import gradio as gr
from langchain_core.messages import HumanMessage

# Function to handle the chat interaction
def chat_complete(message, state):
    """Run one chat turn through the RAG chain for the Gradio UI.

    Args:
        message: Text typed by the user. Blank or whitespace-only input is
            ignored so that no empty question is sent to the chain.
        state: List of {"role": ..., "content": ...} dicts holding earlier
            turns, or None when there is no history yet.

    Returns:
        tuple: ``(pairs, history)`` where ``pairs`` is a list of
        ``(user_text, assistant_text)`` tuples for the Chatbot component and
        ``history`` is the updated list of role/content dicts.
    """
    # Work on a copy so the caller's list is not mutated in place.
    history = list(state) if state else []

    if message and message.strip():
        ai_msg = rag_chain.invoke({"question": message, "chat_history": history})
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": ai_msg})

    # Pair each user turn with the entry that follows it; the bounds check
    # guards against a trailing user turn with no reply.
    pairs = [
        (turn["content"], history[i + 1]["content"])
        for i, turn in enumerate(history)
        if turn["role"] == "user" and i + 1 < len(history)
    ]
    return pairs, history

# Define the Gradio interface.
# Components are created inside the Blocks context in this order: heading,
# chat display, input box, per-session state, send button.
with gr.Blocks() as block:
    gr.Markdown("""<h1><center> EduVisionBot </center></h1>""")
    chatbot = gr.Chatbot()
    message = gr.Textbox(placeholder="Type your Message.........")
    # Holds the conversation history passed to and returned by chat_complete.
    state = gr.State([])
    submit = gr.Button("SEND")
    
    # Clicking SEND runs chat_complete with the textbox value and the stored
    # history, then updates the chat display and the history.
    submit.click(chat_complete, inputs=[message, state], outputs=[chatbot, state])
# NOTE(review): the captured output ends with "Keyboard interruption in main
# thread... closing server.", which suggests launch(debug=True) blocks the
# cell until interrupted -- confirm.
block.launch(debug=True)



Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.


