In [1]:
!pip install gradio langchain pypdf unstructured beautifulsoup4 requests tiktoken faiss-cpu openai
!pip install -U langchain-community
!pip install -U langchain-huggingface

Collecting gradio
  Downloading gradio-5.31.0-py3-none-any.whl.metadata (16 kB)
Collecting pypdf
  Downloading pypdf-5.5.0-py3-none-any.whl.metadata (7.2 kB)
Collecting unstructured
  Downloading unstructured-0.17.2-py3-none-any.whl.metadata (24 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting pyth

In [32]:

# === Essential Imports ===
import os
import gradio as gr
import requests
from tempfile import NamedTemporaryFile
import traceback

# === LangChain Document Loaders ===
from langchain.document_loaders import UnstructuredURLLoader, PyPDFLoader, TextLoader

# === Embeddings (Hugging Face version to avoid OpenAI quota) ===
from langchain.embeddings import HuggingFaceEmbeddings

# === Vector Store & Text Processing ===
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter

# === LLM Chain (using Hugging Face Hub) ===
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEndpoint

# === Hugging Face API Key ===
# Never hardcode the token in the notebook. Reuse a token that is already
# exported in the environment; otherwise prompt for it without echoing it, so
# it does not end up in the saved notebook or its outputs.
from getpass import getpass

if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

# LLM used to generate answers, accessed through a Hugging Face endpoint.
llm = HuggingFaceEndpoint(
    #repo_id="google/flan-t5-large",  # alternative model
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.5,
    max_new_tokens=512,
)


In [33]:

def load_documents(file_obj=None, url=None, timeout=30):
    """Load a document from an uploaded file or from a URL.

    Args:
        file_obj: The uploaded file, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object whose ``.name``
            attribute holds the path. Takes precedence over ``url`` when both
            are supplied. Files ending in ``.pdf`` go through ``PyPDFLoader``;
            everything else is read with ``TextLoader``.
        url: Address of a PDF or of a web page. Surrounding whitespace is
            ignored. PDFs are downloaded to a temporary file and parsed with
            ``PyPDFLoader``; other URLs go through ``UnstructuredURLLoader``.
        timeout: Seconds to wait for the PDF download before giving up.

    Returns:
        list: The documents produced by the selected loader. Empty when
        neither a file nor a non-blank URL is supplied.

    Raises:
        Exception: Any download or loader error is printed together with its
        traceback and then re-raised unchanged.
    """
    from urllib.parse import urlparse

    docs = []
    # A blank URL box must behave like "no URL", not like a URL of spaces.
    if isinstance(url, str):
        url = url.strip()
    try:
        if file_obj:
            # Accept a plain path as well as a file-like object exposing
            # `.name` (str/PathLike is checked first because
            # `pathlib.Path.name` is only the basename).
            if isinstance(file_obj, (str, os.PathLike)):
                path = os.fspath(file_obj)
            else:
                path = file_obj.name
            ext = os.path.splitext(path)[-1].lower()
            loader = PyPDFLoader(path) if ext == ".pdf" else TextLoader(path)
            docs.extend(loader.load())
        elif url:
            # Look at the URL *path* too, so "paper.pdf?download=1" is still
            # recognised as a PDF.
            is_pdf = (
                url.lower().endswith(".pdf")
                or urlparse(url).path.lower().endswith(".pdf")
            )
            if is_pdf:
                # Without a timeout a stalled server would hang the app.
                response = requests.get(url, timeout=timeout)
                response.raise_for_status()
                with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                    tmp.write(response.content)
                    tmp_path = tmp.name
                # Parse only after the file is closed, and always remove it:
                # delete=False means nobody else will clean it up.
                try:
                    pages = PyPDFLoader(tmp_path).load()
                finally:
                    os.remove(tmp_path)
                # The temp path no longer exists; record the real origin.
                for page in pages:
                    page.metadata["source"] = url
                docs.extend(pages)
            else:
                loader = UnstructuredURLLoader(urls=[url])
                docs.extend(loader.load())
    except Exception as e:
        print(f"Document loading failed: {e}")
        print(traceback.format_exc())
        raise
    return docs


In [34]:

def create_qa_chain(docs, chunk_size=1000, chunk_overlap=100,
                    embedding_model="all-MiniLM-L6-v2"):
    """Index ``docs`` in a FAISS vector store and wrap it in a RetrievalQA chain.

    Args:
        docs: Documents to index (as returned by ``load_documents``).
        chunk_size: ``chunk_size`` passed to ``CharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to ``CharacterTextSplitter``.
        embedding_model: Model name passed to ``HuggingFaceEmbeddings``.

    Returns:
        A ``RetrievalQA`` chain built from the module-level ``llm`` and a
        retriever over the new vector store.

    Raises:
        ValueError: If splitting ``docs`` produces no text chunks, e.g. when
            the source contains no extractable text.
    """
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = splitter.split_documents(docs)
    # Building a FAISS index from zero chunks fails with an opaque IndexError;
    # report the real cause instead.
    if not texts:
        raise ValueError(
            "No extractable text was found in the document, so it cannot be indexed."
        )
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    vectorstore = FAISS.from_documents(texts, embeddings)
    qa = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever())
    return qa


In [35]:

# Chain for the currently indexed document; None until a load succeeds.
qa_chain = None

def ingest_and_prepare(file, url):
    """Load a document, index it, and publish the result in ``qa_chain``.

    Args:
        file: Uploaded file (or ``None``), forwarded to ``load_documents``.
        url: URL text (or ``None`` / blank), forwarded to ``load_documents``.

    Returns:
        str: A status message for the UI: success, a request to provide
        input, a note that nothing was loaded, or the error text with its
        traceback. Exceptions are never propagated to the caller.
    """
    global qa_chain
    has_url = bool(url and str(url).strip())
    # Nothing to ingest: say so, instead of blaming an "unreadable file".
    if not file and not has_url:
        return "Please upload a document or enter a URL first."

    # Forget the previously indexed document up front, so a failed load can
    # never leave questions silently answered from the old one.
    qa_chain = None
    try:
        docs = load_documents(file, url)
        if docs:
            qa_chain = create_qa_chain(docs)
            return "Document successfully loaded and indexed. Ask your question below."
        else:
            return "No documents were loaded. The file may be empty or unreadable."
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"


In [36]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

def ask_question(question):
    """Answer ``question`` from the currently indexed document.

    The most similar chunks are fetched from the vector store behind
    ``qa_chain`` and stuffed, together with the question, into a single
    prompt for the module-level ``llm``.

    Args:
        question: The user's question text.

    Returns:
        str: The model's answer, or a message asking for a question or a
        document, or an error description. Exceptions are never propagated.
    """
    # Don't spend a retrieval and an LLM call on an empty question.
    if not question or not str(question).strip():
        return "Please enter a question."

    if not qa_chain:
        return 'Please upload a document or provide a URL first.'

    try:
        prompt = PromptTemplate(
            input_variables=["context", "question"],
            template="Given the following context:\n\n{context}\n\nAnswer the question: {question}"
        )

        doc_chain = LLMChain(llm=llm, prompt=prompt)
        stuff_chain = StuffDocumentsChain(
            llm_chain=doc_chain,
            document_variable_name="context"
        )

        # Only the vector store of the stored chain is used here; the custom
        # prompt above replaces the chain's own question-answering step.
        docs = qa_chain.retriever.vectorstore.similarity_search(question)
        return stuff_chain.run({
            "input_documents": docs,
            "question": question
        })

    except Exception as e:
        return f"Error while answering: {str(e)}"


In [37]:

# Two-step UI: load a document first, then ask questions about it.
with gr.Blocks() as demo:
    gr.Markdown(
        "# Research Paper Q&A Chatbot\n"
        "Upload a document or enter a URL to a paper and ask questions about it."
    )

    # Document sources and the button that starts ingestion.
    with gr.Row():
        file_input = gr.File(label="Upload Document")
        url_input = gr.Textbox(label="Or Enter URL")
        ingest_btn = gr.Button("Load Document")

    status_output = gr.Textbox(label="Status")
    question_input = gr.Textbox(label="Ask a question")
    answer_output = gr.Textbox(label="Answer")

    # Clicking the button indexes the document and reports the outcome.
    ingest_btn.click(
        fn=ingest_and_prepare,
        inputs=[file_input, url_input],
        outputs=status_output,
    )
    # Submitting the question box sends the question and shows the answer.
    question_input.submit(
        fn=ask_question,
        inputs=question_input,
        outputs=answer_output,
    )

demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d9909831e99bd5e4bb.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




📄 Free PDF Research Papers
These are direct PDF links from trusted open-access journals or repositories.


Artificial Intelligence	A Survey of the Recent Architectures of Deep Convolutional Neural Networks
https://arxiv.org/pdf/1901.06032.pdf

Healthcare AI	Deep Learning for Healthcare: Review, Opportunities and Challenges
https://arxiv.org/pdf/2006.12355.pdf


LLMs	Attention Is All You Need (Transformers)
https://arxiv.org/pdf/1706.03762.pdf


Ethics in AI	The Malicious Use of Artificial Intelligence
https://arxiv.org/pdf/1802.07228.pdf


Climate Change	Global Warming of 1.5°C – IPCC Summary
https://www.ipcc.ch/site/assets/uploads/sites/2/2019/05/SR15_SPM_version_report_LR.pdf