### Content Engine Implementation in Jupyter Notebook

# In [None]:
# Import Required Libraries
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma
from llama_index import GPTSimpleVectorIndex, SimpleDocument

### Documentation Section
# Overview
"""
The Content Engine is designed to analyze and compare multiple PDF documents, extracting insights
and highlighting differences using Retrieval Augmented Generation (RAG). The intended design uses a local
embedding model and a local Large Language Model (LLM) so that document data never leaves the machine;
the code below uses OpenAI's hosted embeddings and LLM as placeholders until local models are swapped in.
This notebook showcases the implementation of a Content Engine using LangChain, Streamlit, and ChromaDB.
"""

# Installation Instructions
"""
1. Install required dependencies:
   ```bash
   pip install streamlit langchain llama-index chromadb pypdf openai tiktoken
   ```
2. Ensure the local embedding model and LLM are properly configured.
"""

### Setup and Initialization

# In [None]:
# Function to Parse PDF Documents
def parse_documents(file_paths):
    """Load every PDF in *file_paths* and return the combined documents.

    Args:
        file_paths: Iterable of PDF locations; each one is handed to
            ``PyPDFLoader``.

    Returns:
        One flat list containing everything each loader's ``load()``
        returned, in the order the paths were given. An empty iterable
        yields an empty list.
    """
    return [
        loaded_doc
        for pdf_path in file_paths
        for loaded_doc in PyPDFLoader(pdf_path).load()
    ]

# Initialize Embedding Model and Vector Store
# Both names are module-level state shared by the functions in this file:
# `embedding_model` is passed to Chroma.from_documents() and `vectorstore`
# is rebound by generate_store_embeddings() before the query engine reads it.
# NOTE(review): OpenAIEmbeddings presumably sends text to OpenAI's API and
# needs credentials configured; confirm, and swap in a local embedding model
# if the privacy goal described in the overview must hold.
embedding_model = OpenAIEmbeddings()
vectorstore = None  # Placeholder for Chroma vector store

# Function to Generate and Store Embeddings
def generate_store_embeddings(documents):
    """Embed *documents* and publish the resulting Chroma store module-wide.

    Args:
        documents: Passed as-is to ``Chroma.from_documents`` together with
            the module-level ``embedding_model``.

    Returns:
        None. The newly built store is bound to the global ``vectorstore``,
        replacing whatever was there before.
    """
    global vectorstore
    chroma_store = Chroma.from_documents(documents, embedding_model)
    vectorstore = chroma_store

# Setup Query Engine
def setup_query_engine():
    """Build a RetrievalQA chain over the module-level vector store.

    Returns:
        The chain created by ``RetrievalQA.from_chain_type`` using an
        ``OpenAI`` LLM and ``vectorstore.as_retriever()``.

    Raises:
        RuntimeError: If ``vectorstore`` is still ``None``, i.e. no
            embeddings have been generated yet.
    """
    # Fail fast with an actionable message instead of the opaque
    # "'NoneType' object has no attribute 'as_retriever'" error, and do it
    # before an LLM client is constructed for nothing.
    if vectorstore is None:
        raise RuntimeError(
            "Vector store is not initialized; "
            "call generate_store_embeddings() before setup_query_engine()."
        )
    llm = OpenAI()  # Replace with a local LLM for privacy
    return RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever())


### Streamlit Integration for User Interaction

# In [None]:
# Streamlit front end: upload PDFs, index them, then answer questions about
# them. Streamlit re-executes this script on every interaction, so the steps
# below run again whenever the uploads or the question change.
st.title("Content Engine for Document Analysis")

uploaded_files = st.file_uploader(
    "Upload PDF documents for analysis", type=["pdf"], accept_multiple_files=True
)

if uploaded_files:
    # Uploads are held in memory; `uploaded_file.name` is only the file name
    # reported by the browser, not a path on this machine. PyPDFLoader needs a
    # real file, so each upload is first written into a temporary directory.
    with tempfile.TemporaryDirectory() as upload_dir:
        file_paths = []
        for index, uploaded_file in enumerate(uploaded_files):
            # basename() drops any directory part of the client-supplied name;
            # the index prefix keeps two uploads with the same name apart.
            safe_name = f"{index}_{os.path.basename(uploaded_file.name)}"
            file_path = os.path.join(upload_dir, safe_name)
            with open(file_path, "wb") as pdf_file:
                pdf_file.write(uploaded_file.getvalue())
            file_paths.append(file_path)
        # Parse inside the `with` block: the temporary files are deleted as
        # soon as it exits.
        documents = parse_documents(file_paths)
    st.info("Documents successfully loaded and parsed.")

    generate_store_embeddings(documents)
    st.success("Embeddings generated and stored locally.")

    qa_chain = setup_query_engine()
    st.success("Query Engine configured successfully!")

    st.subheader("Ask your queries!")
    query = st.text_input("Enter your question:")
    if query:
        response = qa_chain.run(query)
        st.write(f"Answer: {response}")

### Notes
# 1. Replace `OpenAI()` and `OpenAIEmbeddings()` with local model instances to ensure local processing.
# 2. Test the Streamlit app by saving this code as `app.py` and running it from the terminal:
#    ```bash
#    streamlit run app.py
#    ```
