In [None]:
# OpenAI Chat App

In [None]:
# OpenAI interaction


import openai

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# stored in the notebook; the original literal is kept as the fallback value.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY", "...")

def openai_chat(prompt):
    """Send a single user message to the chat model and return the reply text.

    Args:
        prompt: Text sent as the only "user" message of the conversation.

    Returns:
        The message content of the first choice in the completion response.
    """
    # openai>=1.0 removed `openai.ChatCompletion`; the module-level
    # `openai.chat.completions` client picks up the `openai.api_key` set above.
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
    )
    # v1 responses are typed objects, so use attribute access instead of dict keys.
    return response.choices[0].message.content


In [3]:
# Extract text from pdf documents

import fitz  # PyMuPDF

# Function to read and extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF, passed straight to ``fitz.open``.

    Returns:
        One string made of each page's ``get_text()`` output with nothing
        inserted between pages.
    """
    with fitz.open(pdf_path) as doc:
        # Pull the text while the document is still open, one entry per page.
        page_texts = [
            doc.load_page(page_index).get_text()
            for page_index in range(doc.page_count)
        ]
    return "".join(page_texts)

# Paths to your PDF files
pdf_paths = [
    "C:/Users/dusti/Downloads/ARN42269-ALARACT_0872024-000-WEB-1.pdf",
    "C:/Users/dusti/Downloads/ARN42268-ALARACT_0892024-000-WEB-1.pdf",
    "C:/Users/dusti/Downloads/ARN42271-ALARACT_0912024-000-WEB-1.pdf",
]

# Run the extractor over every path: one text string per PDF, in the same order as pdf_paths
documents = list(map(extract_text_from_pdf, pdf_paths))


In [None]:
# The openai.embeddings_utils module is no longer available (see the ModuleNotFoundError recorded further down); embeddings can instead be obtained with code along the lines of the following

from openai import OpenAI
# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# stored in the notebook; the original placeholder is kept as the fallback value.
import os

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY"))

def get_embedding(text, model="text-embedding-ada-002"):
    """Fetch the embedding of ``text`` from the embeddings endpoint.

    Args:
        text: Input string; newline characters are replaced with spaces
            before the request is made.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    single_line = text.replace("\n", " ")
    result = client.embeddings.create(input=[single_line], model=model)
    return result.data[0].embedding

# Applies get_embedding to each value of df.combined and stores the results in a new
# 'ada_embedding' column.
# NOTE(review): `df` is not defined in any cell visible in this notebook, so this line
# raises NameError unless `df` (presumably a pandas DataFrame with a `combined` text
# column) is created first — confirm whether this line is still needed.
df['ada_embedding'] = df.combined.apply(lambda x: get_embedding(x, model='text-embedding-ada-002'))

In [4]:
# Set up FAISS index for document retrieval

import faiss
import numpy as np
# `openai.embeddings_utils` does not exist in openai>=1.0 (this import is what
# produced the ModuleNotFoundError recorded below this cell), so fall back to the
# `get_embedding` defined in the previous cell when the module is unavailable.
try:
    from openai.embeddings_utils import get_embedding
except ModuleNotFoundError:
    if "get_embedding" not in globals():
        raise  # no fallback has been defined — surface the original import error

# Load your documents
#documents = ["add documents"]  # not needed here: the `documents` list built in the PDF extraction cell above is reused

# Generate and store embeddings
def create_embeddings(texts):
    """Embed each text and stack the vectors into one array.

    Args:
        texts: Iterable of strings; each one is passed to ``get_embedding``.

    Returns:
        A float32 NumPy array with one row per input text.
    """
    embeddings = [get_embedding(text) for text in texts]
    # FAISS indexes operate on float32 vectors; building the array as float32
    # here avoids handing FAISS NumPy's default float64 data.
    return np.array(embeddings, dtype=np.float32)

# Embed every extracted document; `embeddings` is the array returned by create_embeddings
embeddings = create_embeddings(documents)

# Initialize FAISS index
# The dimensionality is read from the second axis of `embeddings`, so the array must be
# 2-D; with an empty `documents` list this lookup fails with IndexError.
dimension = embeddings.shape[1]  # Embedding size (e.g., 1536 for OpenAI models)
index = faiss.IndexFlatL2(dimension)
# Add all document vectors; retrieve_relevant_documents later uses the positions returned
# by index.search to index into `documents`, so the insertion order here must match it.
index.add(embeddings)

# Function to retrieve top-k similar documents
def retrieve_relevant_documents(query, k=3):
    """Return the text of the documents closest to ``query``, joined by newlines.

    Args:
        query: Query string; it is embedded with ``get_embedding``.
        k: Maximum number of neighbours requested from the index.

    Returns:
        The matching entries of ``documents`` in the order the index returned
        them, separated by a newline. Fewer than ``k`` documents are returned
        when the index holds fewer than ``k`` vectors.
    """
    # FAISS operates on float32 vectors; NumPy would otherwise default to float64.
    query_embedding = np.array([get_embedding(query)], dtype=np.float32)
    _, indices = index.search(query_embedding, k)
    # FAISS pads the result with -1 when it has fewer than k vectors. Without this
    # filter, documents[-1] would silently return the last document as a duplicate.
    relevant_docs = [documents[i] for i in indices[0] if i >= 0]
    return "\n".join(relevant_docs)


ModuleNotFoundError: No module named 'openai.embeddings_utils'

In [None]:
# Set up RAG chatbot

def chat_with_rag(query):
    """Answer ``query`` with the chat model, grounded in retrieved documents.

    Args:
        query: The user's question.

    Returns:
        Whatever ``openai_chat`` returns for the context-augmented prompt.
    """
    # Retrieved document text goes first, followed by the user's question.
    context = retrieve_relevant_documents(query)
    return openai_chat(f"Context: {context}\n\nUser: {query}")


In [None]:
#Streamlit web interface

import streamlit as st

# Streamlit page setup
st.title("Chat with OpenAI and Your Data")
st.write("Ask questions based on your data, grounded by RAG")

# The conversation is kept in session state as (role, text) pairs so it survives reruns
if "messages" not in st.session_state:
    st.session_state.messages = []

# Single-line text box for the user's question
user_input = st.text_input("Your message", "")

# A non-empty submission is answered through the RAG pipeline, then both turns are recorded
if user_input:
    response = chat_with_rag(user_input)
    st.session_state.messages.extend(
        [("user", user_input), ("assistant", response)]
    )

# Render the whole conversation, labelling each line by who spoke
for role, message in st.session_state.messages:
    st.write(f"**You**: {message}" if role == "user" else f"**Assistant**: {message}")
