In [1]:

import os
import json
import logging
from pathlib import Path
from typing import List, Dict, Any
from datetime import datetime

# Core libraries
from dotenv import load_dotenv
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

# LlamaIndex imports
from llama_index.core import Document, SimpleDirectoryReader
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.schema import BaseNode, TextNode


# Load environment variables
load_dotenv()

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

print("✅ All imports loaded successfully!")
print("📁 Ready to process tagged region sermon content")

✅ All imports loaded successfully!
📁 Ready to process tagged region sermon content


In [2]:
reader = SimpleDirectoryReader(input_dir="./devo_dir", recursive=True)
documents = reader.load_data()
print(f"📄 Loaded {len(documents)} documents from './devo_dir'")

📄 Loaded 1 documents from './devo_dir'


In [3]:

EMBEDDING_MODEL = "text-embedding-3-large"
EMBEDDING_DIMENSIONS = 3072


parser = SimpleNodeParser(
        chunk_size=512,
        chunk_overlap=20,
        include_metadata=True,
        include_prev_next_rel=True
    )
nodes = parser.get_nodes_from_documents(documents)
print(f"📝 Parsed {len(nodes)} nodes from documents")

📝 Parsed 4 nodes from documents


In [None]:
client = OpenAI()
print("🤖 OpenAI client initialized")

pc = Pinecone()
response = client.embeddings.create(
        input=[n.text for n in nodes],
        model=EMBEDDING_MODEL
    )
input_vector = response.data[0].embedding

aog_index = pc.Index("aog-devo")
response = aog_index.upsert(vectors=[
        (str(i), node_embedding.embedding, {"text": nodes[i].text, **nodes[i].metadata})
        for i, node_embedding in enumerate(response.data)
    ])

🤖 OpenAI client initialized


2025-08-28 10:55:35,909 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  from .autonotebook import tqdm as notebook_tqdm


TypeError: object of type 'NoneType' has no len()

In [5]:
def test_rag_functionality(pinecone_index, openai_client, test_query: str = "family devotion"):
           """
           Test the RAG functionality for retrieving relevant content.
           """
           print(f"🔍 Testing RAG functionality with query: '{test_query}'")

           try:
               # Create embedding for the test query
               query_response = openai_client.embeddings.create(
                   input=test_query,
                   model=EMBEDDING_MODEL
               )
               query_embedding = query_response.data[0].embedding

               # Search Pinecone for similar content chunks
               search_response = pinecone_index.query(
                   vector=query_embedding,
                   top_k=5,
                   include_metadata=True,
                   include_values=False
               )

               print(f"🎯 Found {len(search_response.matches)} relevant chunks")

               # Display results
               for i, match in enumerate(search_response.matches, 1):
                   print(f"\n📄 Result {i} (Score: {match.score:.4f}):")
                   if match.metadata:
                       print(f"   Content: {match.metadata.get('text', 'No text available')[:200]}...")
                   else:
                       print("   No metadata available")

               return search_response

           except Exception as e:
               print(f"❌ Error testing RAG functionality: {str(e)}")
               return None

       # Test the RAG functionality
test_result = test_rag_functionality(aog_index, client)
print("\n✅ RAG functionality test complete")

🔍 Testing RAG functionality with query: 'family devotion'


2025-08-28 10:56:49,258 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


🎯 Found 4 relevant chunks

📄 Result 1 (Score: 0.3440):
   Content: Day 2—FAMILY DEVOTIONS



Faith Fact

We believe that . . .The Bible is God’s Word and His plan for me.

Faith Verse

Psalm 119:105 (NLT)

Your word is a lamp to guide my feet and a light for my path....

📄 Result 2 (Score: 0.2883):
   Content: Foundations for Faith	                     	The Scriptures Inspired | Session 1	





Day 1—FAMILY DEVOTIONS 



Faith Fact

We believe that . . .The Bible is God’s Word and His plan for me.

Faith Ve...

📄 Result 3 (Score: 0.2052):
   Content: Faith Fact

We believe that . . .The Bible is God’s Word and His plan for me.

Faith Verse

Psalm 119:105 (NLT)

Your word is a lamp to guide my feet and a light for my path.

Foundation for Faith 

F...

📄 Result 4 (Score: 0.2041):
   Content: © 2023 by The General Council of the Assemblies of God, 1445 N. Boonville Ave., Springfield, Missouri 65802. All rights reserved. Unauthorized duplication is 
prohibited. This document may be altere

In [6]:
# Let's examine the document and its content structure
print("📖 Examining document in devo_dir...")
for doc in documents:
    print(f"Document ID: {doc.id_}")
    print(f"Document text length: {len(doc.text)} characters")
    print(f"Document metadata: {doc.metadata}")
    print(f"First 300 characters: {doc.text[:300]}...")
    print("-" * 50)

📖 Examining document in devo_dir...
Document ID: 815d84c3-6ad8-4406-8bd1-7d572124f559
Document text length: 5885 characters
Document metadata: {'file_name': 'FFF_FD_1.docx', 'file_path': '/home/bam/aog-devo/devo_dir/FFF_FD_1.docx', 'file_type': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'file_size': 799835, 'creation_date': '2025-08-28', 'last_modified_date': '2025-08-28'}
First 300 characters: Foundations for Faith	                     	The Scriptures Inspired | Session 1	





Day 1—FAMILY DEVOTIONS 



Faith Fact

We believe that . . .The Bible is God’s Word and His plan for me.

Faith Verse

Psalm 119:105 (NLT)

Your word is a lamp to guide my feet and a light for my path.

Foundation ...
--------------------------------------------------
