In [72]:
import os

# 1. Check current directory
current_dir = os.getcwd()
print(f"Working Directory: {current_dir}")

# 2. List all PDF files
try:
    files = [f for f in os.listdir('.') if f.endswith('.pdf')]
    print(f"‚úÖ Success! Found {len(files)} PDFs.")
    for f in files:
        print(f" - {f}")
except Exception as e:
    print(f"‚ùå Still blocked: {e}")

Working Directory: /Users/derekmoore/Desktop/OCI-GenAI-RAG-Project
‚úÖ Success! Found 16 PDFs.
 - Danny Hsu TRANSCRIPT PODCAST.pdf
 - dylan milstein podcast transcript.pdf
 - kevin cahill podcast transcript.pdf
 - PODCAST SCRIPT Nico Pigni.pdf
 - Maria Lozada Podcast TRANSCRIPT.pdf
 - proptech_basics.pdf
 - Mike Russo TRANSCRIPT.pdf
 - mor milo pod transcript.pdf
 - Buddy Rushing WhiteFeather Investments PODCAST TRANSCRIPT.pdf
 - TRANSCRIPT OF PODCAST Leland Remias.pdf
 - TRANSCRIPT Alan Grosheider.pdf
 - Joseph El Am Prypco Podcast Transcript.pdf
 - greg offerd PODCAST TRANSCRIPT.pdf
 - PropTech_faq.pdf
 - Tom Gabrielle podcast TRANSCRIPT.pdf
 - PODCAST Transcript Josh Glasser Qwesty.pdf


In [73]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. Gather all your PDF names
pdf_files = [f for f in os.listdir('.') if f.endswith('.pdf')]

all_chunks = []
# We'll use a splitter to break the 16 PDFs into small "searchable" pieces
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

print(f"Processing {len(pdf_files)} files...")

for pdf in pdf_files:
    loader = PyPDFLoader(pdf)
    data = loader.load()
    # Break the pages into smaller 1000-character chunks
    chunks = text_splitter.split_documents(data)
    all_chunks.extend(chunks)
    print(f"Done with: {pdf} ({len(chunks)} chunks created)")

print("-" * 30)
print(f"‚úÖ FINAL TOTAL: {len(all_chunks)} text chunks ready for the AI Database.")

Processing 16 files...
Done with: Danny Hsu TRANSCRIPT PODCAST.pdf (38 chunks created)
Done with: dylan milstein podcast transcript.pdf (31 chunks created)
Done with: kevin cahill podcast transcript.pdf (30 chunks created)
Done with: PODCAST SCRIPT Nico Pigni.pdf (33 chunks created)
Done with: Maria Lozada Podcast TRANSCRIPT.pdf (30 chunks created)
Done with: proptech_basics.pdf (5 chunks created)
Done with: Mike Russo TRANSCRIPT.pdf (31 chunks created)
Done with: mor milo pod transcript.pdf (46 chunks created)
Done with: Buddy Rushing WhiteFeather Investments PODCAST TRANSCRIPT.pdf (48 chunks created)
Done with: TRANSCRIPT OF PODCAST Leland Remias.pdf (27 chunks created)
Done with: TRANSCRIPT Alan Grosheider.pdf (39 chunks created)
Done with: Joseph El Am Prypco Podcast Transcript.pdf (29 chunks created)
Done with: greg offerd PODCAST TRANSCRIPT.pdf (36 chunks created)
Done with: PropTech_faq.pdf (6 chunks created)
Done with: Tom Gabrielle podcast TRANSCRIPT.pdf (36 chunks created)
Do

In [74]:
import oci

# Use the config dictionary we talked about earlier
# Replace the placeholder OCIDs with your actual ones
oci_config = {
    "user": "ocid1.user.oc1..aaaaaaaape6miicevicqskax5ixfjvughaatgvbkcv76dt6h2ukzkqgx2udq", 
    "fingerprint": "30:6e:74:f3:d6:c3:20:79:5e:36:c8:f9:86:bb:c3:7c", 
    "tenancy": "ocid1.tenancy.oc1..aaaaaaaapgfazp34bhifdky2itxmxrqvrcgzs2vr5limlz3fb7geh366gc3a",
    "region": "us-ashburn-1",
    "key_file": "/Users/derekmoore/ .oci/oci_api_key.pem"
}

# This is the "Truth Test"
gen_ai_client = oci.generative_ai.GenerativeAiClient(oci_config)
try:
    # We are asking OCI to list the models it sees for your account
    models = gen_ai_client.list_models(compartment_id=oci_config["tenancy"]).data
    print("‚úÖ I can see the following models via API:")
    for m in models.items:
        if "embed" in m.display_name.lower():
            print(f" - {m.display_name} (ID: {m.id})")
except Exception as e:
    print(f"‚ùå API Error: {e}")

‚úÖ I can see the following models via API:
 - cohere.embed-v4.0 (ID: ocid1.generativeaimodel.oc1.iad.amaaaaaask7dceyahw4vlsxm7newcqtlgmristnwxlrxox3h7bcnlomjpgwa)
 - cohere.embed-multilingual-light-image-v3.0 (ID: ocid1.generativeaimodel.oc1.iad.amaaaaaask7dceyanjovpmwmspjzwharl4tebjamhffc5brdqhvyvboarpyq)
 - cohere.embed-multilingual-image-v3.0 (ID: ocid1.generativeaimodel.oc1.iad.amaaaaaask7dceyazeracodio7mgnoq76vk26jdvdt7x7pa4amy3s6yomplq)
 - cohere.embed-english-image-v3.0 (ID: ocid1.generativeaimodel.oc1.iad.amaaaaaask7dceyaukpmlzyv2y3rb2sqdw4ldqsysxqula3wfnhadnj77drq)
 - cohere.embed-english-light-image-v3.0 (ID: ocid1.generativeaimodel.oc1.iad.amaaaaaask7dceya56ycvdjfdwciqcgpzinzz72jre65z57rgyo4bvq7h55a)
 - cohere.embed-multilingual-v3.0 (ID: ocid1.generativeaimodel.oc1.iad.amaaaaaask7dceyaf4ga422xmco2gqbwaks7chwt24y6qtofhwwfrpxjxpxa)


In [75]:
from langchain_community.embeddings import OCIGenAIEmbeddings

# Use the specific OCID for Ashburn that your "Truth Test" previously found
# This is the most bulletproof way to identify the model
model_ocid = "ocid1.generativeaimodel.oc1.iad.amaaaaaask7dceyahw4vlsxm7newcqtlgmristnwxlrxox3h7bcnlomjpgwa"

embeddings = OCIGenAIEmbeddings(
    model_id=model_ocid,
    compartment_id=oci_config["tenancy"], 
    client=gen_ai_inference_client 
)
print("‚úÖ Embeddings re-initialized with the direct OCID.")

‚úÖ Embeddings re-initialized with the direct OCID.


In [76]:
!nc -v adb.us-ashburn-1.oraclecloud.com 1521

Connection to adb.us-ashburn-1.oraclecloud.com port 1521 [tcp/ncube-lm] succeeded!


In [77]:
import oracledb

# 1. The EXACT subfolder where your .pem and .ora files live
final_wallet_path = "/Users/derekmoore/Documents/Oracle_Wallets/New_Wallet/Wallet_AssetsandAlgorithms"

# 2. Connection String
dsn_string = """(description=(address=(protocol=tcps)(port=1522)(host=adb.us-ashburn-1.oraclecloud.com))(connect_data=(service_name=ge6041e4e441a34_assetsandalgorithms_low.adb.oraclecloud.com))(security=(ssl_server_dn_match=yes)))"""

try:
    print(f"üöÄ Targeting wallet in: {final_wallet_path}")
    conn = oracledb.connect(
        user="ADMIN",
        password="4ssets&Algorithms",
        dsn=dsn_string,
        wallet_location=final_wallet_path,
        wallet_password="PropTech2026!"
    )
    print("‚úÖ CONNECTION SUCCESSFUL!")
    print(f"Connected to: {conn.version}")
except oracledb.Error as e:
    print(f"‚ùå Connection failed: {e}")

üöÄ Targeting wallet in: /Users/derekmoore/Documents/Oracle_Wallets/New_Wallet/Wallet_AssetsandAlgorithms
‚úÖ CONNECTION SUCCESSFUL!
Connected to: 23.26.0.1.0


In [78]:
cursor = conn.cursor()
try:
    # Safely clear the old 1024-dim table
    cursor.execute("DROP TABLE IF EXISTS PROPTECH_KNOWLEDGE PURGE")
    
    # Create the new table with 384 dimensions for the Light model
    cursor.execute("""
        CREATE TABLE PROPTECH_KNOWLEDGE (
            id VARCHAR2(64) PRIMARY KEY,
            text CLOB,
            metadata JSON,
            embedding VECTOR(384, FLOAT32)
        )
    """)
    conn.commit()
    print("‚úÖ Table reset successfully for 384-dimensional 'Light' embeddings.")
except Exception as e:
    print(f"‚ùå Table reset failed: {e}")
finally:
    cursor.close()

‚úÖ Table reset successfully for 384-dimensional 'Light' embeddings.


In [79]:
from langchain_community.embeddings import OCIGenAIEmbeddings

# Use the specific canonical name for the Light version
# This model produces 384 dimensions
model_id = "cohere.embed-english-light-v3.0" 

embeddings = OCIGenAIEmbeddings(
    model_id=model_id,
    compartment_id=oci_config["tenancy"],
    client=gen_ai_inference_client 
)
print(f"‚úÖ Librarian re-initialized with: {model_id}")

‚úÖ Librarian re-initialized with: cohere.embed-english-light-v3.0


In [80]:
import oci
from oci.generative_ai_inference import GenerativeAiInferenceClient
from langchain_community.embeddings import OCIGenAIEmbeddings

# 1. Update your region identifier
oci_config["region"] = "us-chicago-1"

# 2. Update the Inference Service Endpoint for Chicago
chicago_endpoint = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"

# 3. Re-initialize the Inference Client pointing to Chicago
gen_ai_inference_client = GenerativeAiInferenceClient(
    oci_config, 
    service_endpoint=chicago_endpoint
)

# 4. Use the full Cohere English V3 model (1024 dims) now that we are in Chicago
model_id = "cohere.embed-english-v3.0"

embeddings = OCIGenAIEmbeddings(
    model_id=model_id,
    compartment_id=oci_config["tenancy"],
    client=gen_ai_inference_client
)
print("‚úÖ Successfully re-routed to Chicago Inference Endpoint!")

‚úÖ Successfully re-routed to Chicago Inference Endpoint!


In [81]:
cursor = conn.cursor()
cursor.execute("DROP TABLE IF EXISTS PROPTECH_KNOWLEDGE PURGE")
cursor.execute("""
    CREATE TABLE PROPTECH_KNOWLEDGE (
        id VARCHAR2(64) PRIMARY KEY,
        text CLOB,
        metadata JSON,
        embedding VECTOR(1024, FLOAT32)
    )
""")
conn.commit()
print("‚úÖ Table reset for 1024-dimensional Chicago embeddings.")

‚úÖ Table reset for 1024-dimensional Chicago embeddings.


In [82]:
import oci
from oci.generative_ai_inference import GenerativeAiInferenceClient
from langchain_community.embeddings import OCIGenAIEmbeddings

# 1. Target the Chicago Inference Endpoint
chicago_endpoint = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"

# 2. Re-initialize the Inference Client for the new region
gen_ai_inference_client = GenerativeAiInferenceClient(
    oci_config, 
    service_endpoint=chicago_endpoint
)

# 3. Setup the 1024-dimension Embeddings model
# Chicago explicitly supports this model on-demand
model_id = "cohere.embed-english-v3.0"

embeddings = OCIGenAIEmbeddings(
    model_id=model_id,
    compartment_id=oci_config["tenancy"],
    client=gen_ai_inference_client
)
print("‚úÖ AI Librarian successfully re-routed to Chicago!")

‚úÖ AI Librarian successfully re-routed to Chicago!


In [83]:
cursor = conn.cursor()
try:
    cursor.execute("DROP TABLE IF EXISTS PROPTECH_KNOWLEDGE PURGE")
    cursor.execute("""
        CREATE TABLE PROPTECH_KNOWLEDGE (
            id VARCHAR2(64) PRIMARY KEY,
            text CLOB,
            metadata JSON,
            embedding VECTOR(1024, FLOAT32)
        )
    """)
    conn.commit()
    print("‚úÖ Database table ready for 1024-dimension Chicago vectors.")
except Exception as e:
    print(f"‚ùå Table setup failed: {e}")
finally:
    cursor.close()

‚úÖ Database table ready for 1024-dimension Chicago vectors.


In [84]:
from langchain_community.vectorstores import OracleVS
from langchain_community.vectorstores.utils import DistanceStrategy

try:
    print(f"üöÄ Ingesting {len(all_chunks)} chunks into Oracle 23ai via Chicago...")
    
    # Connect Python to the table
    vector_store = OracleVS(
        client=conn,
        embedding_function=embeddings,
        table_name="PROPTECH_KNOWLEDGE",
        distance_strategy=DistanceStrategy.COSINE,
        params={"embedding_dim": 1024} # Bypasses automatic API dimension checks
    )
    
    # The actual data transfer
    vector_store.add_documents(all_chunks)
    print("\nüèÜ MISSION ACCOMPLISHED!")
    print(f"Successfully stored {len(all_chunks)} vectors in your PropTech Knowledge Base.")
    
except Exception as e:
    print(f"‚ùå Final Ingestion Failed: {e}")
    print("üí° Tip: If you see a 401/Authentication error, re-run your oci_config cell.")

üöÄ Ingesting 496 chunks into Oracle 23ai via Chicago...

üèÜ MISSION ACCOMPLISHED!
Successfully stored 496 vectors in your PropTech Knowledge Base.


In [85]:
!pip install langchain-classic



In [86]:
# 1. New imports for the Chat-based OCI class
from langchain_community.chat_models.oci_generative_ai import ChatOCIGenAI
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate

# 2. Setup the "Brain" using the CHAT endpoint in Chicago
# We use 'cohere.command-r-08-2024' as it is the stable on-demand standard
llm = ChatOCIGenAI(
    model_id="cohere.command-r-08-2024", 
    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    compartment_id=oci_config["tenancy"],
    client=gen_ai_inference_client,
    model_kwargs={"max_tokens": 1000, "temperature": 0.7}
)

# 3. Re-initialize the Chain
system_prompt = (
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. \n\n {context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# 4. Re-link the modernized retrieval chain
question_answer_chain = create_stuff_documents_chain(llm, prompt)
qa_chain = create_retrieval_chain(vector_store.as_retriever(), question_answer_chain)

print("‚úÖ Chatbot converted to modern CHAT endpoint. Ready for Chicago!")

‚úÖ Chatbot converted to modern CHAT endpoint. Ready for Chicago!


In [87]:
import sys

print("--- üè† PROPTECH AI KNOWLEDGE ASSISTANT (CHICAGO) ---")
print("I'm ready! Ask me anything about your podcast transcripts.")
print("(Type 'quit' or 'exit' to end the session)\n")

while True:
    # 1. Get user input
    user_input = input("You: ")
    
    # 2. Check for exit command
    if user_input.lower() in ['quit', 'exit', 'bye']:
        print("AI: Goodbye! Happy PropTech innovating!")
        break
        
    if not user_input.strip():
        continue

    try:
        # 3. Query the modernized Chicago chain
        # The modern chain uses 'invoke' and returns a dictionary
        result = qa_chain.invoke({"input": user_input})
        
        # 4. Print the answer
        print(f"\nAI: {result['answer']}\n")
        print("-" * 50)
        
    except Exception as e:
        print(f"\n‚ùå Error: {e}")
        print("üí° Hint: If the connection timed out, re-run your 'conn' cell.\n")

--- üè† PROPTECH AI KNOWLEDGE ASSISTANT (CHICAGO) ---
I'm ready! Ask me anything about your podcast transcripts.
(Type 'quit' or 'exit' to end the session)



You:  what is proptech



AI: is short for Property Technology and refers to the use of digital technology to transform how real estate is bought, sold, managed, financed, and operated. It represents the intersection of real estate and modern technologies such as cloud computing, data analytics, artificial intelligence, automation, and software platforms. PropTech solutions aim to improve efficiency, transparency, decision-making, and user experience across the entire real estate lifecycle.

--------------------------------------------------


You:  exit


AI: Goodbye! Happy PropTech innovating!
