In [1]:
pip install openai numpy scikit-learn python-dotenv

Collecting openai
  Downloading openai-2.14.0-py3-none-any.whl.metadata (29 kB)
Collecting numpy
  Using cached numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp310-cp310-macosx_12_0_arm64.whl.metadata (11 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.2.1-py3-none-any.whl.metadata (25 kB)
Collecting distro<2,>=1.7.0 (from openai)
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting jiter<1,>=0.10.0 (from openai)
  Downloading jiter-0.12.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (5.2 kB)
Collecting pydantic<3,>=1.9.0 (from openai)
  Using cached pydantic-2.12.5-py3-none-any.whl.metadata (90 kB)
Collecting sniffio (from openai)
  Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
Collecting tqdm>4 (from openai)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting annotated-types>=0.6.0 (from pydantic<3,>=1.9.0->openai)
  Using cached annotated_types-0

In [2]:
import os
import numpy as np
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity
from getpass import getpass

In [None]:
# Resolve the API key without hard-coding it: prefer the OPENAI_API_KEY
# environment variable and fall back to an interactive (hidden) prompt.
# Defining `api_key` here keeps the notebook runnable from a fresh kernel.
api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
client = OpenAI(api_key=api_key)

<openai.OpenAI object at 0x115c69150>


In [8]:
def get_embedding(text, model="text-embedding-3-small"):
    """
    Turn text into an embedding vector using the OpenAI embeddings endpoint.

    Args:
        text: The input forwarded to the embeddings endpoint.
        model: Name of the embedding model to use. Defaults to
            "text-embedding-3-small", the model used throughout this notebook.

    Returns:
        The `embedding` of the first entry in the response data.

    Note:
        Uses the notebook-level `client` object, which must be created first.
    """
    response = client.embeddings.create(input=text, model=model)
    # Only the first entry of the response is returned.
    return response.data[0].embedding

In [9]:
# Embed one word and take a quick look at the vector that comes back.
word_vec = get_embedding("Apple")

# The length is expected to be 1536 for text-embedding-3-small.
print(
    f"Vector length: {len(word_vec)}",
    f"First 5 dimensions: {word_vec[:5]}",
    sep="\n",
)

Vector length: 1536
First 5 dimensions: [0.009197094477713108, -0.03516796976327896, -0.025027744472026825, 0.039801329374313354, 0.001860940014012158]


In [None]:
# Embed three words, in this order: King, Queen, Apple
word_1, word_2, word_3 = (
    get_embedding(word) for word in ("King", "Queen", "Apple")
)

# Cosine similarity of "King" against each of the other two words
score_king_queen = cosine_similarity([word_1], [word_2])[0, 0]
score_king_apple = cosine_similarity([word_1], [word_3])[0, 0]

print(
    f"Similarity (King vs Queen): {score_king_queen:.4f}",
    f"Similarity (King vs Apple): {score_king_apple:.4f}",
    sep="\n",
)

Similarity (King vs Queen): 0.7240
Similarity (King vs Apple): 0.3318


In [11]:
# A tiny in-memory "database" of documents to search over.
documents = [
    "The golden retriever is a popular dog breed known for its gentle nature.",
    "Python involves dynamic typing and garbage collection.",
    "The weather in San Francisco is often foggy in the summer.",
    "Use 'def' to define a function in Python.",
]

# Embed every document once up front; the result is index-aligned with `documents`.
doc_embeddings = list(map(get_embedding, documents))

def retrieve_context(query, docs, doc_vecs):
    """
    Find the document most relevant to the query.

    Args:
        query: Text to search for; it is embedded with `get_embedding`.
        docs: Sequence of documents to choose from.
        doc_vecs: Embeddings for `docs`, one per document and in the same order.

    Returns:
        A `(document, score)` tuple: the entry of `docs` whose embedding has the
        highest cosine similarity to the query, and that similarity score.

    Raises:
        ValueError: If `docs` is empty, or if `docs` and `doc_vecs` differ in length.
    """
    # Validate first so a bad call fails with a clear message and does not
    # spend an embedding request on a query that cannot be answered.
    if len(docs) == 0:
        raise ValueError("docs must contain at least one document")
    if len(docs) != len(doc_vecs):
        raise ValueError(
            f"docs and doc_vecs must have the same length "
            f"(got {len(docs)} and {len(doc_vecs)})"
        )

    # Embed the query
    query_vec = get_embedding(query)

    # One row of scores: the query against every document vector.
    similarities = cosine_similarity([query_vec], doc_vecs)[0]

    # Index of the highest score, cast to a plain int for sequence indexing.
    best_idx = int(np.argmax(similarities))

    return docs[best_idx], similarities[best_idx]

# --- Test the Retrieval ---
query = "How do I write a function?"
context, score = retrieve_context(query, documents, doc_embeddings)

print(f"User Query: {query}")
print(f"Best Match Score: {score:.4f}")
print(f"Retrieved Context: {context}")

User Query: How do I write a function?
Best Match Score: 0.4398
Retrieved Context: Use 'def' to define a function in Python.


In [12]:
def ask_rag(query, model="gpt-4o-mini", verbose=True):
    """
    Answer a question with a minimal retrieve-then-generate (RAG) flow.

    The best-matching entry of the notebook-level `documents` is retrieved via
    `retrieve_context` and placed in a prompt that is sent as a single user
    message to the chat completions endpoint of the notebook-level `client`.

    Args:
        query: The user's question.
        model: Chat model name passed to the API. Defaults to "gpt-4o-mini".
        verbose: When True (the default), print the retrieved context for debugging.

    Returns:
        The message content of the first choice in the chat completion response.
    """
    # Step 1: Retrieve relevant information (The "R")
    # The similarity score is not used for generation, so it is discarded.
    best_context, _ = retrieve_context(query, documents, doc_embeddings)

    if verbose:
        print(f"[Debug] Retrieved Info: {best_context}")

    # Step 2: Generate Answer (The "G")
    # We explicitly tell the model to use the provided context.
    prompt = f"""
    You are a helpful assistant. Answer the user's question based ONLY on the context below.
    
    Context:
    {best_context}
    
    Question: 
    {query}
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )

    return response.choices[0].message.content

# --- Run the Full RAG System ---
answer = ask_rag("Tell me about coding functions")
print("\n--- Final Answer ---")
print(answer)

[Debug] Retrieved Info: Use 'def' to define a function in Python.

--- Final Answer ---
In Python, you use the keyword 'def' to define a function. Functions are blocks of code designed to perform a specific task. You can call a function whenever you need to execute that task. Functions can take inputs (called parameters) and can return outputs.
