# Segment 4 Lab 2

## RAG (Retrieval Augmented Generation)

For our 2nd agent, we will be asking GPT-4o-mini to estimate the price of one of our deals.

It turns out that LLMs are really good at this! Out of the box, GPT-4o achieves an average error of $76, much better than our Neural Network and traditional solutions.

But we can do even better: we'll provide it with some context, in the form of 5 similar products from our training dataset.

Again I'll be going quite quickly through this - the idea is for you to run this yourself.

In [None]:
# imports

import os
import re
import math
import json
from tqdm import tqdm
import random
from dotenv import load_dotenv
from huggingface_hub import login
import numpy as np
import pickle
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
import chromadb
from items import Item
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from testing import Tester
from openai import OpenAI

In [None]:
# Environment: call load_dotenv(), then make sure both credentials are present
# in os.environ, falling back to a placeholder string when one is not set.

load_dotenv()
for _key in ('OPENAI_API_KEY', 'HF_TOKEN'):
    os.environ[_key] = os.getenv(_key, 'your-key-if-not-using-env')

# Path handed to the persistent Chroma client
DB = "products_vectorstore"

In [None]:
# Log in to HuggingFace
# If you don't have a HuggingFace account, you can set one up for free at www.huggingface.co
# And then add the HF_TOKEN to your .env file as explained in the project README

# Raises KeyError if HF_TOKEN is missing from the environment
hf_token = os.environ['HF_TOKEN']
# add_to_git_credential=True: presumably also stores the token as a git credential — confirm in huggingface_hub docs
login(hf_token, add_to_git_credential=True)

In [None]:
# Load the training data
# NOTE(review): unpickling can execute arbitrary code - only load train.pkl from a trusted source

with open('../train.pkl', 'rb') as file:
    train = pickle.load(file)

# Display the first training item as the cell output
train[0]

# Now create a Chroma Datastore

Now we will use the free, open-source Vector database Chroma.  
We will create a Chroma datastore with 400,000 products from our training dataset.

In [None]:
# Create a persistent Chroma client backed by the path stored in DB
client = chromadb.PersistentClient(path=DB)

# Introducing the SentenceTransformer Encoding LLM

The all-MiniLM is a very useful model from HuggingFace that maps sentences & paragraphs to 384 dimensional vectors and is ideal for tasks like semantic search.

https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2

It can run pretty quickly locally.

As an alternative, OpenAI provides a closed-source Embeddings model. Benefits of all-MiniLM compared to OpenAI embeddings:
1. It's free and fast!
2. We can run it locally, so the data never leaves our box - might be useful if you're building a personal RAG

In [None]:
# Load the all-MiniLM-L6-v2 sentence-embedding model by its HuggingFace identifier
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [None]:
# Pass in a list of texts, get back a numpy array of vectors

# Encode one sentence and keep the first (only) row of the result
# NOTE(review): the name `vector` is rebound to a function in a later cell
vector = model.encode(["Hello Software Engineers becoming Data Scientists!!"])[0]
print(vector.shape)
vector

In [None]:
# Check if the collection exists; if not, create it and fill it from `train`

collection_name = "products"
# list_collections() yields Collection objects in some Chroma releases and
# plain name strings in others, so accept either form.
existing_collection_names = [
    getattr(entry, "name", entry) for entry in client.list_collections()
]

if collection_name not in existing_collection_names:
    collection = client.create_collection(collection_name)
    batch_size = 1000
    for i in tqdm(range(0, len(train), batch_size)):
        batch = train[i: i + batch_size]
        documents = [item.text for item in batch]
        # Embed the whole batch in one call; Chroma is given plain lists of floats
        vectors = model.encode(documents).astype(float).tolist()
        metadatas = [{"category": item.category, "price": item.price} for item in batch]
        # Size the ids from the actual batch so a short final batch does not
        # produce more ids than documents.
        ids = [f"doc_{j}" for j in range(i, i + len(batch))]
        collection.add(
            ids=ids,
            documents=documents,
            embeddings=vectors,
            metadatas=metadatas
        )
# Bind `collection` on both paths (freshly built or already present)
collection = client.get_or_create_collection(collection_name)

# Let's visualize the vectorized data

In [None]:
# It is very fun turning this up to 400_000 and seeing the full dataset visualized,
# but it almost crashes my box every time so do that at your own risk!! 10_000 is safe!

# Passed as `limit` when fetching items from the collection for the plots
MAXIMUM_DATAPOINTS = 5_000

In [None]:
# One plot color per product category; the two lists below stay index-aligned
# because both are derived from this mapping's insertion order.
CATEGORY_COLORS = {
    'Appliances': 'cyan',
    'Automotive': 'blue',
    'Cell_Phones_and_Accessories': 'brown',
    'Electronics': 'orange',
    'Musical_Instruments': 'yellow',
    'Office_Products': 'green',
    'Tools_and_Home_Improvement': 'purple',
    'Toys_and_Games': 'red',
}
CATEGORIES = list(CATEGORY_COLORS.keys())
COLORS = list(CATEGORY_COLORS.values())

In [None]:
# Prework: fetch up to MAXIMUM_DATAPOINTS stored items and derive the plot inputs
result = collection.get(
    limit=MAXIMUM_DATAPOINTS,
    include=['embeddings', 'documents', 'metadatas'],
)
vectors = np.array(result['embeddings'])
documents = result['documents']
# Category of each item, read from its metadata
categories = [entry['category'] for entry in result['metadatas']]
# Color of each item, looked up by its category's position in CATEGORIES
colors = [COLORS[CATEGORIES.index(category)] for category in categories]

In [None]:
# Let's try a 2D chart
# TSNE stands for t-distributed Stochastic Neighbor Embedding - it's a common technique for reducing dimensionality of data

# Reduce the embeddings to 2 components; random_state is pinned to 42
tsne = TSNE(n_components=2, random_state=42, n_jobs=-1)
reduced_vectors = tsne.fit_transform(vectors)

In [None]:
# Create the 2D scatter plot: one marker per item, colored by category,
# with the category and the first 50 characters of the text shown on hover
fig = go.Figure(data=[go.Scatter(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    mode='markers',
    marker=dict(size=3, color=colors, opacity=0.7),
    text=[f"Category: {c}<br>Text: {d[:50]}..." for c, d in zip(categories, documents)],
    hoverinfo='text'
)])

fig.update_layout(
    title='2D Chroma Vectorstore Visualization',
    # `scene` only configures 3D plots; 2D axis titles are set on the layout directly
    xaxis_title='x',
    yaxis_title='y',
    width=1200,
    height=800,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

In [None]:
# Let's try 3D!

# Same reduction as the 2D cell but with 3 components; overwrites `tsne` and `reduced_vectors`
tsne = TSNE(n_components=3, random_state=42, n_jobs=-1)
reduced_vectors = tsne.fit_transform(vectors)

In [None]:
# Create the 3D scatter plot

# Hover label per point: its category plus the first 50 characters of its text
hover_labels = [
    f"Category: {c}<br>Text: {d[:50]}..."
    for c, d in zip(categories, documents)
]

scatter_3d = go.Scatter3d(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    z=reduced_vectors[:, 2],
    mode='markers',
    marker={'size': 3, 'color': colors, 'opacity': 0.7},
    text=hover_labels,
    hoverinfo='text',
)
fig = go.Figure(data=[scatter_3d])

fig.update_layout(
    title='3D Chroma Vector Store Visualization',
    scene={'xaxis_title': 'x', 'yaxis_title': 'y', 'zaxis_title': 'z'},
    width=1200,
    height=800,
    margin={'r': 20, 'b': 10, 'l': 10, 't': 40},
)

fig.show()

In [None]:
# And now - set up OpenAI

# Client built with no explicit arguments; presumably picks up OPENAI_API_KEY from the environment set earlier
openai = OpenAI()

In [None]:
# Load in the test pickle file
# NOTE(review): unpickling can execute arbitrary code - only load test.pkl from a trusted source

with open('../test.pkl', 'rb') as file:
    test = pickle.load(file)

In [None]:
# We need to give some context to GPT-4o-mini by selecting 5 products with similar descriptions

def make_context(similars, prices):
    """Build the context preamble that lists similar products with their prices.

    `similars` and `prices` are walked in lockstep; each pair contributes one
    "Potentially related product" paragraph with the price formatted to two
    decimals. Returns the intro sentence followed by all paragraphs.
    """
    intro = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
    paragraphs = [
        f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
        for similar, price in zip(similars, prices)
    ]
    return intro + "".join(paragraphs)

In [None]:
def messages_for(item, similars, prices):
    """Assemble the chat messages for a price estimate with retrieved context.

    The user message is the similar-products context, a short lead-in, and the
    item's test prompt with the " to the nearest dollar" phrase and the
    trailing "Price is $" stub removed. The stub is supplied instead as a
    final assistant message.
    """
    context = make_context(similars, prices)
    question = item.test_prompt()
    for fragment in (" to the nearest dollar", "\n\nPrice is $"):
        question = question.replace(fragment, "")
    user_prompt = "".join([context, "And now the question for you:\n\n", question])

    system = {
        "role": "system",
        "content": "You estimate prices of items. Reply only with the price, no explanation",
    }
    user = {"role": "user", "content": user_prompt}
    assistant = {"role": "assistant", "content": "Price is $"}
    return [system, user, assistant]

In [None]:
def vector(item):
    """Encode the item's text with the sentence-transformer `model`.

    The text is passed as a one-element list, so the result is whatever
    `model.encode` returns for a single-text batch.
    """
    texts = [item.text]
    return model.encode(texts)

In [None]:
def find_similars(item):
    """Look up the 5 nearest stored products for `item`.

    Returns a `(documents, prices)` pair: the matched document texts and the
    `price` value from each match's metadata, in the order the query gave them.
    """
    query_embeddings = vector(item).astype(float).tolist()
    hits = collection.query(query_embeddings=query_embeddings, n_results=5)
    # Only one query was sent, so take the first (only) result list of each kind
    similar_documents = hits['documents'][0][:]
    similar_prices = [metadata['price'] for metadata in hits['metadatas'][0]]
    return similar_documents, similar_prices

In [None]:
# Show the text of the test item at index 1
print(test[1].text)

In [None]:
# Retrieve the similar documents and their prices for that test item (rebinds the notebook-level `documents`)
documents, prices = find_similars(test[1])

In [None]:
# Print the context text built from those similar items
print(make_context(documents, prices))

In [None]:
# Utility function that extracts a price from a response from GPT-4o-mini

def get_price(s):
    """Pull the first number out of a model reply.

    Dollar signs and commas are stripped first. Returns the first decimal or
    integer found as a float, or 0 when the text contains no number.
    """
    cleaned = s.replace('$', '').replace(',', '')
    found = re.search(r"[-+]?\d*\.\d+|\d+", cleaned)
    if found is None:
        return 0
    return float(found.group())

In [None]:
# Quick check: a price embedded in surrounding text
get_price("blah blah the price is $99.99 blah")

In [None]:
# The function for gpt-4o-mini

def gpt_4o_mini_rag(item):
    """Estimate the item's price with gpt-4o-mini, using retrieved similar items as context.

    Finds similar products, builds the chat messages from them, requests a
    completion (seed 42, at most 5 tokens), and returns the number parsed
    from the reply.
    """
    similar_documents, similar_prices = find_similars(item)
    chat_messages = messages_for(item, similar_documents, similar_prices)
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
        seed=42,
        max_tokens=5,
    )
    return get_price(completion.choices[0].message.content)

In [None]:
# What's the actual price of this per Amazon?
# (reads the `price` attribute stored on the test item at index 1)

test[1].price

In [None]:
# OK, time for gpt-4o-mini plus RAG to try:
# (same test item, index 1, as the cells above)

gpt_4o_mini_rag(test[1])

# Were you following that?

Let's do it again with some print statements.

This is a "DIY" version of RAG; we're not using an abstraction layer like langchain to build the prompt, we're simply doing it ourselves.

In [None]:
# The function for gpt-4o-mini, now with print statements

def gpt_4o_mini_rag_explainer(item):
    """Estimate the item's price with gpt-4o-mini plus RAG, printing each step.

    Prints the item title, the first line and price of every retrieved similar
    item, the raw model reply, and the parsed price. Returns the parsed price.
    """
    documents, prices = find_similars(item)
    print(f"Asking GPT-4o-mini to estimate the price of {item.title}")
    print("Given similar prices of these items:")
    for document, price in zip(documents, prices):
        # Only the first line of each document is shown
        similar = document.split("\n")[0]
        print(f"Similar item: {similar} costs ${price:.2f}")
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages_for(item, documents, prices),
        seed=42,
        max_tokens=5
    )
    reply = response.choices[0].message.content
    print(f"\n\nGPT-4o-mini responded: {reply}")
    price = get_price(reply)
    print(f"Extracted price is {price:.2f}")
    return price

In [None]:
# Run the verbose version on the test item at index 1
gpt_4o_mini_rag_explainer(test[1])

## We will kick off the next line then take a 5 minute break

## When we come back: unveiling a proprietary fine-tuned LLM

In [None]:
# Hand the RAG estimator and the test items to the imported Tester (presumably scores predictions against actual prices - see the testing module)
Tester.test(gpt_4o_mini_rag, test)

In [None]:
# OpenAI client pointed at a local endpoint on port 11434; the api_key is a fixed string (presumably a placeholder the local server ignores)
ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

In [None]:
def simple_messages_for(item):
    """Assemble the chat messages for a price estimate without retrieved context.

    The user message is the item's test prompt with the " to the nearest
    dollar" phrase and the trailing "Price is $" stub removed. The stub is
    supplied instead as a final assistant message.
    """
    question = item.test_prompt()
    for fragment in (" to the nearest dollar", "\n\nPrice is $"):
        question = question.replace(fragment, "")

    system = {
        "role": "system",
        "content": "You estimate prices of items. Reply only with the price, no explanation",
    }
    user = {"role": "user", "content": question}
    assistant = {"role": "assistant", "content": "Price is $"}
    return [system, user, assistant]

In [None]:
def llama3_2_local(item):
    """Estimate the item's price with the llama3.2 model via `ollama_via_openai`, without RAG.

    Sends the context-free messages (seed 42, at most 5 tokens) and returns
    the number parsed from the reply.
    """
    chat_messages = simple_messages_for(item)
    completion = ollama_via_openai.chat.completions.create(
        model="llama3.2",
        messages=chat_messages,
        seed=42,
        max_tokens=5,
    )
    return get_price(completion.choices[0].message.content)
    

In [None]:
# Try the context-free llama3.2 estimator on the test item at index 1
llama3_2_local(test[1])

In [None]:
# Hand the context-free llama3.2 estimator and the test items to the imported Tester
Tester.test(llama3_2_local, test)

In [None]:
# And now let's add RAG:

def llama3_2_local_rag(item):
    """Estimate the item's price with llama3.2 via `ollama_via_openai`, using retrieved similar items as context.

    Finds similar products, builds the chat messages from them, requests a
    completion (seed 42, at most 5 tokens), and returns the number parsed
    from the reply.
    """
    similar_documents, similar_prices = find_similars(item)
    chat_messages = messages_for(item, similar_documents, similar_prices)
    completion = ollama_via_openai.chat.completions.create(
        model="llama3.2",
        messages=chat_messages,
        seed=42,
        max_tokens=5,
    )
    return get_price(completion.choices[0].message.content)

In [None]:
# Try the llama3.2 + RAG estimator on the test item at index 1
llama3_2_local_rag(test[1])

In [None]:
# Hand the llama3.2 + RAG estimator and the test items to the imported Tester
Tester.test(llama3_2_local_rag, test)