In [1]:
import os
import openai
import numpy as np
import pandas as pd
from dotenv import load_dotenv

# Load variables from the .env file into the process environment.
load_dotenv()
# Stop immediately if the OpenAI key is missing or empty.
assert os.environ.get("OPENAI_API_KEY"), "Set OPENAI_API_KEY in .env file"

## LLM, no RAG

In [2]:
# Question posed to the model and used for retrieval throughout this notebook.
question = "What meeting room is at Monash University Building 60, 23 College Walk, Clayton?"
# Negated variant of the same question, left disabled; uncomment to try it instead.
# question = "What meeting room is NOT at Monash University Building 60, 23 College Walk, Clayton?"

In [3]:
# Baseline: send the bare question to the chat model with no retrieved context.
client = openai.Client()
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": question},
    ],
)
# Only the text of the first choice is shown.
print(response.choices[0].message.content)

As of my last update in October 2023, specific details about meeting rooms within particular buildings at Monash University, such as Building 60 at 23 College Walk, Clayton, are not typically available in public databases or general informational resources. 

If you need to find out about a specific meeting room or arrange a booking, it would be best to consult Monash University’s internal resources or contact their facilities management office directly. Staff in the relevant department or the university's administrative support can provide accurate and up-to-date information regarding meeting room locations, usage, and availability. You might also check your university portal or any internal directory for such details.


## Embeddings

In [4]:
from langchain_openai import OpenAIEmbeddings

In [5]:
# Documents
doc1 = "The Monash Generator boardroom is located in Building 60, 23 College Walk, Clayton VIC 3168"
doc2 = "State Library Victoria meeting rooms for 2-4 people are located at 328 Swanston St, Melbourne VIC 3000"
doc3 = "Angie likes veggie pizza"
# etc...

In [6]:
# Embedding client built with default arguments; this same instance is reused
# for the document embeddings and the vector store further down.
embd = OpenAIEmbeddings()
# Vector representation of the question text.
question_embedding = embd.embed_query(question)

In [7]:
# Show the raw embedding (a list of floats) computed for the question.
# NOTE(review): this renders the whole vector; a slice such as
# question_embedding[:5] would keep the output short.
question_embedding

[-0.0036692980211228132,
 0.00024434563238173723,
 -0.01626996323466301,
 -0.002958474215120077,
 -0.010214799083769321,
 0.019337035715579987,
 -0.00908274669200182,
 -0.018362943083047867,
 -0.005564827471971512,
 0.006252615246921778,
 0.0005738419713452458,
 -0.006249324418604374,
 -0.008253452368080616,
 0.004472265485674143,
 -0.02021898329257965,
 -0.016572721302509308,
 0.013597792945802212,
 -0.018257636576890945,
 0.004893494304269552,
 -0.02490515448153019,
 -0.0066837165504693985,
 0.01829712651669979,
 -0.009596118703484535,
 0.01180098857730627,
 -0.02477351948618889,
 0.0037910593673586845,
 0.006456647999584675,
 3.249714427511208e-05,
 -0.0023743484634906054,
 0.01303176674991846,
 0.019363362342119217,
 -0.005117272026836872,
 -0.006877876818180084,
 -0.033171769231557846,
 0.002969992347061634,
 0.0004944502143189311,
 0.002297013532370329,
 0.014255963265895844,
 0.03053908981382847,
 0.01255788467824459,
 0.002668879460543394,
 -0.01384789776057005,
 -0.01517740078

In [8]:
# Number of components in the question embedding.
len(question_embedding)

1536

In [9]:
# Embed all documents with a single batched call instead of three separate
# requests. embed_documents is the method intended for corpus text and returns
# one vector per input, in the same order as the inputs.
doc1_embedding, doc2_embedding, doc3_embedding = embd.embed_documents([doc1, doc2, doc3])

In [10]:

def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two 1-D vectors.

    Args:
        vec1: Sequence or array of numbers.
        vec2: Sequence or array of numbers with the same length as ``vec1``.

    Returns:
        A Python float, nominally in [-1, 1]. When either vector has zero
        magnitude (including empty input) the angle is undefined and 0.0 is
        returned instead of NaN.

    Raises:
        ValueError: If the inputs are not 1-D or do not have the same length.
    """
    a = np.asarray(vec1, dtype=float)
    b = np.asarray(vec2, dtype=float)
    if a.ndim != 1 or a.shape != b.shape:
        raise ValueError(
            f"expected two 1-D vectors of equal length, got shapes {a.shape} and {b.shape}"
        )
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0.0:
        # Avoid 0/0 -> NaN (and a RuntimeWarning) for zero-magnitude vectors.
        return 0.0
    return float(np.dot(a, b) / denominator)

In [11]:
cosine_similarity(doc1_embedding, question_embedding) # Similarity with "The Monash Generator boardroom ..."

0.9107857035005803

In [12]:
cosine_similarity(doc2_embedding, question_embedding) # Similarity with "State Library Victoria meeting rooms ..."

0.8441989700897204

In [13]:
cosine_similarity(doc3_embedding, question_embedding) # Similarity with "Angie likes veggie pizza"

0.7131511373240288

## Retrieval from Vector Store, No LLM

In [14]:
from langchain_community.vectorstores import Chroma

In [15]:
docs = [
    doc1, # "The Monash Generator boardroom ..."
    doc2, # "State Library Victoria meeting rooms ..."
    doc3, # "Angie likes veggie pizza"
    # etc.
]

# Stable, position-based ids. Without explicit ids each run of this cell gets
# fresh random ids, so re-running it would add the same texts again to the
# "funFacts" collection and retrieval could return duplicates. With fixed ids a
# re-run overwrites the existing entries instead.
doc_ids = [f"doc{i}" for i in range(1, len(docs) + 1)]

# Create Chroma vector store from the documents
vector_store = Chroma.from_texts(
    texts=docs,
    embedding=embd,
    ids=doc_ids,
    collection_name="funFacts",
)
retriever = vector_store.as_retriever(search_kwargs={'k': 2}) # for demo purposes will just get two closest, default is 4

In [16]:
# Ask the retriever for the stored documents closest to the question
# (the retriever was configured with k=2, so two documents come back).
closest_docs = retriever.invoke(question)
# Bare expression so the notebook displays the retrieved documents.
closest_docs

[Document(page_content='The Monash Generator boardroom is located in Building 60, 23 College Walk, Clayton VIC 3168'),
 Document(page_content='State Library Victoria meeting rooms for 2-4 people are located at 328 Swanston St, Melbourne VIC 3000')]

## RAG (Retrieval + LLM)

In [17]:
# Join the retrieved passages outside the f-string: a backslash inside an
# f-string replacement field is a SyntaxError on Python versions before 3.12.
context = "\n".join(doc.page_content for doc in closest_docs)

# Prompt that places the retrieved passages ahead of the user's question.
prompt = f"""Use the following context to help answer the question

Context:
{context}

Question: {question}
"""
print(prompt)

Use the following context to help answer the question

Context:
The Monash Generator boardroom is located in Building 60, 23 College Walk, Clayton VIC 3168
State Library Victoria meeting rooms for 2-4 people are located at 328 Swanston St, Melbourne VIC 3000

Question: What meeting room is at Monash University Building 60, 23 College Walk, Clayton?



In [18]:
# RAG step: send the context-augmented prompt to the same chat model.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": prompt},
    ],
)
# Only the text of the first choice is shown.
print(response.choices[0].message.content)

The Monash Generator boardroom is located at Monash University Building 60, 23 College Walk, Clayton VIC 3168.
