# Azure OpenAI API LangChain RAG

In [2]:
from azure_oai_password import endpoint, deployment, model_version, subscription_key, embedding_key, embedding_endpoint

import os
import shutil
import numpy as np
import pandas as pd

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import AzureOpenAIEmbeddings
from langchain.vectorstores import Chroma
from openai import AzureOpenAI

# Document Loading

In [3]:
# NOTE: machine-specific absolute path; point it at wherever the rules PDF lives.
pdf_path = r"C:\Users\jobake\Desktop\Uno_Rules.pdf"
loader = PyPDFLoader(pdf_path)

In [4]:
# Load the PDF through the loader; the result is a sequence of documents (one is inspected below).
pages = loader.load()

In [5]:
# Sanity check: how many documents the loader produced.
len(pages)

5

In [6]:
# Preview the first 500 characters of the text extracted from the first page.
first_page_text = pages[0].page_content
print(first_page_text[:500])

U N O
R U L E S
RULES.ORGUNO
THE OFFICIAL
THE RULES OF UNO ARE SIMPLE.HOWEVER, WE HAVE MADE IT EVENEASIER TO SURVEY THE RULES ANDADDED AN FAQ AT THE END! CHECK OUT UNORULES.ORG FOR MORE!
RULES.ORGUNO
PRESENTS


In [7]:
# Inspect the metadata dictionary attached to the first loaded document.
pages[0].metadata

{'producer': '3-Heights™ PDF Optimization Shell 6.3.1.5 (http://www.pdf-tools.com)',
 'creator': 'Canva',
 'creationdate': '2021-03-10T18:14:02+00:00',
 'keywords': 'DAEXabaDBgY,BADJP-KWezs',
 'author': 'Kamil Mastej',
 'title': 'Uno Rules PDF - unorules.org',
 'moddate': '2021-03-10T18:15:45+00:00',
 'source': 'C:\\Users\\jobake\\Desktop\\Uno_Rules.pdf',
 'total_pages': 5,
 'page': 0,
 'page_label': '1'}

# Document Splitting

In [8]:
# Chunking parameters; lengths are measured with len(), i.e. in characters.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 500

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
    # Adds a "start_index" entry to each chunk's metadata.
    add_start_index=True,
)

In [9]:
# Break the loaded pages into chunks with the text splitter.
splits = text_splitter.split_documents(pages)

In [10]:
# Sanity check: how many chunks the splitter produced.
len(splits)

16

# Embeddings

In [11]:
# Embedding client for the "text-embedding-3-large" model; key and endpoint are
# imported from azure_oai_password.
embedding = AzureOpenAIEmbeddings(
    model="text-embedding-3-large",
    azure_endpoint=embedding_endpoint,
    api_key=embedding_key,
)

# Vector Stores

In [12]:
# Directory handed to Chroma as its persist_directory.
# NOTE: machine-specific absolute path; change it when running elsewhere.
vector_dir = r"C:\Users\jobake\Desktop\chroma_dir"

In [13]:
# Start from an empty directory on every run: remove any existing one (and
# everything inside it), then create it again.
if os.path.exists(vector_dir):
    shutil.rmtree(vector_dir)
    
os.makedirs(vector_dir)

In [14]:
# Build the Chroma vector store from the chunks, using the embedding client
# and persisting under vector_dir.
vector_db = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=vector_dir
)

In [15]:
# Verify that the collection holds exactly as many entries as there are chunks.
n_splits = len(splits)
n_embeddings = vector_db._collection.count()  # _collection is a private attribute of the wrapper

if n_splits != n_embeddings:
    print("Splits to Embeddings Error")
else:
    print("Splits length equal to number of Embeddings at", n_splits)

Splits length equal to number of Embeddings at 16


# Similarity Search

In [16]:
TOP_K = 3  # number of hits requested from the vector store

query_text = "When do I say uno?"
# Each hit is a (document, score) pair.
sims = vector_db.similarity_search_with_relevance_scores(query_text, k=TOP_K)

In [17]:
# Flatten each (document, score) hit into one record: the chunk text, its
# score, and then every metadata field of the chunk.
sims_results = [
    {"page_content": doc.page_content, "score": relevance, **doc.metadata}
    for doc, relevance in sims
]

In [18]:
# Tabulate the hits, one row per retrieved chunk.
# The DataFrame constructor accepts a list of record dicts directly, so there is
# no need to create an empty frame just to reach the from_dict classmethod.
sims_df = pd.DataFrame(sims_results)
sims_df

Unnamed: 0,page_content,score,author,creationdate,creator,keywords,moddate,page,page_label,producer,source,start_index,title,total_pages
0,You may not catch a player for failure to say ...,0.449086,Kamil Mastej,2021-03-10T18:14:02+00:00,Canva,"DAEXabaDBgY,BADJP-KWezs",2021-03-10T18:15:45+00:00,3,4,3-Heights™ PDF Optimization Shell 6.3.1.5 (htt...,C:\Users\jobake\Desktop\Uno_Rules.pdf,1203,Uno Rules PDF - unorules.org,5
1,"If you are not caught before the next player, ...",0.434668,Kamil Mastej,2021-03-10T18:14:02+00:00,Canva,"DAEXabaDBgY,BADJP-KWezs",2021-03-10T18:15:45+00:00,3,4,3-Heights™ PDF Optimization Shell 6.3.1.5 (htt...,C:\Users\jobake\Desktop\Uno_Rules.pdf,693,Uno Rules PDF - unorules.org,5
2,Q: How does a challenge work in Uno?A: A chall...,0.427216,Kamil Mastej,2021-03-10T18:14:02+00:00,Canva,"DAEXabaDBgY,BADJP-KWezs",2021-03-10T18:15:45+00:00,4,5,3-Heights™ PDF Optimization Shell 6.3.1.5 (htt...,C:\Users\jobake\Desktop\Uno_Rules.pdf,1186,Uno Rules PDF - unorules.org,5


In [19]:
# Row 0 is the first hit returned by the search; show its score and chunk text.
top_score = sims_df.loc[0, "score"]
top_content = sims_df.loc[0, "page_content"]

print("Largest Similarity Search Score", "\n")
print(top_score, "\n")
print(top_content)

Largest Similarity Search Score 

0.44908581012776283 

You may not catch a player for failure to say "UNO!" until his/her second-to-lastcard touches the DISCARD pile. Also, you may not catch a player for failure to sayit after the next player begins his/her turn. 
"Beginning a turn" is defined as either drawing a card from the DRAW pile ordrawing a card from your hand to play.
If the last card played in a hand is a Draw 2 card or Wild Draw 4 Card, the nextplayer must draw 2 or 4 cards, depending upon the card played. These cards arecounted when points are totaled.
If no one is out of cards by the time the DRAW pile is depleted, reshuffle theDISCARD pile and continue play.
SCORING AND WINNING
If you are the first to get rid of all your cards, you get the points for the cards left inthe other player's hands. Record the score after each hand. Each card is worth thefollowing:
0-9+2+2 +4+4
20 PointsFace Value 20 Points 20 Points 50 Points 50 Points


# Prepare the LLM Prompt using RAG

In [20]:
# Instruction header for the model; the retrieved context is appended to this
# string to form the system message.
prompt = """Use the following pieces of context to answer the question at the end. 
Use three sentences maximum. 
Keep the answer as concise as possible. 
Always say "Hope this helps!" at the end of the answer. \n
"""

In [21]:
# Concatenate the text of every retrieved chunk, with a visible rule between chunks.
chunk_separator = "\n\n---\n\n"
context_text = chunk_separator.join(doc.page_content for doc, _score in sims)

In [22]:
# Full system prompt: the instruction header immediately followed by the retrieved context.
prompt_context = f"{prompt}{context_text}"
print(prompt_context)

Use the following pieces of context to answer the question at the end. 
Use three sentences maximum. 
Keep the answer as concise as possible. 
Always say "Hope this helps!" at the end of the answer. 

You may not catch a player for failure to say "UNO!" until his/her second-to-lastcard touches the DISCARD pile. Also, you may not catch a player for failure to sayit after the next player begins his/her turn. 
"Beginning a turn" is defined as either drawing a card from the DRAW pile ordrawing a card from your hand to play.
If the last card played in a hand is a Draw 2 card or Wild Draw 4 Card, the nextplayer must draw 2 or 4 cards, depending upon the card played. These cards arecounted when points are totaled.
If no one is out of cards by the time the DRAW pile is depleted, reshuffle theDISCARD pile and continue play.
SCORING AND WINNING
If you are the first to get rid of all your cards, you get the points for the cards left inthe other player's hands. Record the score after each hand. Ea

In [23]:
# Chat payload: instructions plus context go in the system turn, the question in the user turn.
system_message = {"role": "system", "content": prompt_context}
user_message = {"role": "user", "content": query_text}

messages = [system_message, user_message]

# Initialize Client Service with Key-Based Authentication

In [24]:
# Key-authenticated Azure OpenAI client; all settings are imported from azure_oai_password.
client = AzureOpenAI(
    api_key=subscription_key,
    # NOTE(review): model_version is passed as the API version — confirm it holds an API version string.
    api_version=model_version,
    azure_endpoint=endpoint,
)

# Generate Completion

In [25]:
# Generation settings, kept apart from the request payload for readability.
generation_settings = {
    "max_tokens": 800,
    "temperature": 0,
    "top_p": 0.95,
    "frequency_penalty": 0,
    "presence_penalty": 0,
    "stop": None,
    "stream": False,
}

# Send the system and user messages to the deployment and wait for the full (non-streamed) reply.
completion = client.chat.completions.create(
    model=deployment,
    messages=messages,
    **generation_settings,
)

In [26]:
# Show the message text of the first returned choice.
answer = completion.choices[0].message.content
print("Response:", "\n   ", answer)

Response: 
    You must say "UNO" before playing your second-to-last card. If you fail to do so and another player catches you with just one card before the next player begins their turn, you must draw four more cards from the DRAW pile. Hope this helps!
