# Retrieval Augmented Generation (RAG) and Vector Databases

In [1]:
!pip install getenv openai==1.12.0

Collecting getenv
  Downloading getenv-0.2.0-py3-none-any.whl (2.6 kB)
Collecting openai==1.12.0
  Using cached openai-1.12.0-py3-none-any.whl (226 kB)
Installing collected packages: getenv, openai
  Attempting uninstall: openai
    Found existing installation: openai 1.14.2
    Uninstalling openai-1.14.2:
      Successfully uninstalled openai-1.14.2
Successfully installed getenv-0.2.0 openai-1.12.0



[notice] A new release of pip is available: 23.1.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [89]:
import os
import pandas as pd
import numpy as np
import openai
from dotenv import load_dotenv

## Creating our Knowledge base

Creating a Azure Cosmos DB database


In [3]:
pip install azure-cosmos

Collecting azure-cosmos
  Downloading azure_cosmos-4.6.0-py3-none-any.whl (243 kB)
                                              0.0/243.1 kB ? eta -:--:--
     -----------------                      112.6/243.1 kB 2.2 MB/s eta 0:00:01
     ------------------------------------   235.5/243.1 kB 3.6 MB/s eta 0:00:01
     -------------------------------------- 243.1/243.1 kB 2.5 MB/s eta 0:00:00
Collecting azure-core>=1.25.1 (from azure-cosmos)
  Downloading azure_core-1.30.1-py3-none-any.whl (193 kB)
                                              0.0/193.4 kB ? eta -:--:--
     -------------------------------------- 193.4/193.4 kB 5.7 MB/s eta 0:00:00
Installing collected packages: azure-core, azure-cosmos
Successfully installed azure-core-1.30.1 azure-cosmos-4.6.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
## create your cosmoss db on Azure CLI using the following commands
## az login
## az group create -n <resource-group-name> -l <location>
## az cosmosdb create -n <cosmos-db-name> -r <resource-group-name>
## az cosmosdb list-keys -n <cosmos-db-name> -g <resource-group-name>

## Once done navigate to data explorer and create a new database and a new container


In [94]:
from azure.cosmos import CosmosClient

load_dotenv(r"C:\Users\ankshah0\Documents\Sapient Documents\GenAI\generative-ai-for-beginners\.env")

# Initialize Cosmos Client
url = os.getenv('COSMOS_DB_ENDPOINT')
key = os.getenv('COSMOS_DB_KEY')

client = CosmosClient(url, credential=key)

# Select database
database_name = 'rag-cosmos-db'
database = client.get_database_client(database_name)

# Select container
container_name = 'data'
container = database.get_container_client(container_name)

In [95]:
epoint=os.getenv('AZURE_OPENAI_ENDPOINT')

In [45]:
import pandas as pd

# Initialize an empty DataFrame
df = pd.DataFrame(columns=['path', 'text'])

# splitting our data into chunks
#data_paths= ["data/frameworks.md?WT.mc_id=academic-105485-koreyst", "data/own_framework.md?WT.mc_id=academic-105485-koreyst", "data/perceptron.md?WT.mc_id=academic-105485-koreyst"]
data_paths= ["data/frameworks.md", "data/own_framework.md", "data/perceptron.md"]

for path in data_paths:
    with open(path, 'r', encoding='utf-8') as file:
        file_content = file.read()
    
    # Append the file path and text to the DataFrame
    new_row = pd.DataFrame([{'path': path, 'text': file_content}])
    df = pd.concat([df, new_row], ignore_index=True)

df.head()

Unnamed: 0,path,text
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...
1,data/own_framework.md,# Introduction to Neural Networks. Multi-Layer...
2,data/perceptron.md,# Introduction to Neural Networks: Perceptron\...


In [77]:
def split_text(text, max_length, min_length):
    words = text.split()
    chunks = []
    current_chunk = []

    for word in words:
        current_chunk.append(word)
        if len(' '.join(current_chunk)) < max_length and len(' '.join(current_chunk)) > min_length:
            chunks.append(' '.join(current_chunk))
            current_chunk = []

    # If the last chunk didn't reach the minimum length, add it anyway
    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks

# Assuming analyzed_df is a pandas DataFrame and 'output_content' is a column in that DataFrame
splitted_df = df.copy()
splitted_df['chunks'] = splitted_df['text'].apply(lambda x: split_text(x, 400, 300))

splitted_df

Unnamed: 0,path,text,chunks
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,[# Neural Network Frameworks As we have learne...
1,data/own_framework.md,# Introduction to Neural Networks. Multi-Layer...,[# Introduction to Neural Networks. Multi-Laye...
2,data/perceptron.md,# Introduction to Neural Networks: Perceptron\...,[# Introduction to Neural Networks: Perceptron...


In [60]:
# Assuming 'chunks' is a column of lists in the DataFrame splitted_df, we will split the chunks into different rows
flattened_df = splitted_df.explode('chunks')

flattened_df.head()

Unnamed: 0,path,text,chunks
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,# Neural Network Frameworks As we have learned...
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,descent optimization While the `numpy` library...
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,should give us the opportunity to compute grad...
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,those computations on GPUs is very important. ...
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,"API, there is also higher-level API, called Ke..."


## Converting our text to embeddings

Converting out text  to embeddings, and storing them in our database in chunks

In [55]:
openai.api_type = "azure"
openai.api_key = os.getenv("AZURE_OPENAI_KEY") 
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT") 
openai.api_version = "2023-07-01-preview"

In [56]:
from openai import OpenAI
client = OpenAI(api_key=os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"))

In [72]:
def create_embeddings(text, model="my-text-embedding-ada-002"):
    # Create embeddings for each document chunk
    embeddings = openai.embeddings.create(input = text, model=model).data[0].embedding
    return embeddings

In [73]:
#embeddings for the first chunk
create_embeddings(flattened_df['chunks'][0])

[-0.017071647569537163,
 0.002827746095135808,
 0.025336062535643578,
 -0.038865819573402405,
 0.006856478750705719,
 0.003945613279938698,
 -0.006249200087040663,
 -0.0032145045697689056,
 -0.0029091688338667154,
 -0.029339350759983063,
 0.03490324318408966,
 0.020450694486498833,
 0.001463914173655212,
 0.0030346957501024008,
 -0.01465610507875681,
 -0.011019219644367695,
 0.02207915112376213,
 0.009105783887207508,
 -0.029339350759983063,
 -0.020572828128933907,
 -0.03555462509393692,
 -0.003704737639054656,
 0.01300729252398014,
 -0.03441470488905907,
 -0.03042498789727688,
 -0.0015835039084777236,
 0.01544319186359644,
 -0.043642621487379074,
 -0.007538394536823034,
 -0.014235420152544975,
 0.01971788890659809,
 0.012606964446604252,
 -0.012668031267821789,
 -0.015592467039823532,
 -0.004719130229204893,
 0.011114212684333324,
 0.001191656687296927,
 0.008203347213566303,
 -0.00034668302396312356,
 -0.0017929980531334877,
 0.04033143073320389,
 0.011290628463029861,
 -0.0097910920

In [74]:
cat = create_embeddings("cat")
cat

[-0.007107354234904051,
 -0.01739795319736004,
 -0.009769510477781296,
 -0.03062720224261284,
 -0.012626631185412407,
 0.0031548854894936085,
 -0.0050478167831897736,
 -0.04123328998684883,
 -0.014590458944439888,
 -0.021311428397893906,
 0.019170360639691353,
 0.05081846937537193,
 -0.001183082116767764,
 0.002591259777545929,
 -0.0383690781891346,
 -0.006128985434770584,
 0.03544815257191658,
 -0.004604714922606945,
 0.002406929386779666,
 -0.013463207520544529,
 -0.018957670778036118,
 0.00901092030107975,
 0.01583823375403881,
 -0.008734424598515034,
 -0.014562101103365421,
 0.007082540541887283,
 0.013122905977070332,
 -0.013243429362773895,
 0.0029439690988510847,
 0.00485285185277462,
 0.00407653721049428,
 -0.01673152670264244,
 -0.015753159299492836,
 -0.04304823651909828,
 -0.027110746130347252,
 -0.004317584913223982,
 0.008011282421648502,
 -0.009854585863649845,
 0.02206292934715748,
 -0.009060547687113285,
 0.004930838011205196,
 0.00023971812333911657,
 -0.01208072993904

In [75]:
# create embeddings for the whole data chunks and store them in a list

embeddings = []
for chunk in flattened_df['chunks']:
    embeddings.append(create_embeddings(chunk))

# store the embeddings in the dataframe
flattened_df['embeddings'] = embeddings

flattened_df.head()

Unnamed: 0,path,text,chunks,embeddings
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,# Neural Network Frameworks As we have learned...,"[-0.017071647569537163, 0.002827746095135808, ..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,descent optimization While the `numpy` library...,"[-0.01478797011077404, 0.0016882119234651327, ..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,should give us the opportunity to compute grad...,"[-0.03678734600543976, -0.020647864788770676, ..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,those computations on GPUs is very important. ...,"[-0.031713567674160004, -0.011103862896561623,..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,"API, there is also higher-level API, called Ke...","[-0.008054508827626705, -0.033310648053884506,..."


# Retrieval

Vector search and similiarity between our prompt and the database

### Creating an search index and reranking

In [78]:
from sklearn.neighbors import NearestNeighbors

embeddings = flattened_df['embeddings'].to_list()

# Create the search index
nbrs = NearestNeighbors(n_neighbors=5, algorithm='ball_tree').fit(embeddings)

# To query the index, you can use the kneighbors method
distances, indices = nbrs.kneighbors(embeddings)

# Store the indices and distances in the DataFrame
flattened_df['indices'] = indices.tolist()
flattened_df['distances'] = distances.tolist()

flattened_df.head()

Unnamed: 0,path,text,chunks,embeddings,indices,distances
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,# Neural Network Frameworks As we have learned...,"[-0.017071647569537163, 0.002827746095135808, ...","[0, 2, 11, 3, 1]","[0.0, 0.5233715652770051, 0.5282499261628433, ..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,descent optimization While the `numpy` library...,"[-0.01478797011077404, 0.0016882119234651327, ...","[1, 0, 32, 2, 50]","[0.0, 0.5701155822856672, 0.592189339823302, 0..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,should give us the opportunity to compute grad...,"[-0.03678734600543976, -0.020647864788770676, ...","[2, 3, 0, 5, 1]","[0.0, 0.5057228769731683, 0.5233715652770051, ..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,those computations on GPUs is very important. ...,"[-0.031713567674160004, -0.011103862896561623,...","[3, 2, 0, 10, 11]","[0.0, 0.5057228769731683, 0.5461794498898873, ..."
0,data/frameworks.md,# Neural Network Frameworks\n\nAs we have lear...,"API, there is also higher-level API, called Ke...","[-0.008054508827626705, -0.033310648053884506,...","[4, 12, 10, 8, 9]","[0.0, 0.5201643386566065, 0.5530482770720577, ..."


In [79]:
# Your text question
question = "what is a perceptron?"

# Convert the question to a query vector
query_vector = create_embeddings(question)  # You need to define this function

# Find the most similar documents
distances, indices = nbrs.kneighbors([query_vector])

index = []
# Print the most similar documents
for i in range(3):
    index = indices[0][i]
    for index in indices[0]:
        print(flattened_df['chunks'].iloc[index])
        print(flattened_df['path'].iloc[index])
        print(flattened_df['distances'].iloc[index])
    else:
        print(f"Index {index} not found in DataFrame")

in our model, in which case the input vector would be a vector of size N. A perceptron is a **binary classification** model, i.e. it can distinguish between two classes of input data. We will assume that for each input vector x the output of our perceptron would be either +1 or -1, depending on the class.
data/perceptron.md
[0.0, 0.5280529852680852, 0.5362877369675818, 0.5444286682491558, 0.5542846368687017]
# Introduction to Neural Networks: Perceptron One of the first attempts to implement something similar to a modern neural network was done by Frank Rosenblatt from Cornell Aeronautical Laboratory in 1957. It was a hardware implementation called "Mark-1", designed to recognize primitive geometric figures,
data/perceptron.md
[0.0, 0.45846243712732504, 0.5236378295599984, 0.5630992993431824, 0.5635853484842597]
user to adjust the resistance of a circuit. > The New York Times wrote about perceptron at that time: *the embryo of an electronic computer that [the Navy] expects will be able

## Putting it all together to answer a question

In [82]:
import os
import openai

openai.api_type = "azure"
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_version = "2023-07-01-preview"
openai.api_key = os.getenv("AZURE_OPENAI_KEY")

print(openai.api_base)

https://azure-openai-genai-test-001.openai.azure.com/


In [84]:
user_input = "what is a perceptron?"

def chatbot(user_input):
    # Convert the question to a query vector
    query_vector = create_embeddings(user_input)

    # Find the most similar documents
    distances, indices = nbrs.kneighbors([query_vector])

    # add documents to query  to provide context
    history = []
    for index in indices[0]:
        history.append(flattened_df['chunks'].iloc[index])

    # combine the history and the user input
    history.append(user_input)

    # create a message object
    messages=[
        {"role": "system", "content": "You are an AI assiatant that helps with AI questions."},
        {"role": "user", "content": history[-1]}
    ]

    # use chat completion to generate a response
    response = openai.chat.completions.create(
        model="my-gpt-35-deploy",
        temperature=0.7,
        max_tokens=800,
        messages=messages
    )

    return response.choices[0].message

chatbot(user_input)

ChatCompletionMessage(content='A perceptron is a type of artificial neuron, which is a fundamental building block of artificial neural networks. It is a mathematical model inspired by the biological neuron in the human brain. \n\nA perceptron takes multiple inputs, each multiplied by a corresponding weight, and sums them up. It then applies an activation function to the result to produce an output. The activation function is typically a simple threshold function that determines whether the perceptron should fire or not based on the weighted sum of inputs.\n\nPerceptrons are used to approximate functions or classify inputs into different categories based on the learned weights. They are often arranged in layers to form artificial neural networks, enabling more complex tasks like pattern recognition and decision-making.\n\nThe perceptron algorithm was first proposed by Frank Rosenblatt in 1957 and is considered one of the earliest forms of machine learning.', role='assistant', function_c

## Testing and evaluation

A basic example of how you can use Mean Average Precision (MAP) to evaluate the responses of your model based on their relevance.

In [85]:
from sklearn.metrics import average_precision_score

# Define your test cases
test_cases = [
    {
        "query": "What is a perceptron?",
        "relevant_responses": ["A perceptron is a type of artificial neuron.", "It's a binary classifier used in machine learning."],
        "irrelevant_responses": ["A perceptron is a type of fruit.", "It's a type of car."]
    },
    {
        "query": "What is machine learning?",
        "relevant_responses": ["Machine learning is a method of data analysis that automates analytical model building.", "It's a branch of artificial intelligence based on the idea that systems can learn from data, identify patterns and make decisions with minimal human intervention."],
        "irrelevant_responses": ["Machine learning is a type of fruit.", "It's a type of car."]
    },
    {
        "query": "What is deep learning?",
        "relevant_responses": ["Deep learning is a subset of machine learning in artificial intelligence (AI) that has networks capable of learning unsupervised from data that is unstructured or unlabeled.", "It's a type of machine learning."],
        "irrelevant_responses": ["Deep learning is a type of fruit.", "It's a type of car."]
    },
    {
        "query": "What is a neural network?",
        "relevant_responses": ["A neural network is a series of algorithms that endeavors to recognize underlying relationships in a set of data through a process that mimics the way the human brain operates.", "It's a type of machine learning."],
        "irrelevant_responses": ["A neural network is a type of fruit.", "It's a type of car."]
    }
]

# Initialize the total average precision
total_average_precision = 0

# Test the RAG application
for test_case in test_cases:
    query = test_case["query"]
    relevant_responses = test_case["relevant_responses"]
    irrelevant_responses = test_case["irrelevant_responses"]

    # Generate a response using your RAG application
    response = chatbot(query) 

    # Create a list of all responses and a list of true binary labels
    all_responses = relevant_responses + irrelevant_responses
    true_labels = [1] * len(relevant_responses) + [0] * len(irrelevant_responses)

    # Create a list of predicted scores based on whether the response is the generated response
    predicted_scores = [1 if resp == response else 0 for resp in all_responses]

    # Calculate the average precision for this query
    average_precision = average_precision_score(true_labels, predicted_scores)

    # Add the average precision to the total average precision
    total_average_precision += average_precision

# Calculate the mean average precision
mean_average_precision = total_average_precision / len(test_cases)

In [86]:
mean_average_precision

0.5