<a href="https://colab.research.google.com/github/andreuartigues/Deep-Learning/blob/main/RAG_LLm_easy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q --upgrade google-cloud-aiplatform
!pip install -q pypdf

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h

## Deploying Gemma to Vertex AI

In [2]:
# Google Cloud settings used by the cells below.
GCP_PROJECT_ID = "gdsc-llm-gcp"      # project ID handed to the Colab authentication call
PROJECT_NUMBER = "983720878576"      # passed as `project` when querying the endpoint
ENDPOINT = "1774723917416497152"     # passed as `endpoint_id` when querying the endpoint

In [3]:
# Authenticate this Colab session for the configured project so the
# Google Cloud calls in the following cells can run.
from google.colab import auth
auth.authenticate_user(project_id=GCP_PROJECT_ID)

In [4]:
from typing import Dict, List, Union
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value

def predict_custom_trained_model_sample(
    project: str,
    endpoint_id: str,
    instances: Union[Dict, List[Dict]],
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """
    Send instances to a prediction endpoint and return the first prediction's text.

    Args:
      - project (str): project identifier used to build the endpoint path.
      - endpoint_id (str): identifier of the endpoint to query.
      - instances (Dict or List[Dict]): a single instance or a list of instances.
      - location (str): region used to build the endpoint path.
      - api_endpoint (str): API host handed to the prediction client.

    Returns
      - The first prediction, reduced to the text that follows its last
        "Output:" marker line (the whole prediction if there is no marker).
    """
    service = aiplatform.gapic.PredictionServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # A lone dict is treated as a batch of one.
    if not isinstance(instances, list):
        instances = [instances]

    # Every instance dict is converted to a protobuf Value before sending.
    payload = []
    for raw_instance in instances:
        payload.append(json_format.ParseDict(raw_instance, Value()))

    # No extra prediction parameters are sent with the request.
    request_parameters = json_format.ParseDict({}, Value())

    endpoint_path = service.endpoint_path(
        project=project, location=location, endpoint=endpoint_id
    )
    response = service.predict(
        endpoint=endpoint_path, instances=payload, parameters=request_parameters
    )

    # Only the first prediction is used, even when several instances were sent.
    first_prediction = list(response.predictions)[0]
    return first_prediction.split('\nOutput:\n')[-1]


In [5]:
# Smoke-test request: a single prompt together with its sampling settings.
instances = [
    dict(
        prompt="What is a car?",
        max_tokens=50,
        temperature=1.0,
        top_p=1.0,
        top_k=10,
    ),
]

In [6]:
# Send the instances defined above to the endpoint and keep the returned text.
res = predict_custom_trained_model_sample(
    instances=instances,
    project=PROJECT_NUMBER,
    endpoint_id=ENDPOINT,
    location="us-central1",
)

In [7]:
# Show the text returned by the endpoint.
print(res)

A car is a vehicle that is primarily designed to transport people. It is typically powered by an engine and has four wheels. Cars are used for transportation, recreation, and work.


## Vector Embeddings

In [12]:
from vertexai.language_models import TextEmbeddingModel

def text_embedding(text_input, model_name="textembedding-gecko@001") -> list:
    """
    Text embedding with a Large Language Model.

    Args:
      - text_input (str). The text to embed.
      - model_name (str). Name passed to `TextEmbeddingModel.from_pretrained`.
        Defaults to the model this notebook has used so far.

    Returns
      - vector (List[floats]). The values of the embedding of `text_input`.

    Raises
      - ValueError if the model returns no embedding.

    """
    model = TextEmbeddingModel.from_pretrained(model_name)
    embeddings = list(model.get_embeddings([text_input]))
    if not embeddings:
        raise ValueError("The embedding model returned no embedding for the input text.")
    # Exactly one text was sent, so the first embedding is the one we want.
    return embeddings[0].values


In [13]:
# Sanity check: embed one sentence and display the resulting vector.
text_embedding('My name is Cesar and Im a masters student at IE')

[-0.05585367605090141,
 -0.010443017818033695,
 0.01454449724406004,
 0.05574707314372063,
 0.04002979397773743,
 -0.018696900457143784,
 0.042522404342889786,
 -0.009606636129319668,
 0.003927952144294977,
 0.01396148931235075,
 0.002298352774232626,
 0.013790574856102467,
 -0.03282814472913742,
 -0.012836765497922897,
 0.027354616671800613,
 -0.03451155498623848,
 -0.039617106318473816,
 -0.050513047724962234,
 0.03604802489280701,
 0.041966792196035385,
 -0.04956933483481407,
 -0.0005892147310078144,
 0.002748505910858512,
 -0.007880281656980515,
 -0.001824500854127109,
 -0.05320842191576958,
 -0.01749778538942337,
 -0.06078367307782173,
 0.03985645994544029,
 -0.0218134056776762,
 0.03048328496515751,
 0.017242107540369034,
 -0.035648979246616364,
 0.00971344206482172,
 -0.0030730366706848145,
 0.02330121025443077,
 -0.03649340569972992,
 -0.0013629054883494973,
 -0.00021251407451927662,
 0.009062591940164566,
 0.04576975479722023,
 0.018228042870759964,
 0.04485514014959335,
 -0.0

# Storing embeddings

### Task 1. Choose a data structure that can store text chunks together with their embeddings, and write a function that adds a new text chunk to it.

In [11]:
def add_chunk(text_string, vector_memory=None):
  """
  Adds new text to vector memory.

  Text that is already stored is left untouched, and blank text is ignored.

  Args:
    - text_string (str). The text we want to embed and store.
    - vector_memory (Dict) Dictionary mapping from text_strings to embedding vectors.
      When omitted, the notebook-level `vector_memory` dictionary is used.
  """
  if vector_memory is None:
    # Look the shared store up at call time instead of capturing it as a
    # default argument: a default is bound once, when the function is defined,
    # so it fails if the store does not exist yet and keeps pointing at the
    # old dictionary after the store is re-created.
    vector_memory = globals()["vector_memory"]

  # Blank text has nothing to retrieve, so it is never embedded or stored.
  if not text_string or not text_string.strip():
    return

  if text_string not in vector_memory:
    vector_memory[text_string] = text_embedding(text_string)

In [10]:
# Notebook-wide store mapping each text chunk to its embedding vector.
vector_memory = dict()

# Retrieving embeddings

### Task 2. Write a function to retrieve the k most similar embeddings from that data structure.

In [36]:
def find_k_nearest_neighbors(query: str, k, vector_memory=None):
  """
  Given a query string, retrieves the k most similar text chunks.

  Similarity is measured with the squared Euclidean distance between the
  query embedding and each stored embedding (smaller means more similar).

  Args:
    - query (string).
    - k (int). Maximum number of text chunks to return.
    - vector_memory (Dict). Maps of text and embeddings. When omitted, the
      notebook-level `vector_memory` dictionary is used.

  Returns:
    - List with up to k text strings, most similar first.
  """
  if vector_memory is None:
    # Resolve the shared store at call time; a default argument would be
    # bound once at definition time and could point at a stale dictionary.
    vector_memory = globals()["vector_memory"]

  # Nothing to rank: return early and skip the embedding call.
  if k <= 0 or not vector_memory:
    return []

  query_vector = text_embedding(query)

  # One plain number per stored text, so the texts can be ordered by it.
  distances = {
    text: sum((a - b) ** 2 for a, b in zip(embedding, query_vector))
    for text, embedding in vector_memory.items()
  }

  # Sort the texts by their distance to the query, closest first.
  ranked_texts = sorted(distances, key=distances.get)
  return ranked_texts[:k]



# Loading PDFs

### Task 3. Write a function that takes a filename, extracts the text of a PDF document, and splits it into chunks. You can use pypdf for that.

In [15]:
!pip install pypdf



In [22]:
import pypdf
filename = '1706.03762.pdf'
# Preview: open the PDF in binary mode and print the text of the page at
# index 1 to see what the extracted text looks like.
with open(filename, 'rb') as pdf_file:
    pdf_reader = pypdf.PdfReader(pdf_file)  # reader built on the opened file
    page = pdf_reader.pages[1]
    print(page.extract_text())  # text extracted from that single page


1 Introduction
Recurrent neural networks, long short-term memory [ 13] and gated recurrent [ 7] neural networks
in particular, have been firmly established as state of the art approaches in sequence modeling and
transduction problems such as language modeling and machine translation [ 35,2,5]. Numerous
efforts have since continued to push the boundaries of recurrent language models and encoder-decoder
architectures [38, 24, 15].
Recurrent models typically factor computation along the symbol positions of the input and output
sequences. Aligning the positions to steps in computation time, they generate a sequence of hidden
states ht, as a function of the previous hidden state ht−1and the input for position t. This inherently
sequential nature precludes parallelization within training examples, which becomes critical at longer
sequence lengths, as memory constraints limit batching across examples. Recent work has achieved
significant improvements in computational efficiency through factor

In [27]:
import pypdf

def extract_text_from_pdf(filename):
  """
  Loads a PDF file and chunks it.

  Every page is read and split on newlines; the pieces of all pages are
  returned together, in document order. Pieces that are empty after
  stripping surrounding whitespace are dropped.

  Args:
    - filename (str): path to PDF file.

  Returns:
    - List of text chunks (one per non-blank line) from the whole PDF.
      Empty when the PDF has no pages or no extractable text.
  """
  chunks = []

  with open(filename, 'rb') as pdf_file:
    pdf_reader = pypdf.PdfReader(pdf_file)

    for page in pdf_reader.pages:
      # Treat a page without extractable text as empty.
      text = page.extract_text() or ""
      # Accumulate across pages instead of keeping only the last page.
      for piece in text.split("\n"):
        piece = piece.strip()
        if piece:
          chunks.append(piece)

  return chunks


### Task 4. Load text chunks, create embeddings and save them in your vector data structure.

In [30]:
# Get the chunks from the PDF
chunks = extract_text_from_pdf('1706.03762.pdf')

# Add each chunk to the vector memory
for chunk in chunks:
  add_chunk(chunk)

### Task 5. Get a question from the user and find the most relevant context from the PDF you loaded.

In [48]:
# Obtain the query from the user
query = input('Enter your query: ')
k = 3

# Query the vector memory
results = find_k_nearest_neighbors(query, k)

Enter your query: what is a recural neural network?


### Task 6. Use prompt engineering and the Gemma LLM to answer the question based on the context you retrieved.

In [53]:
# Build the context first: `prompt` has to exist before the request that
# embeds it is assembled, otherwise this fails (or silently reuses an old
# prompt) when the notebook is run top to bottom.
# Join the retrieved items one per line. Joining str(results) instead would
# iterate over the characters of that string and put each one on its own line.
# `results` itself is left untouched so this cell can be re-run safely.
context = '\n'.join(
    item if isinstance(item, str) else str(item[0])  # plain text, or the text of a (text, distance) pair
    for item in results
)


# Prompt the LLM with the results
prompt = f"""Based on the following context items, please answer the query.
Give yourself room to think by extracting relevant passages from the context before answering the query.
Don't return the thinking, only return the answer.
Make sure your answers are as explanatory as possible.
{context}
\nRelevant passages: <extract relevant passages from the context here>
User query: {query}
Answer:"""

# Request for the endpoint: the prompt plus its sampling settings.
instances = [
    {
        "prompt": prompt,
        "max_tokens": 50,
        "temperature": 1.0,
        "top_p": 1.0,
        "top_k": 10,
    },
]

In [51]:
# Send the request that carries the context-augmented prompt to the endpoint.
res = predict_custom_trained_model_sample(
    instances=instances,
    project=PROJECT_NUMBER,
    endpoint_id=ENDPOINT,
    location="us-central1",
)

In [52]:
# Show the text the endpoint returned for the prompt.
print(res)

 <answer based on the relevant passages>


### Relevant Passages:

- Layer5: Input and output are both sequences of numbers representing continuous values.
- The: Represents the sequence of numbers in the input data.

### Answer:


Next steps:
- Store the vectors in an embedding database
- Deploy the trained model
- Use the chat history as context
- Return the response in a specific format
- Deploy the function to the cloud