# RAG with Gemini Pro

In [6]:
# Packages
from RAG_Functions import *
import time

## Embedding Model

In [7]:
# Packages
from sentence_transformers import SentenceTransformer

In [8]:
# Sentence-embedding model; it is later handed to get_mixedbread_of_query
# together with the user's query text.
EMBEDDING_MODEL_NAME = "mixedbread-ai/mxbai-embed-large-v1"
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Keep the model as the cell's final expression so its module layout is shown.
embedding_model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

## Chat Model

In [32]:
import google.generativeai as genai
import os

# Resolve the Gemini API key without ever writing it into the notebook.
#   1. Prefer the local key file '~/Documents/Google/data-engineering-project.txt'
#      (unchanged behaviour on a machine where that file exists).
#   2. Otherwise fall back to the GOOGLE_API_KEY environment variable, so the
#      notebook can also run on machines that do not have the key file.
key_path = os.path.expanduser('~/Documents/Google/data-engineering-project.txt')
if os.path.isfile(key_path):
    with open(key_path) as f:
        GOOGLE_API_KEY = f.read().strip()
else:
    GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "").strip()

# An empty key cannot authenticate; stop here with an actionable message.
if not GOOGLE_API_KEY:
    raise ValueError(
        "No Google API key found: put it in "
        "'~/Documents/Google/data-engineering-project.txt' "
        "or set the GOOGLE_API_KEY environment variable."
    )

genai.configure(api_key=GOOGLE_API_KEY)

# Chat model used to answer the retrieval-augmented prompt.
chat_model = genai.GenerativeModel('gemini-1.0-pro-latest')
print(chat_model)

genai.GenerativeModel(
    model_name='models/gemini-1.0-pro-latest',
    generation_config={},
    safety_settings={},
    tools=None,
    system_instruction=None,
)


## Milvus Connection

In [33]:
from pymilvus import Collection, connections

# Connect to the Milvus server running on this machine.
connections.connect(
    host="localhost",
    port="19530",
)

# Attach to the collection that already exists on the server.
collection = Collection("text_embeddings")

# Index settings that were drafted in this cell but are disabled
# (this cell neither drops nor creates an index on the "embedding" field):
#   - metric_type "COSINE", index_type "FLAT"
#   - metric_type "L2", index_type "IVF_FLAT", params {"nlist": 128}

# Load the collection before it is queried in the chat section.
collection.load()

## Perform Chat

In [34]:
# Chat with model: read the user's question from standard input.
input_text = input()

# Embed the query before the timer starts, so that only the
# return_top_5_sentences call below is measured.
input_embedding = get_mixedbread_of_query(embedding_model, input_text)

# Time the retrieval with perf_counter(): unlike time.time(), it is monotonic
# and uses the highest-resolution clock available, so the measured interval
# cannot be distorted by system clock adjustments.
start_time = time.perf_counter()

# Top 5 sentences retrieved from the collection for this query embedding.
top5_sentences = return_top_5_sentences(collection, input_embedding)

end_time = time.perf_counter()

# Elapsed retrieval time, in seconds.
query_time = end_time - start_time

print(top5_sentences)

['iCloud: What Is Personal Data at Apple?', 'iCloud: Details including salary, income, and assets information where collected, and information related to Apple-branded financial offerings\nGovernment ID Data.', 'iCloud: Apple may collect data about you from other individuals     for example, if that individual has sent you a product or gift card, invited you to participate in an Apple service or forum, or shared content with you.', 'iCloud: Descriptions of how Apple handles personal data for certain individual services are available at apple.com/legal/privacy/data.', 'iCloud: You also can view this information at any time, either in Settings related to those features and/or online at apple.com/legal/privacy.']


In [35]:
# Construct prompt
# Layout of the assembled prompt, one item per line:
#   <fixed context header>
#   <each retrieved sentence, in the order it was returned>
#   User Query:
#   <the user's query text>
context_header = "Context That May Be Helpful (You May Disregard if Not Helpful):"
query_section = "User Query:\n" + input_text

prompt_lines = [context_header] + top5_sentences + [query_section]
prompt = "\n".join(prompt_lines)

# Echo the final prompt so the exact text sent to the model is visible.
print(prompt)

Context That May Be Helpful (You May Disregard if Not Helpful):
iCloud: What Is Personal Data at Apple?
iCloud: Details including salary, income, and assets information where collected, and information related to Apple-branded financial offerings
Government ID Data.
iCloud: Apple may collect data about you from other individuals     for example, if that individual has sent you a product or gift card, invited you to participate in an Apple service or forum, or shared content with you.
iCloud: Descriptions of how Apple handles personal data for certain individual services are available at apple.com/legal/privacy/data.
iCloud: You also can view this information at any time, either in Settings related to those features and/or online at apple.com/legal/privacy.
User Query:
What data does Apple store about me?


In [36]:
# Get response
# Send the assembled prompt to the chat model; the returned object is kept in
# `response` so it can be inspected further.
response = chat_model.generate_content(prompt)

# Disabled alternative: render the reply as quoted Markdown instead of plain text.
# from IPython.display import Markdown
# import textwrap
# def to_markdown(text):
#   text = text.replace('•', '  *')
#   return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))
# to_markdown(response.text)

# Print the reply's text as plain text.
print(response.text)

- iCloud: Details including salary, income, and assets information where collected, and information related to Apple-branded financial offerings
- Government ID Data
- Data from other individuals who have interacted with you through Apple services


In [37]:
#print(top5_sentences.get('sentence'))

In [38]:
# for hits in top5_sentences:
#     # Get ids
#     print(hits.ids)
    
#     # Get distances
#     print(hits.distances)
    
#     for hit in hits:
#         # Get id
#         print(hit.id)
        
#         # Get distance
#         print(hit.distance) # hit.score
        
#         # Get vector
#         #hit.vector
        
#         # Get output field
#         print(hit.get("sentence"))

In [39]:

# Tokenize
# input_ids = chat_tokenizer(input_text, return_tensors="pt").input_ids

# outputs = chat_model.generate(input_ids)
# print(chat_tokenizer.decode(outputs[0]))