# App design

This notebook explores the code needed to set up the app: loading configuration, building the RAG pipeline, and validating incoming requests.

In [23]:
import os 
import sys
import json
import tempfile

from dotenv import load_dotenv

project_path = os.path.dirname(os.getcwd())
sys.path.append(project_path)

from src.storage import StorageManager
from src.rag import RAG

# Load settings from the local .env file. With override=True, values from
# .env take precedence over variables already set in the process environment.
load_dotenv(override=True)


def _env_list(name):
    """Return the comma-separated environment variable ``name`` as a list.

    Surrounding whitespace is stripped from every entry and empty entries
    are dropped, so ``"title, abstract,"`` yields ``["title", "abstract"]``.

    Raises:
        ValueError: if the variable is not set. The original code failed in
            this case with an opaque ``AttributeError`` on ``None.split``.
    """
    raw = os.getenv(name)
    if raw is None:
        raise ValueError(
            f"Environment variable {name!r} is not set; "
            "expected a comma-separated list of field names."
        )
    return [item.strip() for item in raw.split(',') if item.strip()]


GOOGLE_API_KEY=os.getenv('GOOGLE_API_KEY')
GCP_PROJECT_ID=os.getenv('GCP_PROJECT_ID')
BUCKET=os.getenv('BUCKET')
INDEX_NAME = os.getenv('INDEX_NAME')
# The notebook runs against a local Elasticsearch instance, so the host is fixed.
ELASTICSEARCH_HOST = 'localhost'
# NOTE(review): os.getenv returns a string (or None); confirm that the RAG
# helper accepts the port as a string.
ELASTICSEARCH_PORT = os.getenv('ELASTICSEARCH_PORT')
EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL')
TEXT_FIELDS = _env_list('TEXT_FIELDS')
KEYWORD_FIELDS = _env_list('KEYWORD_FIELDS')

In [24]:
# Template used to render one retrieved document; placeholders are filled
# with str.format-style fields named category, paper and text.
entry_template = "\n".join(
    [
        "category: {category}",
        "paper: {paper}",
        "text: {text}",
    ]
)

# Prompt sent to the LLM. It exposes two str.format-style placeholders:
# {question} for the user query and {context} for the rendered context
# entries. The text is kept verbatim because its wording and whitespace
# are part of the prompt.
prompt_template = """
You are a research assistant specializing in various academic fields. 
Your task is to provide accurate and concise answers to questions based on the information extracted from the provided research papers.

**Question:** {question}

**Context:**

{context}

**Guidelines:**

* **Cite your sources:** If you reference specific information from a paper, include the paper title in parentheses, e.g., "(Attention is all You need)".
* **Prioritize relevance:** Only use information from the context that is directly relevant to the question.
* **Be concise:** Provide clear and focused answers without unnecessary elaboration.
* **Maintain academic tone:** Use language appropriate for an academic audience.
* **If the context doesn't contain enough information to fully answer the question, clearly state that you need more information or that the context doesn't address the question.**

**Answer:**
"""

In [25]:
# Storage client bound to the project and bucket configured in the environment.
storage_manager = StorageManager(bucket_name=BUCKET, gcp_project=GCP_PROJECT_ID)



In [26]:
# Fetch rag_config.json via the storage manager into a throwaway directory and
# parse it. Only the parsed dict outlives the temporary directory.
# NOTE(review): download_file appears to take (local_path, remote_name) — confirm
# against src.storage.
with tempfile.TemporaryDirectory() as tmp_dir:
    rag_config_path = os.path.join(tmp_dir, 'rag_config.json')
    storage_manager.download_file(rag_config_path, 'rag_config.json')
    # Open the file in a context manager so the handle is closed before the
    # temporary directory is removed (the original left it to the garbage collector).
    with open(rag_config_path) as config_file:
        rag_config = json.load(config_file)

In [17]:
# Assemble the RAG pipeline in three steps: apply the downloaded parameters,
# attach the Elasticsearch manager, then register the prompt templates.
rag = RAG(api_key=GOOGLE_API_KEY)
rag.update_parameters(**rag_config)

es_settings = {
    "index_name": INDEX_NAME,
    "text_fields": TEXT_FIELDS,
    "elasticsearch_host": ELASTICSEARCH_HOST,
    "elasticsearch_port": ELASTICSEARCH_PORT,
    "embedding_model_name": EMBEDDING_MODEL,
}
rag.get_es_manager(**es_settings)

template_settings = {
    "entry_template": entry_template,
    "prompt_template": prompt_template,
}
rag.set_prompt_templates(**template_settings)

In [18]:
# Restrict retrieval to a single category. The key must be spelled "category"
# to match the field name used by the entry template and by the request
# handling later in this notebook; the original "categoy" typo meant the
# filter did not reference that field.
filter_dict = {"category": "deeplearning"}
rag.update_parameters(filter_dict=filter_dict)

In [19]:
# Smoke-test the pipeline end to end with a sample question, using the
# Elasticsearch search backend; the returned value is shown as the cell output.
rag.answer(
    search="elasticsearch",
    query="What is the key idea of the paper 'Attention is all You need'?",
)

{'answer': 'The key idea of the paper "Attention is all You Need" is that **attention mechanisms can be used to replace recurrent neural networks (RNNs) in sequence-to-sequence models, achieving state-of-the-art results on machine translation tasks**.  The paper proposes a novel architecture called the Transformer, which relies solely on attention mechanisms to process input sequences and generate output sequences. This eliminates the need for RNNs, which are known to be computationally expensive and struggle with long-term dependencies.  The Transformer\'s success demonstrates the power of attention mechanisms in capturing complex relationships between elements in a sequence, leading to significant improvements in performance and efficiency. \n',
 'model_used': 'models/gemini-1.5-flash-latest',
 'response_time': 2.715599775314331,
 'relevance': 'RELEVANT',
 'relevance_explanation': 'The answer accurately summarizes the key idea of the paper, highlighting the use of attention mechanism

In [25]:
from marshmallow import Schema, fields, ValidationError

class QuestionSchema(Schema):
    """Request payload for asking a question.

    Fields:
        question: the user's question (mandatory string).
        category: optional string used to filter the search by category.
    """

    question = fields.String(required=True)
    category = fields.String()
    
class FeedbackSchema(Schema):
    """Request payload for submitting feedback.

    Fields:
        conversation_id: mandatory string identifier.
        feedback: mandatory integer value.
    """

    conversation_id = fields.String(required=True)
    feedback = fields.Integer(required=True)

In [42]:
# Sample request payload used to exercise the question schema.
request_data = dict(
    question="What is the key idea of the paper 'Attention is all You need'?",
    category="deeplearning",
)


In [41]:
question_schema = QuestionSchema()

# Reset the per-request values first. Without this, a failed validation would
# leave `category` undefined (NameError on a fresh kernel) or holding the value
# from a previous run, and that stale value would be applied as a filter.
question = None
category = None

try:
    data = question_schema.load(request_data)
except ValidationError as err:
    # Show the per-field validation messages instead of raising.
    print(err.messages)
else:
    question = data.get('question')
    category = data.get('category')

# Only narrow the search when the request actually supplied a category.
if category:
    filter_dict = {"category": category}
    rag.update_parameters(filter_dict=filter_dict)

{'question': ['Missing data for required field.'], 'questionr': ['Unknown field.']}


In [39]:
# Display the category value left by the request-validation cell.
category