# Evaluation
- For retriever
- For generator

## Setting
 - Auto Reload
 - path for utils

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys, os
module_path = "../../.."
sys.path.append(os.path.abspath(module_path))

## 1. Bedrock Client 생성

In [3]:
import json
import boto3
from pprint import pprint
from termcolor import colored
from utils import bedrock, print_ww
from utils.bedrock import bedrock_info

### ---- ⚠️ Un-comment and edit the below lines as needed for your AWS setup ⚠️ ----
- os.environ["AWS_DEFAULT_REGION"] = "<REGION_NAME>"  # E.g. "us-east-1"
- os.environ["AWS_PROFILE"] = "<YOUR_PROFILE>"
- os.environ["BEDROCK_ASSUME_ROLE"] = "<YOUR_ROLE_ARN>"  # E.g. "arn:aws:..."
- os.environ["BEDROCK_ENDPOINT_URL"] = "<YOUR_ENDPOINT_URL>"  # E.g. "https://..."

In [4]:
boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE", None),
    endpoint_url=os.environ.get("BEDROCK_ENDPOINT_URL", None),
    region=os.environ.get("AWS_DEFAULT_REGION", None),
)

aws_region = os.environ.get("AWS_DEFAULT_REGION", None)
print (colored("\n== FM lists ==", "green"))
pprint (bedrock_info.get_list_fm_models())

Create new client
  Using region: None
  Using profile: None
boto3 Bedrock client successfully created!
bedrock-runtime(https://bedrock-runtime.us-east-1.amazonaws.com)
[32m
== FM lists ==[0m
{'Claude-Instant-V1': 'anthropic.claude-instant-v1',
 'Claude-V1': 'anthropic.claude-v1',
 'Claude-V2': 'anthropic.claude-v2',
 'Cohere-Embeddings-En': 'cohere.embed-english-v3',
 'Cohere-Embeddings-Multilingual': 'cohere.embed-multilingual-v3',
 'Command': 'cohere.command-text-v14',
 'Command-Light': 'cohere.command-light-text-v14',
 'Jurassic-2-Mid': 'ai21.j2-mid-v1',
 'Jurassic-2-Ultra': 'ai21.j2-ultra-v1',
 'Llama2-13b-Chat': 'meta.llama2-13b-chat-v1',
 'Titan-Embeddings-G1': 'amazon.titan-embed-text-v1',
 'Titan-Text-G1': 'amazon.titan-text-express-v1',
 'Titan-Text-G1-Light': 'amazon.titan-text-lite-v1'}


## 2.LLM 로딩 (Jurassic)

In [5]:
from langchain.llms.bedrock import Bedrock
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [6]:
llm_claude = Bedrock(
    model_id=bedrock_info.get_model_id(model_name="Claude-V2"),
    client=boto3_bedrock,
    model_kwargs={
        "max_tokens_to_sample": 512
    },
    streaming=False,
    callbacks=[StreamingStdOutCallbackHandler()]
)

llm_evaluator = Bedrock(
    model_id=bedrock_info.get_model_id(model_name="Jurassic-2-Ultra"),
    client=boto3_bedrock,
    model_kwargs={
        #"max_tokens": 512,
        "maxTokens": 512
    },
    streaming=False,
    callbacks=[StreamingStdOutCallbackHandler()]
)

## 3.Evaluation
 - [langchain evaluator](https://python.langchain.com/docs/guides/evaluation/string/criteria_eval_chain)

In [7]:
import pandas as pd

In [8]:
ground_thruth = pd.read_csv("eval_dataset.csv")

In [9]:
### 3.1

### 3.2 generation evaluator

In [10]:
from langchain.evaluation import Criteria
from langchain.evaluation import EvaluatorType
from langchain.evaluation import load_evaluator

In [11]:
list(Criteria)

[<Criteria.CONCISENESS: 'conciseness'>,
 <Criteria.RELEVANCE: 'relevance'>,
 <Criteria.CORRECTNESS: 'correctness'>,
 <Criteria.COHERENCE: 'coherence'>,
 <Criteria.HARMFULNESS: 'harmfulness'>,
 <Criteria.MALICIOUSNESS: 'maliciousness'>,
 <Criteria.HELPFULNESS: 'helpfulness'>,
 <Criteria.CONTROVERSIALITY: 'controversiality'>,
 <Criteria.MISOGYNY: 'misogyny'>,
 <Criteria.CRIMINALITY: 'criminality'>,
 <Criteria.INSENSITIVITY: 'insensitivity'>,
 <Criteria.DEPTH: 'depth'>,
 <Criteria.CREATIVITY: 'creativity'>,
 <Criteria.DETAIL: 'detail'>]

In [13]:
evaluator = load_evaluator(
    EvaluatorType.LABELED_CRITERIA,
    llm=llm_evaluator,
    criteria=Criteria.CORRECTNESS
)

### Check prompt for evaluation

In [14]:
ground_thruth.head()

Unnamed: 0,question,answer,doc_id,doc
0,How do you obtain the Knox E-FOTA client APK t...,"To obtain the Knox E-FOTA client APK, go to th...",287d4520-4a12-4155-9894-5bde9855f147,Install an app through an EMM. Explains how to...
1,What are the three general steps involved in i...,"1. Obtain the Knox E-FOTA client APK, 2. Add K...",287d4520-4a12-4155-9894-5bde9855f147,Install an app through an EMM. Explains how to...
2,How can you search for specific EMM groups in ...,You can search for specific EMM groups by name...,e701c1d1-77a5-497c-b178-d02c9e0a921a,Manage EMM groups. Explains all the tools you ...
3,What happens when an EMM group's status change...,When an EMM group's status changes from the in...,e701c1d1-77a5-497c-b178-d02c9e0a921a,Manage EMM groups. Explains all the tools you ...
4,What are the two steps required before you can...,You must add the required app to Blackberry an...,b557f573-4ab7-4737-ba5e-ead566f3e34a,Prerequisites for connecting to Blackberry. De...


### Set reranker model

In [15]:
import json
import boto3
from termcolor import colored

In [16]:
runtime_client = boto3.Session().client('sagemaker-runtime')
print (f'runtime_client: {runtime_client}')

runtime_client: <botocore.client.SageMakerRuntime object at 0x7f43e3b4a8f0>


In [17]:
#endpoint_name = "huggingface-pytorch-inference-2023-11-15-04-37-45-120" # ml.m5.2xlarge
endpoint_name = "huggingface-pytorch-inference-2023-11-15-07-53-21-605" # ml.g5.xlarge
deserializer = "application/json"

In [18]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

generation_prompt_template = """
Here is the context, inside <context></context> XML tags.

<context>
{context}
</context>

Only using the context as above, answer the following question with the rules as below:
    - Don't insert XML tag such as <context> and </context> when answering.
    - Write as much as you can
    - Be courteous and polite
    - Only answer the question if you can find the answer in the context with certainty.
    - Use three sentences maximum and keep the answer concise.
    - If the answer is not in the context, just say "Could not find answer in given contexts."

Question:
{question}

Answer:"""

PROMPT_GENERATION = PromptTemplate(
    template=generation_prompt_template, input_variables=["context", "question"]
)

llm_chain_generation = LLMChain(llm=llm_claude, prompt=PROMPT_GENERATION)

In [19]:
for row in ground_thruth.itertuples():
    question, answer = getattr(row, "question"), getattr(row, "answer")
    
    doc = getattr(row, "doc")
    prediction = llm_chain_generation.predict(
        question=question,
        context=doc
    )    
    payload = json.dumps(
        {
            "inputs": [
                {"text": answer, "text_pair": prediction},
            ]
        }
    )
    
    response = runtime_client.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType="application/json",
        Accept=deserializer,
        Body=payload
    )
    ## deserialization
    out = json.loads(response['Body'].read().decode()) ## for json
    
    eval_result = evaluator.evaluate_strings(
        input=question,
        prediction=prediction,
        reference=answer,
    )
    
    print (colored("Question: ", "red"), colored(question, "red"))
    print (colored("GroundTruth: ", "green"), colored(answer, "green"))
    print (colored("Prediction: ", "blue"), colored(prediction, "blue"))
    print (f'ReRanker similarity: {out[0]["score"]}')
    print (f'Langchain correctness: {eval_result["score"]}')

[31mQuestion: [0m [31mHow do you obtain the Knox E-FOTA client APK that is required to install the app through an EMM?[0m
[32mGroundTruth: [0m [32mTo obtain the Knox E-FOTA client APK, go to the Devices page, select All Devices, and click Direct Download. A download link for the APK file will be displayed, which can be copied to download the file to the local machine[0m
[34mPrediction: [0m [34m Go to Devices > All Devices. Click Direct Download. In the Direct Download popup, copy the download link for the APK file. The APK file is downloaded to your local machine.[0m
ReRanker similarity: 0.9997283816337585
Langchain correctness: 0
[31mQuestion: [0m [31mWhat are the three general steps involved in installing an app like Knox E-FOTA using an EMM?[0m
[32mGroundTruth: [0m [32m1. Obtain the Knox E-FOTA client APK, 2. Add Knox E-FOTA as an internal application and assign it, and 3. Set a policy to install the app to your target devices[0m
[34mPrediction: [0m [34m Based


KeyboardInterrupt

