In [1]:
# Optional first-run setup: uncomment to install dependencies into the kernel's environment.
# %pip install --upgrade pip
# %pip install -r requirements.txt

In [2]:
from evaluation import KnowledgeBasesEvaluations
from observability import BedrockLogs

# Import configuration values
from config import (
    REGION, FIREHOSE_NAME, CRAWLER_NAME, MODEL_ARN, KB_ID, EXPERIMENT_DESCRIPTION,
    APPLICATION_NAME, CUSTOM_TAG, GUARDRAIL_ID, GUARDRAIL_VERSION,
    MAX_TOKENS, TEMPERATURE, TOP_P, MODEL_ID_EVAL, MODEL_ID_GEN
)

from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    answer_similarity)

# Observability handle, built from the delivery stream name in config and a fixed
# experiment id. Its `watch` decorator wraps the functions defined further down.
bedrock_logs = BedrockLogs(
    delivery_stream_name=FIREHOSE_NAME,
    experiment_id="343233",
)

# RAGAS metrics passed to every KnowledgeBasesEvaluations run in this notebook.
metrics = [faithfulness, answer_relevancy, context_recall, answer_similarity]

# Evaluation prompts. Each entry is paired by position with the entry at the same
# index in `ground_truths`, so the two lists must stay the same length and order.
questions = [
    "How does the Company determine if an arrangement qualifies as a lease?",
    "What is the Company's policy regarding the recognition of right-of-use (ROU) assets and lease liabilities for short-term leases?",
    "How are operating lease ROU assets and lease liabilities presented on the Company's consolidated balance sheets?",
    "What was the total operating lease cost recognized by the Company during the year ended December 31, 2022?",
    "What was the weighted-average remaining lease term for operating leases as of December 31, 2022?",
    "According to the future minimum lease payment schedule, what is the total amount of lease payments due after 2027?",
    "What was the total operating lease liabilities of the Company as of December 31, 2022?",
    "What was the amount of finance lease right-of-use (ROU) assets included in property and equipment, net, on the consolidated balance sheets as of December 31, 2022?",
    "What were the weighted-average remaining lease term and weighted-average discount rate for the Company's finance leases as of December 31, 2022?",
    "What was the total amount of future minimum lease payments under non-cancelable finance leases as of December 31, 2022?",
    "How much did the Company receive in total proceeds from the two sale-leaseback transactions involving certain office buildings and data centers during the year ended December 31, 2022?",
    "What type of lease arrangements does the Company enter into as a lessor, and how does it recognize the related income?",
    "What was the total sublease income recognized by the Company during the year ended December 31, 2022, and where was it reported in the financial statements?",
    "According to the information provided, what are the expected undiscounted future cash inflows from non-cancelable operating subleases for the year 2025?",
    "Explain the significant judgments and estimates involved in the Company's lease accounting policies, as mentioned in the context information.",
]

# Reference answers, index-aligned with `questions` (entry i answers question i).
ground_truths = [
    "The Company determines if an arrangement is a lease at inception.",
    "The Company has elected not to recognize ROU assets and lease liabilities for short-term leases that have a lease term of 12 months or less. The Company recognizes the lease payments associated with its short-term leases as an expense on a straight-line basis over the lease term.",
    "Operating lease ROU assets are presented separately on the consolidated balance sheets. Operating lease liabilities are presented separately as current and non-current liabilities on the consolidated balance sheets.",
    "The Company recognized operating lease costs of $22.4 million during the year ended December 31, 2022.",
    "The weighted-average remaining lease term for operating leases as of December 31, 2022, was 5.2 years.",
    "According to the future minimum lease payment schedule, the total amount of lease payments due after 2027 is $22,400,000.",
    "$92,300",
    "$6.2 million",
    "The weighted-average remaining lease term for finance leases as of December 31, 2022 was 3.7 years, and the weighted-average discount rate was 5.2%.",
    "$7,100,000",
    "The Company received total proceeds of $48.5 million from the two sale-leaseback transactions involving certain office buildings and data centers during the year ended December 31, 2022.",
    "The Company enters into arrangements to sublease certain office spaces to third parties. These sublease arrangements are classified as operating leases, and the Company recognizes sublease income on a straight-line basis over the lease term.",
    "The Company recognized sublease income of $3.2 million during the year ended December 31, 2022, which was included in other income, net, in the consolidated statements of operations.",
    "According to the information provided, the expected undiscounted future cash inflows from non-cancelable operating subleases for the year 2025 is $2,400,000.",
    "The significant judgments and estimates involved in the Company's lease accounting policies, as mentioned in the context information, are:\n\n1. Determining whether a contract contains a lease, which requires evaluating if the Company has the right to control the use of an identified asset for a period of time in exchange for consideration.\n\n2. Allocating contract consideration between lease and non-lease components, particularly for arrangements involving multiple components, which involves applying judgment in determining the appropriate allocation methodology and standalone prices for each component.\n\n3. Determining the lease term, which involves assessing the likelihood of exercising renewal or termination options.\n\n4. Determining the incremental borrowing rate used to measure lease liabilities.",
]

@bedrock_logs.watch(call_type='RAG-Evaluation')
def test_function(application_metadata):
    """Evaluate the question/ground-truth pair carried in ``application_metadata``.

    Args:
        application_metadata: Mapping that must contain ``'question'`` and
            ``'ground_truth'``; both are forwarded unchanged to
            ``KnowledgeBasesEvaluations``. No other keys are read in this
            function (presumably they are consumed by the ``watch``
            decorator -- confirm in ``observability``).

    Returns:
        dict with the keys ``evaluation_results``, ``questions``,
        ``ground_truth``, ``generated_answers`` and ``contexts``, each taken
        from the evaluator attribute of the same name after ``evaluate()``
        has run.

    Raises:
        KeyError: if ``'question'`` or ``'ground_truth'`` is missing.
    """
    # Pull the inputs out first so a malformed metadata dict fails before any
    # evaluator is constructed.
    question_batch = application_metadata['question']
    reference_batch = application_metadata['ground_truth']

    evaluator = KnowledgeBasesEvaluations(
        model_id_eval=MODEL_ID_EVAL,
        model_id_generation=MODEL_ID_GEN,
        metrics=metrics,
        questions=question_batch,
        ground_truth=reference_batch,
        KB_ID=KB_ID,
    )
    evaluator.evaluate()

    return {
        "evaluation_results": evaluator.evaluation_results,
        "questions": evaluator.questions,
        "ground_truth": evaluator.ground_truth,
        "generated_answers": evaluator.generated_answers,
        "contexts": evaluator.contexts,
    }

# zip() stops at the shorter input, so a length mismatch between the two lists
# would silently skip evaluations. Fail loudly instead.
if len(questions) != len(ground_truths):
    raise ValueError(
        "questions ({}) and ground_truths ({}) must have the same number of entries".format(
            len(questions), len(ground_truths)
        )
    )

# Run one evaluation per question/ground-truth pair.
for question, ground_truth in zip(questions, ground_truths):
    application_metadata = {
        # Single-element lists: test_function forwards these values as-is to
        # KnowledgeBasesEvaluations as its `questions` / `ground_truth` arguments.
        'question': [question],
        'ground_truth': [ground_truth],
        # Run settings taken from config; test_function itself does not read them.
        'experiment_description': EXPERIMENT_DESCRIPTION,
        'application_name': APPLICATION_NAME,
        'custom_tag': CUSTOM_TAG,
        'max_token': MAX_TOKENS,
        'temperature': TEMPERATURE,
        'top_p': TOP_P,
        'kb_id': KB_ID,
    }
    # execute the test and track it:
    test_function(application_metadata)
    

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
@bedrock_logs.watch(call_type='Agent-Observability')
def my_agent_func(arg1_tracked, arg2_not_tracked, arg3_not_tracked):
    """Stub showing ``bedrock_logs.watch`` applied with ``call_type='Agent-Observability'``.

    The body contains no logic: the three arguments are unused and the
    function returns ``None``.

    NOTE(review): the parameter names suggest only the first argument is
    recorded by the decorator -- confirm against ``BedrockLogs.watch``.
    """

# End