In [None]:
%pip install --upgrade --quiet google-cloud-aiplatform==1.55.0 nest-asyncio==1.5.9

In [None]:
import vertexai
from vertexai.preview.evaluation import (
    EvalTask,
    PromptTemplate,
    CustomMetric,
    make_metric,
)
import pandas as pd
import datetime
import nest_asyncio
from IPython.display import display, Markdown, HTML

from vertexai.generative_models import (
    GenerativeModel,
    HarmCategory,
    HarmBlockThreshold
)

nest_asyncio.apply()


In [None]:
import vertexai

PROJECT_ID = ! gcloud config get-value project
PROJECT_ID = PROJECT_ID[0]
LOCATION = "us-central1" # @param {type:"string"}

# define project information manually if the above code didn't work
if PROJECT_ID == "(unset)":
  PROJECT_ID = "[your-project-id]" # @param {type:"string"}

print(PROJECT_ID)

vertexai.init(project=PROJECT_ID, location=LOCATION)

## Download Apartment Rental Data

In [None]:
# Copy the sample apartment table from the Cloud Storage bucket into the
# current working directory so the next cell can read it with pandas.
!gcloud storage cp gs://pls-resource-bucket/evaluation-data/apartment_table.csv .

In [None]:
# Load the downloaded CSV into a DataFrame and preview its first five rows.
apartment_df = pd.read_csv("apartment_table.csv")
apartment_df.head(5)


In [None]:
# Turn every row into a plain dict (one per apartment) and preview the first.
apartment_records = apartment_df.to_dict(orient="records")
apartment_records[0]

## Use Gemini to create a rental listing based on the features of the apartment

In [None]:
# Gemini model used for every listing in this notebook; sampling is configured
# with temperature 0 and top_p 0.4.
model = GenerativeModel(
    model_name="gemini-1.5-flash-001",
    generation_config=dict(temperature=0, top_p=0.4),
)

# Shared instruction; each apartment record is appended directly after it.
prompt = (
    "Write a one-paragraph apartment listing\n"
    "to promote the best features of this apartment: "
)

# Generate a listing for the first apartment and render it as Markdown so it
# reads nicely in the notebook output.
Markdown(model.generate_content(f"{prompt}{apartment_records[0]}").text)


In [None]:
# The string form of each record is the context handed to the model.
contexts = list(map(str, apartment_records))
# A full prompt is the shared instruction followed by that record's context.
full_prompts = [prompt + context for context in contexts]

eval_dataset = pd.DataFrame(
    dict(
        # 'content' is used to generate responses
        content=full_prompts,
        # 'instruction' is considered by some metrics, like fulfillment
        instruction=full_prompts,
        # 'context' is the information provided to your model to help
        # it generate more informed & accurate responses
        context=contexts,
        # If you had already generated responses for all of your examples
        # you could provide them as a list of values under a 'response'
        # key instead of having the evaluation service re-generate them.
        # response=responses,
    )
)


In [None]:
# Use a loop variable other than `prompt`: in a notebook the loop variable is
# a global, so reusing that name would overwrite the prompt template and make
# a re-run of the dataset cell build doubled prompts.
for full_prompt in full_prompts:
  print(full_prompt)

## Evaluate the model's response for each apartment listing using the Fulfillment and Groundedness metrics

Fulfillment measures how completely the model's response does what the instruction asked, i.e. the model included everything.

Groundedness measures whether the response sticks to the information in the provided context, i.e. the model didn't make anything up.


In [None]:
# Evaluation task over the listing dataset, scored on fulfillment and
# groundedness, with the experiment name passed to EvalTask.
qa_eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=["fulfillment", "groundedness"],
    experiment="apartment-listing-generation",
)


In [None]:
# Put a timestamp in the run name so repeated executions get distinct names.
run_ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
result = qa_eval_task.evaluate(
    model=model,
    experiment_run_name=f"apartment-listing-gen-{run_ts}",
)

# Collect results in a list so later cells can compare and plot them.
evaluation_results = [result]


In [None]:
def display_eval_report(eval_result, metrics=None):
    """Render one evaluation result as Markdown headings plus two tables.

    Args:
        eval_result: A ``(title, summary_metrics, report_df)`` triple. The
            summary metrics mapping is converted to a DataFrame and
            transposed so that each metric becomes a column; ``report_df``
            is the DataFrame shown under "Report Metrics".
        metrics: Optional collection of substrings. When non-empty, both
            tables keep only the columns whose name contains at least one
            of these substrings.
    """
    title, summary_metrics, report_df = eval_result
    summary_df = pd.DataFrame.from_dict(summary_metrics, orient="index").T

    if metrics:
        def _matching_columns(frame):
            # Columns whose name contains any of the requested substrings.
            return [
                column
                for column in frame.columns
                if any(wanted in column for wanted in metrics)
            ]

        summary_df = summary_df.filter(_matching_columns(summary_df))
        report_df = report_df.filter(_matching_columns(report_df))

    # Title first, then the aggregate table, then the per-example table.
    display(Markdown(f"## {title}"))
    display(Markdown("### Summary Metrics"))
    display(summary_df)
    display(Markdown("### Report Metrics"))
    display(report_df)


# Show the first evaluation run as a (title, summary, per-example table) triple.
display_eval_report(
    ("Eval Result", result.summary_metrics, result.metrics_table)
)


In [None]:
# Print the groundedness explanation stored under index 7 of the per-example
# metrics table.
# NOTE(review): the index 7 is hard-coded, presumably as an illustrative
# example; confirm the table has an entry at that index for your dataset.
print(result.metrics_table["groundedness/explanation"][7])

In [None]:
# A small wording change: telling the model to use only the supplied details
# should stop it from inventing as many facts about each apartment.
updated_prompt = (
    "Write an apartment listing promoting the best features of this apartment. "
    "Use only the details included in the following information: "
)

updated_full_prompts = [f"{updated_prompt}{record}" for record in apartment_records]

# Same columns as before; only the prompt text differs.
eval_dataset = pd.DataFrame(
    dict(
        content=updated_full_prompts,
        instruction=updated_full_prompts,
        context=contexts,  # The contexts haven't changed
    )
)


In [None]:
# Rebuild the task on the updated dataset, keeping the same metrics and
# experiment name as the first evaluation.
qa_eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=["fulfillment", "groundedness"],
    experiment="apartment-listing-generation",
)

# A fresh timestamp gives this run its own name.
run_ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
result = qa_eval_task.evaluate(
    model=model,
    experiment_run_name=f"apartment-listing-gen-{run_ts}",
)

# Keep this run alongside the earlier one for the comparison plot.
evaluation_results.append(result)

# Preview the results
display_eval_report(
    ("Eval Result 2", result.summary_metrics, result.metrics_table)
)


In [None]:
# Print the first stored evaluation result (the run that used the original
# prompt) for inspection.
print(evaluation_results[0])

In [None]:
import plotly.graph_objects as go


def plot_bar_plot(eval_results, metrics=None, titles=None):
    """Plot the summary metrics of several evaluation results as grouped bars.

    Args:
        eval_results: Iterable of objects exposing a ``summary_metrics``
            mapping of metric name to value. Each one becomes a bar trace.
        metrics: Optional collection of substrings. When non-empty, only
            summary metrics whose name contains at least one of them are
            plotted.
        titles: Optional sequence of legend labels, matched to
            ``eval_results`` by position. Runs without a label are named
            "Run 1", "Run 2", ... so the legend can tell the runs apart.
    """
    traces = []
    for run_index, eval_result in enumerate(eval_results):
        summary_metrics = eval_result.summary_metrics
        if metrics:
            summary_metrics = {
                name: value
                for name, value in summary_metrics.items()
                if any(selected_metric in name for selected_metric in metrics)
            }

        # Prefer a caller-supplied label; otherwise number the runs.
        if titles is not None and run_index < len(titles):
            trace_name = titles[run_index]
        else:
            trace_name = f"Run {run_index + 1}"

        traces.append(
            go.Bar(
                x=list(summary_metrics.keys()),
                y=list(summary_metrics.values()),
                name=trace_name,
            )
        )

    fig = go.Figure(data=traces)

    # Place the bars of different runs side by side for each metric.
    fig.update_layout(
        barmode="group",
        xaxis_title="Metric",
        yaxis_title="Value",
    )
    fig.show()


# Compare the mean fulfillment and groundedness scores across all stored runs.
plot_bar_plot(
    evaluation_results,
    metrics=["fulfillment/mean", "groundedness/mean"],
)


In [None]:
# Print the first stored evaluation result again.
# NOTE(review): this repeats the identical print cell that precedes the
# plotting cell; it looks like a leftover and is likely removable.
print(evaluation_results[0])