In [None]:
import os
import numpy as np
from src.utils.query_bigquery import query_bigquery
from src.utils.sample import get_stratified_sample
from src.utils.jsonify_data import jsonify_data
from src.utils.async_call_openai import gather_responses
from src.utils.write_to_bigquery import write_to_bigquery

# Runtime configuration, read once from the process environment.
# Every lookup yields None when its variable is unset.

# Credentials
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Project identifiers
FEEDBACK_PROJECT_ID = os.environ.get("FEEDBACK_PROJECT_ID")
PUBLISHING_PROJECT_ID = os.environ.get("PUBLISHING_PROJECT_ID")

# Source tables
FEEDBACK_TABLE = os.environ.get("FEEDBACK_TABLE")
PUBLISHING_TABLE = os.environ.get("PUBLISHING_TABLE")

# Labelled-feedback dataset and tables
LABELLED_FEEDBACK_DATASET = os.environ.get("LABELLED_FEEDBACK_DATASET")
LABELLED_FEEDBACK_TABLE = os.environ.get("LABELLED_FEEDBACK_TABLE")
# NOTE(review): the Python name says LABEL but the variable read is
# OPENAI_LABELLED_FEEDBACK_TABLE -- confirm the spelling difference is intended.
OPENAI_LABEL_FEEDBACK_TABLE = os.environ.get("OPENAI_LABELLED_FEEDBACK_TABLE")

# Review tables (current and previous)
OPENAI_LABEL_FEEDBACK_TABLE_FOR_REVIEW = os.environ.get(
    "OPENAI_LABEL_FEEDBACK_TABLE_FOR_REVIEW"
)
OLD_OPENAI_LABEL_FEEDBACK_TABLE_FOR_REVIEW = os.environ.get(
    "OLD_OPENAI_LABEL_FEEDBACK_TABLE_FOR_REVIEW"
)

# Evaluation output
EVALUATION_DATASET = os.environ.get("EVALUATION_DATASET")

In [None]:
# Query BQ to pull the human labelled feedback data.
# Fail fast when the table name is missing: os.getenv returns None for an unset
# variable, and str(None) would otherwise be spliced into the SQL as the
# literal table name "None".
if not LABELLED_FEEDBACK_TABLE:
    raise ValueError("LABELLED_FEEDBACK_TABLE environment variable is not set")

query_read = """
SELECT * FROM  @feedback_sample_table
"""
query_read = query_read.replace("@feedback_sample_table", LABELLED_FEEDBACK_TABLE)

# Call the function to execute the query
labelled_sample = query_bigquery(
    PUBLISHING_PROJECT_ID,
    LABELLED_FEEDBACK_DATASET,
    query_read,
)

# Get a stratified sample of the labelled feedback data.
# The result is passed to jsonify_data(labelled=True) by the labelling cells
# further down, so this cell has to run before them.
stratified_sample = get_stratified_sample(
    records=labelled_sample,
    total_sample_size=20,
    id_key="feedback_record_id",
    label_key="labels",
)

# Display the sample as the cell output.
stratified_sample

In [None]:
# This cell will label new records. Change the LIMIT to label more records.
# Fail fast when the table name is missing: os.getenv returns None for an unset
# variable, and str(None) would otherwise be spliced into the SQL as the
# literal table name "None".
if not PUBLISHING_TABLE:
    raise ValueError("PUBLISHING_TABLE environment variable is not set")

# One row per feedback_record_id created on or after 2024-01-01, with its
# response values concatenated in creation order. Ordering by rand() makes the
# LIMIT pick a different random set of records on every run.
unlabelled_data_query = """
SELECT
          feedback_record_id,
          STRING_AGG(response_value, ' '
          ORDER BY
            created) AS concatenated_response_value,
            rand() as r
        FROM
          @publishing_table
          WHERE DATE(created) >= "2024-01-01"
        GROUP BY
          feedback_record_id
        ORDER BY
          r
      LIMIT (10)"""

unlabelled_data_query = unlabelled_data_query.replace(
    "@publishing_table",
    PUBLISHING_TABLE,
)

unlabelled_data = query_bigquery(
    PUBLISHING_PROJECT_ID,
    LABELLED_FEEDBACK_DATASET,
    unlabelled_data_query,
)

# JSONify the data
labelled_subs_json = jsonify_data(records=stratified_sample, labelled=True)
new_subs_json = jsonify_data(records=unlabelled_data, labelled=False)

# Call the OpenAI API to get the completions
responses = await gather_responses(labelled_subs_json, new_subs_json, OPENAI_API_KEY)

# Print the responses if there are any errors, note errors seem to only be around urgency that i've seen
for item in responses:
    if '"urgency":' not in item["open_labelled_records"]:
        print(item)

# Check the costs of the queries.
# Prices are USD per 1,000 tokens.
# NOTE(review): presumably the gpt-3.5-turbo rates in force when this notebook
# was written -- confirm against the model that gather_responses calls.
PROMPT_USD_PER_1K_TOKENS = 0.0005
COMPLETION_USD_PER_1K_TOKENS = 0.0015

prompt_tokens = [response["prompt_tokens"] for response in responses]
mean_prompt_tokens = np.mean(prompt_tokens)
completion_tokens = [response["completion_tokens"] for response in responses]
mean_completion_tokens = np.mean(completion_tokens)

# Average cost per request, derived from the token counts measured in this run
# (previously the token counts were hard-coded, so the figures never changed).
mean_prompt_cost = mean_prompt_tokens * (PROMPT_USD_PER_1K_TOKENS / 1000)
mean_completion_cost = mean_completion_tokens * (COMPLETION_USD_PER_1K_TOKENS / 1000)
print(
    mean_prompt_tokens,
    mean_completion_tokens,
    f"cost of prompts: ${mean_prompt_cost:.5f} ||| cost of completions: ${mean_completion_cost:.5f}",
)

# Write the responses to the correct BigQuery table
# NOTE(review): table_id is fed from EVALUATION_DATASET, whose name suggests a
# dataset rather than a table, while OPENAI_LABEL_FEEDBACK_TABLE is read in the
# configuration cell but not used in the visible cells -- confirm this is the
# intended destination.
write_to_bigquery(
    table_id=EVALUATION_DATASET,
    responses=responses,
    publishing_project_id=PUBLISHING_PROJECT_ID,
)

In [None]:
# Relabel the original 55 records that were used to test the quality of the
# prompt.
# Fail fast when a table name is missing: os.getenv returns None for an unset
# variable, and str(None) would otherwise be spliced into the SQL as the
# literal table name "None".
if not PUBLISHING_TABLE:
    raise ValueError("PUBLISHING_TABLE environment variable is not set")
if not OLD_OPENAI_LABEL_FEEDBACK_TABLE_FOR_REVIEW:
    raise ValueError(
        "OLD_OPENAI_LABEL_FEEDBACK_TABLE_FOR_REVIEW environment variable is not set"
    )

# Both tables get explicit aliases so the column references do not depend on
# how the substituted table names are written (project/dataset qualified,
# back-quoted, ...). The selected columns are the same ones the random-sample
# query reads from the publishing table on its own.
unlabelled_data_query = """
SELECT
  published.feedback_record_id,
  STRING_AGG(published.response_value, ' '
  ORDER BY
    published.created) AS concatenated_response_value
FROM
  @publishing_table AS published
JOIN
  @old_table AS previously_labelled
ON
  published.feedback_record_id = previously_labelled.id
GROUP BY
  published.feedback_record_id
  """

unlabelled_data_query = unlabelled_data_query.replace(
    "@publishing_table",
    PUBLISHING_TABLE,
)
unlabelled_data_query = unlabelled_data_query.replace(
    "@old_table",
    OLD_OPENAI_LABEL_FEEDBACK_TABLE_FOR_REVIEW,
)

unlabelled_data = query_bigquery(
    PUBLISHING_PROJECT_ID,
    LABELLED_FEEDBACK_DATASET,
    unlabelled_data_query,
)

# JSONify the data
labelled_subs_json = jsonify_data(records=stratified_sample, labelled=True)
new_subs_json = jsonify_data(records=unlabelled_data, labelled=False)

# Call the OpenAI API to get the completions
responses = await gather_responses(labelled_subs_json, new_subs_json, OPENAI_API_KEY)

# Print the responses if there are any errors, note errors seem to only be around urgency that i've seen
for item in responses:
    if '"urgency":' not in item["open_labelled_records"]:
        print(item)

# Check the costs of the queries.
# Prices are USD per 1,000 tokens.
# NOTE(review): presumably the gpt-3.5-turbo rates in force when this notebook
# was written -- confirm against the model that gather_responses calls.
PROMPT_USD_PER_1K_TOKENS = 0.0005
COMPLETION_USD_PER_1K_TOKENS = 0.0015

prompt_tokens = [response["prompt_tokens"] for response in responses]
mean_prompt_tokens = np.mean(prompt_tokens)
completion_tokens = [response["completion_tokens"] for response in responses]
mean_completion_tokens = np.mean(completion_tokens)

# Average cost per request, derived from the token counts measured in this run
# (previously the token counts were hard-coded, so the figures never changed).
mean_prompt_cost = mean_prompt_tokens * (PROMPT_USD_PER_1K_TOKENS / 1000)
mean_completion_cost = mean_completion_tokens * (COMPLETION_USD_PER_1K_TOKENS / 1000)
print(
    mean_prompt_tokens,
    mean_completion_tokens,
    f"cost of prompts: ${mean_prompt_cost:.5f} ||| cost of completions: ${mean_completion_cost:.5f}",
)

# Write the responses to the correct BigQuery table
# Here the destination is the table named by
# OPENAI_LABEL_FEEDBACK_TABLE_FOR_REVIEW.
# NOTE(review): that value comes from os.getenv and is None when the variable
# is unset; how write_to_bigquery treats a None table_id is not visible here.
write_to_bigquery(
    table_id=OPENAI_LABEL_FEEDBACK_TABLE_FOR_REVIEW,
    responses=responses,
    publishing_project_id=PUBLISHING_PROJECT_ID,
)