In [1]:
%load_ext autoreload
%autoreload 2

import json
from pathlib import Path
from utils import client, results_path, json_prefix, gpt_prediction

# Identifiers for the parameter a run varies; `param` is compared against these.
TEMP = "temperature"
SAMP = "sample_count"

# Experiment artefacts are looked up under ./results of the current working directory.
results_folder = Path.cwd().joinpath("results")

In [2]:
# Sub-folder of `results_folder` holding the run to inspect (turned into a Path in the next cell).
experiment_folder = "sentence_rare_city"

# Which parameter was varied for this run (SAMP or TEMP) and the value it took.
param, param_value = SAMP, 5

In [3]:
# Resolve the experiment directory. This rebinds `experiment_folder` from a folder
# name (str) to a Path, and later cells rely on it being a Path.
# Re-running the cell is harmless only because `results_folder` is absolute:
# joining an absolute right-hand path simply yields that right-hand path.
experiment_folder = results_folder / experiment_folder

# Prompt files carry a `_<param_value>` suffix only when the sample count is the varied parameter.
prompt_suffix = f"_{param_value}" if param == SAMP else ""

# In-context examples shown to the model.
in_context_prompt = (experiment_folder / f"in_context_prompt{prompt_suffix}.txt").read_text()
print(in_context_prompt)
print()

# Test inputs, prefixed with the JSON-format instruction.
raw_test_prompt = (experiment_folder / f"test_prompt{prompt_suffix}.txt").read_text()
test_prompt = json_prefix + raw_test_prompt
print(test_prompt)
print()

# The stored response is always keyed by `param_value`, whichever parameter was varied.
# It is kept as raw JSON text here and parsed where needed.
original_response = (experiment_folder / f"response_{param_value}.json").read_text()
print(original_response)

[{'input': 'The Louvre Museum houses famous works of art.', 'label': True},
 {'input': 'The bustling city streets echoed with the sounds of honking horns '
           'and hurried footsteps.',
  'label': True},
 {'input': 'The Taj Mahal is a symbol of love.', 'label': True},
 {'input': 'The city at night resembled a vast canvas, painted with the '
           'vibrant hues of illuminated billboards and skyscrapers.',
  'label': True},
 {'input': 'The Statue of Liberty stands tall in New York.', 'label': True},
 {'input': 'the mountains are majestic and grand.', 'label': False},
 {'input': 'the world is full of fascinating cultures.', 'label': False},
 {'input': 'Green smoothies are a nutritious breakfast option.',
  'label': False},
 {'input': "LIFE IS WHAT HAPPENS WHEN YOU'RE BUSY MAKING PLANS.",
  'label': False},
 {'input': 'Dancing in the flickering candlelight', 'label': False}]

Please label the following inputs. Respond in JSON format like the examples given to you above.
[{'inpu

In [4]:
# Reconstruct the original labelling conversation.
# Order matters: later cells pick the system message via [0] and the user
# test prompt via [-2].
original_messages = [
    {"role": speaker, "content": text}
    for speaker, text in (
        ("system", in_context_prompt),
        ("user", test_prompt),
        ("assistant", original_response),
    )
]

# Follow-up user turn asking the model to state the rule it applied.
rule_question = dict(
    role="user",
    content="What rule did you use to classify the samples above?",
)

In [5]:
from config import seed

model = "gpt-4-1106-preview"  # since we used json mode everywhere already

# Full labelling conversation followed by the rule question, so the model
# articulates its rule *after* having produced its labels.
articulation_after_labeling = [*original_messages, rule_question]

# Use the varied temperature only when temperature is the parameter under study; otherwise 1.
sampling_temperature = param_value if param == TEMP else 1

response = client.chat.completions.create(
    model=model,
    messages=articulation_after_labeling,
    temperature=sampling_temperature,
    max_tokens=4096,  # max for the 1106-preview model
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
    seed=seed,
)

# Text of the first (only requested) completion.
articulation_post = response.choices[0].message.content

In [6]:
# Ask for the rule with only the in-context examples (the system message) in
# view, i.e. *before* the model has labelled any test inputs.
articulation_before_labeling = [original_messages[0], rule_question]

# Same sampling settings as the post-labelling request; `model` and `seed`
# come from the previous cell.
request_options = dict(
    model=model,
    messages=articulation_before_labeling,
    temperature=1 if param != TEMP else param_value,
    max_tokens=4096,  # max for the 1106-preview model
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
    seed=seed,
)
response = client.chat.completions.create(**request_options)

# Text of the first (only requested) completion.
articulation_pre = response.choices[0].message.content

In [11]:
# Show the pre-labelling articulation, a separator line, then the post-labelling one.
print(articulation_pre, "++++++++++++++++++", articulation_post, sep="\n")

It appears that the rule for classification might involve determining whether a statement is describing a concrete, verifiable fact about a specific subject or place, as opposed to a subjective statement, general reflection, or an incomplete sentence. Let's break down the judgments based on this possible rule:

1. "The Louvre Museum houses famous works of art." - True
   This is a concrete, verifiable fact about the Louvre Museum.

2. "The bustling city streets echoed with the sounds of honking horns and hurried footsteps." - True
   Although slightly descriptive, this still presents a concrete scenario that can be true for many bustling city streets.

3. "The Taj Mahal is a symbol of love." - True
   This is a widely accepted fact about the symbolic meaning of the Taj Mahal.

4. "The city at night resembled a vast canvas, painted with the vibrant hues of illuminated billboards and skyscrapers." - True
   This is a descriptive observation that can be verified by someone looking at a ci

In [8]:
# Conversation: system examples -> rule question -> the model's own pre-labelling
# articulation -> the user test prompt (original_messages[-2]). The model now
# labels the test inputs after having stated its rule.
label_after_articulation = [
    *articulation_before_labeling,
    {"role": "assistant", "content": articulation_pre},
    original_messages[-2],
]

# Use the varied temperature only when temperature is the parameter under study; otherwise 1.
labeling_temperature = param_value if param == TEMP else 1

# The two empty strings are placeholders for the leading positional arguments.
# NOTE(review): presumably system_prompt and user_query, which the helper's
# warning says are overridden when `messages` is supplied; confirm in utils.
response_json = gpt_prediction(
    "",
    "",
    label_after_articulation,
    temperature=labeling_temperature,
    json_mode=True,
)

  warn("json_mode only supported for gpt-4-1106-preview")
  warn("changing model to gpt-4-1106-preview")
  warn("json_mode is on but user_query does not contain \"json\"")
"Please label the following inputs. Respond in JSON format like the examples given to you above.
"
  warn(f"adding prefix to user_query:\n\"{json_prefix}\"")
  warn("messages is not None. Overriding system_prompt and user_query")


In [9]:
from pprint import pprint
# Pretty-print the labels produced after the model articulated its rule.
pprint(response_json)

[{'input': 'The cityscape was a mix of modern architecture and historic '
           'landmarks, telling the story of the urban evolution.',
  'label': True},
 {'input': "TIME FLIES WHEN YOU'RE HAVING FUN.", 'label': False},
 {'input': 'ALWAYS STRIVE FOR EXCELLENCE.', 'label': False},
 {'input': 'mediterranean cuisine is delicious and healthy.', 'label': False},
 {'input': 'THE MOONLIGHT CASTS A GLOW OVER THE NIGHT.', 'label': False},
 {'input': 'rainy days are perfect for cozy reading.', 'label': False},
 {'input': 'a secret pathway leading into the unknown', 'label': False},
 {'input': "Pedestrians navigated through the city's maze-like streets, guided "
           'by the glow of streetlights.',
  'label': True},
 {'input': 'TWINKLING AMIDST THE CITY SKYLINE', 'label': False},
 {'input': 'The Eiffel Tower stands tall in Paris.', 'label': True},
 {'input': 'candles create a warm and inviting atmosphere.', 'label': False},
 {'input': 'WINTER BRINGS SNOW AND FROSTY MORNINGS.', 'label':

In [19]:
from utils import eval_response

# Ground-truth test samples; the file name carries a `_<param_value>` suffix
# only when the sample count is the varied parameter.
test_samples_path = experiment_folder / f"test_samples{f'_{param_value}' if param == SAMP else ''}.json"
# Use a context manager so the file handle is closed deterministically
# (the previous `json.load(open(...))` left it open until garbage collection).
with open(test_samples_path) as samples_file:
    test_samples = json.load(samples_file)

# diff: labels given after articulating the rule vs. the original labels.
# `original_response` is parsed separately for each call so the two
# evaluations never share a parsed object.
diff_results, diff_summary = eval_response(response_json, json.loads(original_response))
# pre: labels given after articulating the rule vs. the test samples.
pre_results, pre_summary = eval_response(response_json, test_samples)
# post: the original labels vs. the test samples.
post_results, post_summary = eval_response(json.loads(original_response), test_samples)

In [20]:
# Pretty-print the three evaluation summaries in order: diff, pre, post.
for metrics in (diff_summary, pre_summary, post_summary):
    pprint(metrics)

{'accuracy': 0.9555555555555556,
 'correct': 86,
 'corrupted': 0,
 'false': 64,
 'incorrect': 4,
 'mismatch': 0,
 'precision': 0.8928571428571429,
 'recall': 0.9615384615384616,
 'total': 90,
 'true': 26}
{'accuracy': 0.8666666666666667,
 'correct': 78,
 'corrupted': 0,
 'false': 70,
 'incorrect': 12,
 'mismatch': 0,
 'precision': 0.6428571428571429,
 'recall': 0.9,
 'total': 90,
 'true': 20}
{'accuracy': 0.8888888888888888,
 'correct': 80,
 'corrupted': 0,
 'false': 70,
 'incorrect': 10,
 'mismatch': 0,
 'precision': 0.6923076923076923,
 'recall': 0.9,
 'total': 90,
 'true': 20}


In [23]:
from pprint import pformat

# Collect the three evaluation summaries and both rule articulations into one record.
summary = dict(
    diff=diff_summary,
    pre=pre_summary,
    post=post_summary,
    articulation_after_labeling=articulation_post,
    articulation_before_labeling=articulation_pre,
)

# Write the summary file next to the other artefacts of this run, keyed by `param_value`.
# Left as the cell's last expression so the number of characters written is echoed.
summary_path = experiment_folder / f"summary_{param_value}.json"
summary_path.write_text(json.dumps(summary, indent=4))

4238