In [1]:
from core.models import GPTModel
import jsonlines, json
from tqdm.notebook import tqdm

## Import

In [2]:
import os

# Remember the directory the kernel was in the first time this cell ran.
# Without this, every re-run of the cell would climb one more level up the
# tree, because os.chdir() below changes what os.getcwd() reports.
if "_notebook_start_dir" not in globals():
    _notebook_start_dir = os.getcwd()

# Report where the kernel currently is (the start directory on a first run).
current_dir = os.getcwd()
print("Current Directory:", current_dir)

# Work from the parent of the start directory; later cells open
# `datasets/...` paths relative to the working directory.
parent_dir = os.path.dirname(_notebook_start_dir)
os.chdir(parent_dir)

Current Directory: /Users/yangblair/Documents/GitHub/LLM-Eval-NIPS/code


## Run

In [3]:
def load_jsonl_file(file_path):
    """Read a JSON Lines file and return its records as one block of text.

    Args:
        file_path: Path of the ``.jsonl`` file to read.

    Returns:
        str: ``str()`` of every record, one record per line, in file order.
        A file with no records gives an empty string.
    """
    with jsonlines.open(file_path) as reader:
        records = list(reader)
    # Join the per-record strings. Joining ``str(records)`` itself would
    # iterate over the characters of the list's repr and place a newline
    # between every single character.
    return '\n'.join(str(record) for record in records)

In [4]:

# Dataset family; used as the sub-directory name under `datasets/`.
meta = 'openend'

# Open-ended topics to generate criteria for (one `.jsonl` file per topic).
openendtopics = ["Analyzing Financial Reports"]

# The list the generation loop iterates over (same list object, not a copy).
topics = openendtopics

In [5]:
# System prompt for the criteria-proposing model; `{topic}` is filled in per
# topic with str.format().
sys_prompt_template = (
    'Based on some questions in the field of {topic}, you will propose five '
    'orthogonal and comprehensive criteria. '
    'Your criteria will be used to qualitatively assess the AI systems. '
    'Another teacher will write evaluation cards based on these criteria.'
)

In [6]:
# For every topic: load its sample questions, ask the model for five
# evaluation criteria, and collect them in `all_criteria` keyed by topic.
all_criteria = {}
for topic in tqdm(topics):
    file_path = f'datasets/{meta}/_raw/train/{topic}.jsonl'
    sys_prompt = sys_prompt_template.format(topic=topic)

    sample_questions = load_jsonl_file(file_path)
    # A new model per topic, because the system prompt names the topic.
    model = GPTModel(system_prompt=sys_prompt, model_name='gpt-4o')

    prompt = f'Example questions:{sample_questions}\n\n'
    prompt += "Give your reasoning, then output the list of criteria in a python list. Each criterion should be 2-5 words."
    prompt += f"""
    1. Orthogonality: The criteria should be independent of each other.
    2. Comprehensiveness: All criteria combined should cover the entire field of {topic}.
    3. Relevance: The criteria should be relevant to the field of {topic} and the questions provided.
    4. Clarity: The criteria should be clear and easy to understand.
    """

    # This is a plain string, not an f-string, so braces are literal here and
    # the JSON example uses single braces; doubled braces would be sent to the
    # model verbatim.
    # NOTE(review): assumes GPTModel does not run str.format() on the prompt - confirm.
    prompt += """
        Use the following JSON formatting:
        {
            "reasoning": "Your reasoning here.",
            "criteria": [
                "criterion1",
                "criterion2",
                "criterion3",
                "criterion4",
                "criterion5"
            ]
        }
    """

    output = model(prompt, use_json=True)

    # Only the criteria list is kept; the model's "reasoning" is discarded.
    all_criteria[topic] = output["criteria"]

  0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
# Show the criteria collected for each topic.
all_criteria

{'Analyzing Financial Reports': ['Revenue Analysis',
  'Expense Evaluation',
  'Debt Assessment',
  'Investment Impact',
  'Strategic Insights']}

In [8]:
# Merge this session's criteria into datasets/<meta>/0_criteria.json:
# read what is already stored, overlay the new topics, write everything back.
criteria_path = f'datasets/{meta}/0_criteria.json'

try:
    with open(criteria_path, 'r', encoding='utf-8') as f:
        stored_criteria = json.load(f)
except FileNotFoundError:
    # First run for this dataset family: there is nothing to merge with.
    stored_criteria = {}
except (json.JSONDecodeError, UnicodeDecodeError) as err:
    # Same outcome as before (the file is rewritten below), but reported
    # instead of discarding the old content silently.
    print(f'Warning: could not parse {criteria_path} ({err}); it will be overwritten.')
    stored_criteria = {}

if isinstance(stored_criteria, dict):
    # Entries generated in this session win over stored ones for the same topic.
    stored_criteria.update(all_criteria)
    all_criteria = stored_criteria
else:
    print(f'Warning: {criteria_path} does not hold a JSON object; it will be overwritten.')

with open(criteria_path, 'w', encoding='utf-8') as f:
    json.dump(all_criteria, f, indent=4)

In [9]:
from core.models import GPTModel
# Smoke test: send one prompt to a fresh GPTModel and print the raw reply.
model = GPTModel(system_prompt="You are a helpful assistant.", model_name="gpt-4o")
# Plain string literal, so the JSON example uses single braces; `{{` / `}}`
# are escapes only inside f-strings and str.format() templates.
# NOTE(review): assumes GPTModel does not run str.format() on the prompt - confirm.
rslt = model('How many parameters do you have? respond in json: {"your parameter": "value, whatever you want here"}')
print(rslt)

```json
{
  "your parameter": "As an AI language model, I don't have parameters in the same way a traditional software application might. However, I operate based on a vast dataset and complex algorithms to generate responses."
}
```
