In [1]:
import json
from datetime import datetime
from tqdm import tqdm
from shroom_classifier_v13 import ShroomClassifier

In [2]:
# Model identifier copied into every experiment configuration.
MODEL_NAME = "gpt-3.5-turbo"

# Input dataset paths.
MODEL_AGNOSTIC_FILENAME = "reference/val.model-agnostic.json"
MODEL_AWARE_FILENAME = "reference/val.model-aware.json"

# Output paths, one per study; each file receives that study's list of runs.
TEMPERATURE_STUDY_FILENAME = "temperature_study_model-agnostic.json"
EXAMPLES_PER_LABEL_STUDY_FILENAME = "examples_per_label_study_model-agnostic.json"
SAMPLES_PER_QUERY_STUDY_FILENAME = "samples_per_query_study_model-agnostic.json"

In [3]:
def run_experiment(config):
    """Classify every datapoint of one dataset under a single configuration.

    Args:
        config: dict from which the following keys are read:
            "model_name", "temperature", "examples_per_class" (passed to the
            ShroomClassifier constructor), "dataset_filename" (path of the
            JSON dataset to load), and "samples", "task_defined",
            "role_defined", "hallucination_defined", "examples" (passed to
            ``classify`` for every datapoint).

    Returns:
        dict with the keys:
            "start" / "end": ISO-8601 strings of naive UTC timestamps taken
                before the classifier is built and after the last datapoint.
            "config": the ``config`` argument, unchanged.
            "data": the loaded dataset.
            "predictions": one ``classify`` result per datapoint, in dataset
                order.

    Raises:
        KeyError: if ``config`` lacks one of the keys listed above.
        OSError: if the dataset file cannot be opened.
        json.JSONDecodeError: if the dataset file is not valid JSON.
    """
    start = datetime.utcnow().isoformat()
    cp = ShroomClassifier(config["model_name"], config["temperature"], config["examples_per_class"])
    # Read through a context manager so the file handle is closed right away
    # instead of lingering until garbage collection; JSON is read as UTF-8
    # regardless of the platform's default encoding.
    with open(config["dataset_filename"], 'r', encoding='utf-8') as dataset_file:
        data = json.load(dataset_file)
    predictions = []
    for dp in tqdm(data):
        prediction = cp.classify(
            dp,
            N=config["samples"],
            task_defined=config["task_defined"],
            role_defined=config["role_defined"],
            hallucination_defined=config["hallucination_defined"],
            examples=config["examples"]
        )
        predictions.append(prediction)
    return {
        "start": start,
        "end": datetime.utcnow().isoformat(),
        "config": config,
        "data": data,
        "predictions": predictions
    }

In [4]:
# Baseline setting shared by the three studies; every entry of the lists
# below is this baseline with exactly one field replaced.
_BASELINE_SETTING = {
    "temperature": 1.0,
    "samples": 5,
    "examples_per_class": 1,
    "examples": True,
    "task_defined": True,
    "role_defined": True,
    "hallucination_defined": True,
}

# Temperature study: only "temperature" varies.
temperature_settings = [
    {**_BASELINE_SETTING, "temperature": value}
    for value in (0.1, 0.5, 1.0, 1.5)
]

# Examples-per-label study: only "examples_per_class" varies (1 through 5).
examples_per_label_settings = [
    {**_BASELINE_SETTING, "examples_per_class": count}
    for count in range(1, 6)
]

# Samples-per-query study: only "samples" varies.
samples_settings = [
    {**_BASELINE_SETTING, "samples": count}
    for count in (1, 5, 10, 15, 20, 25)
]

In [5]:
# One configuration per (setting, dataset, run): three repeated runs of each
# temperature setting on each listed dataset.
temperature_configs = [
    {
        "run": run,
        "model_name": MODEL_NAME,
        "dataset_filename": dataset,
        # Remaining fields are copied verbatim from the setting, in this order.
        **{
            field: setting[field]
            for field in (
                "temperature",
                "examples_per_class",
                "samples",
                "task_defined",
                "role_defined",
                "hallucination_defined",
                "examples",
            )
        },
    }
    for setting in temperature_settings
    for dataset in [MODEL_AGNOSTIC_FILENAME]  # MODEL_AWARE_FILENAME is currently left out
    for run in range(3)
]
len(temperature_configs)

12

In [6]:
# One configuration per (setting, dataset, run): three repeated runs of each
# examples-per-label setting on each listed dataset.
examples_per_label_configs = [
    {
        "run": run,
        "model_name": MODEL_NAME,
        "dataset_filename": dataset,
        # Remaining fields are copied verbatim from the setting, in this order.
        **{
            field: setting[field]
            for field in (
                "temperature",
                "examples_per_class",
                "samples",
                "task_defined",
                "role_defined",
                "hallucination_defined",
                "examples",
            )
        },
    }
    for setting in examples_per_label_settings
    for dataset in [MODEL_AGNOSTIC_FILENAME]  # MODEL_AWARE_FILENAME is currently left out
    for run in range(3)
]
len(examples_per_label_configs)

15

In [7]:
# One configuration per (setting, dataset, run): three repeated runs of each
# samples-per-query setting on each listed dataset.
samples_configs = [
    {
        "run": run,
        "model_name": MODEL_NAME,
        "dataset_filename": dataset,
        # Remaining fields are copied verbatim from the setting, in this order.
        **{
            field: setting[field]
            for field in (
                "temperature",
                "examples_per_class",
                "samples",
                "task_defined",
                "role_defined",
                "hallucination_defined",
                "examples",
            )
        },
    }
    for setting in samples_settings
    for dataset in [MODEL_AGNOSTIC_FILENAME]  # MODEL_AWARE_FILENAME is currently left out
    for run in range(3)
]
len(samples_configs)

18

In [8]:
# Temperature study: run each configuration in order and, after every run,
# re-write the output file with the full list of results collected so far.
runs = []
for config in temperature_configs:
    print(config)
    runs.append(run_experiment(config))
    # The context manager flushes and closes the file before the next
    # (long-running) experiment starts, rather than leaving the handle open
    # until garbage collection.
    with open(TEMPERATURE_STUDY_FILENAME, 'w') as checkpoint_file:
        json.dump(runs, checkpoint_file)

{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 0.1, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [17:28<00:00,  2.10s/it]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 0.1, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [1:09:06<00:00,  8.31s/it]   


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 0.1, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [1:35:22<00:00, 11.47s/it]    


{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 0.5, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [23:15<00:00,  2.80s/it]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 0.5, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [1:51:54<00:00, 13.46s/it]    


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 0.5, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [15:14<00:00,  1.83s/it]  


{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [13:48<00:00,  1.66s/it]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [24:35<00:00,  2.96s/it]  


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [25:48<00:00,  3.10s/it]  


{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.5, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [31:15<00:00,  3.76s/it]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.5, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [39:40<00:00,  4.77s/it]   


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.5, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [19:13<00:00,  2.31s/it]  


In [9]:
# Examples-per-label study: run each configuration in order and, after every
# run, re-write the output file with the full list of results collected so far.
runs = []
for config in examples_per_label_configs:
    print(config)
    runs.append(run_experiment(config))
    # The context manager flushes and closes the file before the next
    # (long-running) experiment starts, rather than leaving the handle open
    # until garbage collection.
    with open(EXAMPLES_PER_LABEL_STUDY_FILENAME, 'w') as checkpoint_file:
        json.dump(runs, checkpoint_file)

{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [10:29<00:00,  1.26s/it]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [21:15<00:00,  2.56s/it]  


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [29:15<00:00,  3.52s/it]  


{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 2, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [1:05:21<00:00,  7.86s/it]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 2, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [35:31<00:00,  4.27s/it]    


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 2, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [15:55<00:00,  1.91s/it]  


{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 3, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [11:28<00:00,  1.38s/it]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 3, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [32:42<00:00,  3.93s/it]   


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 3, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [20:47<00:00,  2.50s/it]  


{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 4, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [29:22<00:00,  3.53s/it]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 4, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [26:13<00:00,  3.15s/it]  


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 4, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [15:35<00:00,  1.87s/it]  


{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 5, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [13:29<00:00,  1.62s/it]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 5, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [17:22<00:00,  2.09s/it]  


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 5, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [15:53<00:00,  1.91s/it]  


In [8]:
# Samples-per-query study: run each configuration in order and, after every
# run, re-write the output file with the full list of results collected so far.
runs = []
for config in samples_configs:
    print(config)
    runs.append(run_experiment(config))
    # The context manager flushes and closes the file before the next
    # (long-running) experiment starts, rather than leaving the handle open
    # until garbage collection.
    with open(SAMPLES_PER_QUERY_STUDY_FILENAME, 'w') as checkpoint_file:
        json.dump(runs, checkpoint_file)

{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 1, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [06:42<00:00,  1.24it/s]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 1, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [04:00<00:00,  2.07it/s]


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 1, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [03:15<00:00,  2.56it/s]


{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [17:10<00:00,  2.07s/it]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [21:28<00:00,  2.58s/it]  


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 5, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [21:07<00:00,  2.54s/it]  


{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 10, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [35:26<00:00,  4.26s/it]   


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 10, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [57:02<00:00,  6.86s/it]   


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 10, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [26:41<00:00,  3.21s/it]  


{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 15, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [2:34:54<00:00, 18.63s/it]    


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 15, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [42:35<00:00,  5.12s/it]  


{'run': 2, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 15, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [1:06:13<00:00,  7.96s/it]  


{'run': 0, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 20, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


100%|██████████| 499/499 [1:30:26<00:00, 10.88s/it]  


{'run': 1, 'model_name': 'gpt-3.5-turbo', 'dataset_filename': 'reference/val.model-agnostic.json', 'temperature': 1.0, 'examples_per_class': 1, 'samples': 20, 'task_defined': True, 'role_defined': True, 'hallucination_defined': True, 'examples': True}


 17%|█▋        | 86/499 [14:41<1:10:34, 10.25s/it]


KeyboardInterrupt: 