In [1]:
import json, os, re
from tqdm import tqdm
from src.intension import Intension
from src.utils import confusion_matrix

In [2]:
# Models to evaluate. Each entry names the model and gives the number of
# queries submitted together in one batch call.
MODELS = [
    dict(model_name="google/gemma-7b-it", batch_size=50),
    dict(model_name="gpt-3.5-turbo", batch_size=50),
    dict(model_name="gpt-4-0125-preview", batch_size=50),
    dict(model_name="mistralai/Mistral-7B-Instruct-v0.2", batch_size=50),
    dict(model_name="mistralai/Mixtral-8x7B-Instruct-v0.1", batch_size=50),
    # Disabled: dict(model_name="claude-3-opus-20240229", batch_size=1),
]

# Directory that is scanned for input data files.
DATA_DIRECTORY = "data/ISWC"

# Input files are named "P<...>.json"; capture group 1 is the part before the
# ".json" suffix and is used as the property identifier.
DATA_FILENAME_PATTERN = r"(P.+)\.json$"

In [3]:
# Run every configured model over every data file and cache the answers on disk.
#
# For each file in DATA_DIRECTORY whose name matches DATA_FILENAME_PATTERN and
# each entry of MODELS, the results are stored in
# experiments/<model>-kamel-<property>.json. A (file, model) pair whose result
# file already exists is skipped, so re-running the cell resumes where it left off.
files = [file for file in os.listdir(DATA_DIRECTORY) if os.path.isfile(os.path.join(DATA_DIRECTORY, file))]
matched_files = [file for file in files if re.match(DATA_FILENAME_PATTERN, file)]
for file in matched_files:
    m = re.match(DATA_FILENAME_PATTERN, file)
    # NOTE(review): `property` shadows the builtin of the same name. The name is
    # kept because it is a notebook-level variable that other cells may read.
    property = m.group(1)
    # Close the input file deterministically instead of leaking the handle.
    with open(os.path.join(DATA_DIRECTORY, file), "r", encoding="utf-8") as data_file:
        data = json.load(data_file)
    for model in MODELS:
        # Only the part after the last "/" of the model name is used in the file name.
        filename = f'experiments/{model["model_name"].split("/")[-1]}-kamel-{property}.json'
        if os.path.isfile(filename):
            print(f'{property:>5} {model["model_name"]:36}: EXISTS')
            continue

        # Make sure the output directory exists before spending time on model
        # calls, so a missing directory cannot discard a finished run.
        os.makedirs(os.path.dirname(filename), exist_ok=True)

        results = []
        # One query per datum: predicate label, comma-joined argument labels, a
        # "world" text built from the predicate definition followed by the
        # argument descriptions, and the ground-truth flag.
        queries = [
            {
                "predicate": datum["predicate"]["label"],
                "arguments": ", ".join([ arg["label"] for arg in datum["arguments"] ]),
                "world": datum["predicate"]["definition"] + " " + " ".join([ arg["description"] for arg in datum["arguments"] ]),
                "actual": datum["in_extension"]
            }
            for datum in data
        ]
        batch_size = model["batch_size"]
        batches = [ queries[start:start + batch_size] for start in range(0, len(queries), batch_size) ]
        intension = Intension(model_name=model["model_name"])
        for batch in tqdm(batches, desc=f'{property:>5} {model["model_name"]:36}', total=len(batches)):
            response = intension.chain.batch(batch)
            for result in response:
                # Flatten the nested "text" payload into top-level fields.
                # NOTE(review): presumably each result also echoes the query
                # fields it was built from - confirm against Intension.
                result["rationale"] = result["text"]["rationale"]
                result["predicted"] = result["text"]["answer"]
                result.pop("text")
            results.extend(response)

        # Write to a temporary file and rename it into place: a failure while
        # dumping must not leave a truncated file that the existence check
        # above would later mistake for a completed run.
        partial_filename = filename + ".tmp"
        with open(partial_filename, "w", encoding="utf-8") as results_file:
            json.dump(results, results_file)
        os.replace(partial_filename, filename)

 P509 google/gemma-7b-it                  : EXISTS
 P509 gpt-3.5-turbo                       : EXISTS
 P509 gpt-4-0125-preview                  : EXISTS
 P509 mistralai/Mistral-7B-Instruct-v0.2  : EXISTS
 P509 mistralai/Mixtral-8x7B-Instruct-v0.1: EXISTS
 P159 google/gemma-7b-it                  : EXISTS
 P159 gpt-3.5-turbo                       : EXISTS
 P159 gpt-4-0125-preview                  : EXISTS
 P159 mistralai/Mistral-7B-Instruct-v0.2  : EXISTS
 P159 mistralai/Mixtral-8x7B-Instruct-v0.1: EXISTS
P5353 google/gemma-7b-it                  : EXISTS
P5353 gpt-3.5-turbo                       : EXISTS
P5353 gpt-4-0125-preview                  : EXISTS
P5353 mistralai/Mistral-7B-Instruct-v0.2  : EXISTS
P5353 mistralai/Mixtral-8x7B-Instruct-v0.1: EXISTS
 P197 google/gemma-7b-it                  : EXISTS
 P197 gpt-3.5-turbo                       : EXISTS
 P197 gpt-4-0125-preview                  : EXISTS
 P197 mistralai/Mistral-7B-Instruct-v0.2  : EXISTS
 P197 mistralai/Mixtral-8x7B-In

 P931 mistralai/Mistral-7B-Instruct-v0.2  : 100%|██████████| 3/3 [00:10<00:00,  3.58s/it]
 P931 mistralai/Mixtral-8x7B-Instruct-v0.1: 100%|██████████| 3/3 [01:11<00:00, 23.79s/it]
 P710 google/gemma-7b-it                  : 100%|██████████| 3/3 [00:37<00:00, 12.50s/it]
 P710 gpt-3.5-turbo                       : 100%|██████████| 3/3 [00:15<00:00,  5.15s/it]
 P710 gpt-4-0125-preview                  : 100%|██████████| 3/3 [02:50<00:00, 56.72s/it]
 P710 mistralai/Mistral-7B-Instruct-v0.2  : 100%|██████████| 3/3 [00:38<00:00, 12.73s/it]
 P710 mistralai/Mixtral-8x7B-Instruct-v0.1: 100%|██████████| 3/3 [01:32<00:00, 30.84s/it]
  P39 google/gemma-7b-it                  : 100%|██████████| 4/4 [00:34<00:00,  8.72s/it]
  P39 gpt-3.5-turbo                       : 100%|██████████| 4/4 [00:18<00:00,  4.66s/it]
  P39 gpt-4-0125-preview                  : 100%|██████████| 4/4 [01:27<00:00, 21.76s/it]
  P39 mistralai/Mistral-7B-Instruct-v0.2  : 100%|██████████| 4/4 [00:32<00:00,  8.10s/it]
  P39 mist