In [19]:
import json, os, re, time
from datetime import datetime
from tqdm import tqdm
from src.intension2 import Intension

In [13]:
# Where the input files live and how a data file is recognised. The single
# capture group of the pattern (e.g. "P509" for "P509.json") is used as the
# property identifier.
DATA_DIRECTORY = "data/NESY"
DATA_FILENAME_PATTERN = r"(P.+)\.json$"

# Models to evaluate. "batch_size" is the number of queries grouped into one
# batch call for that model.
#
# Currently disabled entries (kept for reference):
#   google/gemma-7b-it        batch_size 50
#   gpt-3.5-turbo             batch_size 50
#   gpt-4o-2024-05-13         batch_size 50
#   claude-3-opus-20240229    batch_size 1
MODELS = [
    dict(model_name="mistralai/Mistral-7B-Instruct-v0.2", batch_size=50),
    dict(model_name="mistralai/Mixtral-8x7B-Instruct-v0.1", batch_size=50),
    dict(model_name="meta-llama/Meta-Llama-3-70B-Instruct", batch_size=50),
]

In [29]:
# Run every configured model over the first 20 matching property files and
# cache each (model, property) result set as JSON under experiments2/.
# A pair whose result file already exists is reported and skipped, so the cell
# can be re-run to resume after a failure.
files = [file for file in os.listdir(DATA_DIRECTORY) if os.path.isfile(os.path.join(DATA_DIRECTORY, file))]
matched_files = [file for file in files if re.match(DATA_FILENAME_PATTERN, file)]
# NOTE(review): os.listdir returns entries in arbitrary order, so which 20
# files are selected here may differ between machines -- confirm this is intended.
for file in matched_files[:20]:
    # The pattern's capture group is the property identifier (e.g. "P509").
    # Named property_id rather than `property` to avoid shadowing the builtin.
    property_id = re.match(DATA_FILENAME_PATTERN, file).group(1)
    with open(os.path.join(DATA_DIRECTORY, file), "r", encoding="utf-8") as data_file:
        data = json.load(data_file)
    for model in MODELS:
        filename = f'experiments2/{model["model_name"].split("/")[-1]}-kamel-{property_id}.json'
        if os.path.isfile(filename):
            print(f'{property_id:>5} {model["model_name"]:36}: EXISTS')
            continue

        # Make sure the output directory exists *before* spending time on
        # inference, instead of failing at the final write.
        os.makedirs(os.path.dirname(filename), exist_ok=True)

        results = []
        # One query per datum: predicate label, comma-joined argument labels,
        # a "world" text built from the predicate definition followed by the
        # argument descriptions, and the expected value.
        queries = [
            {
                "predicate": datum["predicate"]["label"],
                "arguments": ", ".join(arg["label"] for arg in datum["arguments"]),
                "world": datum["predicate"]["definition"] + " " + " ".join(arg["description"] for arg in datum["arguments"]),
                "actual": datum["in_extension"]
            }
            for datum in data
        ]
        batch_size = model["batch_size"]
        batches = [queries[i:i + batch_size] for i in range(0, len(queries), batch_size)]
        intension = Intension(model=model["model_name"])
        for batch in tqdm(batches, desc=f'{property_id:>5} {model["model_name"]:36}', total=len(batches)):
            # Any error raised here (e.g. a rate-limit response from the model
            # provider) propagates and aborts the run. Nothing is written for
            # this (model, property) pair in that case, so a later re-run
            # starts it again from the first batch.
            response = intension.chain.batch(batch)
            for result in response:
                # Flatten the nested "text" payload into top-level keys.
                result["rationale"] = result["text"]["rationale"]
                result["predicted"] = result["text"]["answer"]
                result.pop("text")
            results.extend(response)

        # Write to a temporary file and rename it into place so that an
        # interrupted dump can never leave a truncated file that a later run
        # would mistake for a finished result ("EXISTS").
        partial_filename = filename + ".tmp"
        with open(partial_filename, "w") as output_file:
            json.dump(results, output_file)
        os.replace(partial_filename, filename)

 P509 mistralai/Mistral-7B-Instruct-v0.2  : EXISTS
 P509 mistralai/Mixtral-8x7B-Instruct-v0.1: EXISTS
 P509 meta-llama/Meta-Llama-3-70B-Instruct: EXISTS
 P159 mistralai/Mistral-7B-Instruct-v0.2  : EXISTS
 P159 mistralai/Mixtral-8x7B-Instruct-v0.1: EXISTS
 P159 meta-llama/Meta-Llama-3-70B-Instruct: EXISTS
P5353 mistralai/Mistral-7B-Instruct-v0.2  : EXISTS
P5353 mistralai/Mixtral-8x7B-Instruct-v0.1: EXISTS
P5353 meta-llama/Meta-Llama-3-70B-Instruct: EXISTS
P1408 mistralai/Mistral-7B-Instruct-v0.2  : EXISTS
P1408 mistralai/Mixtral-8x7B-Instruct-v0.1: EXISTS
P1408 meta-llama/Meta-Llama-3-70B-Instruct: EXISTS
 P197 mistralai/Mistral-7B-Instruct-v0.2  : EXISTS
 P197 mistralai/Mixtral-8x7B-Instruct-v0.1: EXISTS
 P197 meta-llama/Meta-Llama-3-70B-Instruct: EXISTS
P1038 mistralai/Mistral-7B-Instruct-v0.2  : EXISTS
P1038 mistralai/Mixtral-8x7B-Instruct-v0.1: EXISTS
P1038 meta-llama/Meta-Llama-3-70B-Instruct: EXISTS
 P915 mistralai/Mistral-7B-Instruct-v0.2  : EXISTS
 P915 mistralai/Mixtral-8x7B-In

 P931 mistralai/Mixtral-8x7B-Instruct-v0.1:   0%|          | 0/6 [00:00<?, ?it/s]


HfHubHTTPError: 429 Client Error: Too Many Requests for url: https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1 (Request ID: OYoOVXNDvhr0rniW5czyt)

Rate limit reached. You reached PRO hourly usage limit. Use Inference Endpoints (dedicated) to scale your endpoint.