In [1]:
import json, pycm, pandas as pd
from nl_classifier import NaturalLanguageClassifer
from datetime import datetime

In [2]:
# Run configuration.
# MODEL_NAME and TEMPERATURE are passed to the NaturalLanguageClassifer
# constructor for every concept (presumably the LLM identifier and its
# sampling temperature -- confirm against nl_classifier).
MODEL_NAME = 'gpt-4'
TEMPERATURE = 0.1
# JSON file that holds the experiments; it is read at start-up and rewritten
# in place as predictions are added.
RESULTS_FILENAME = 'caligraph_gpt-4_description_results.json'

In [3]:
# Load the experiments (concepts plus their entities) from the results file.
# The context manager closes the handle as soon as parsing is done instead of
# leaving it to the garbage collector.
with open(RESULTS_FILENAME, 'r') as results_file:
    experiments = json.load(results_file)

In [4]:
# Classify every entity of every experiment and checkpoint the results.
#
# For each experiment a classifier is built from the concept's id, label and
# definition. Entities that already carry a "predicted" key are skipped, so a
# previously interrupted run resumes without re-classifying them. After each
# entity the experiment's confusion matrix and timestamp are refreshed and the
# whole `experiments` structure is written back to RESULTS_FILENAME.
for i, experiment in enumerate(experiments):
    cls = experiment["concept"]
    classifier = NaturalLanguageClassifer(cls["id"], cls["label"], cls["definition"], MODEL_NAME, TEMPERATURE)
    print(f'{i+1:02}: {cls["label"]:30} {cls["id"]}')
    data = experiment["data"]
    for j, entity in enumerate(data):
        if "predicted" in entity:
            print(f'   {j+1:02}: {entity["label"]:30} {entity["id"]:45} SKIPPING')
        else:
            # end=" " keeps the cursor on this line so the FN/FP marker below
            # is appended to the same row.
            print(f'   {j+1:02}: {entity["label"]:30} {entity["id"]:45} {entity["actual"]:10} {classifier.tokens_used(entity["label"] + " " + entity["description"]):05} tokens ', end=" ")
            classification = classifier.classify(entity["label"], entity["description"])
            # Read both fields before storing either one: an entity that had
            # "predicted" but no "rationale" would be skipped on the next run
            # and never completed.
            predicted = classification["predicted"].lower()
            rationale = classification["rationale"]
            entity["predicted"] = predicted
            entity["rationale"] = rationale
            if entity["actual"] != predicted:
                # A missed positive is a false negative; anything else that
                # disagrees with the ground truth is a false positive.
                print("FN" if entity["actual"] == "positive" else "FP")
            else:
                print("")
        # Checkpoint after every entity (including skipped ones).
        # NOTE(review): entities not yet classified have no "predicted" key,
        # so that column holds NaN for them while a run is in progress --
        # confirm pycm treats those rows as intended.
        df_results = pd.DataFrame.from_records(data)
        cm = pycm.ConfusionMatrix(df_results["actual"].tolist(), df_results["predicted"].tolist(), digit=2, classes=[ 'positive', 'negative' ])
        experiment["confusion_matrix"] = cm.matrix
        experiment["created"] = datetime.now().isoformat()
        # Close the file explicitly on every checkpoint so the data is flushed
        # to disk and no handle is leaked per entity.
        with open(RESULTS_FILENAME, 'w') as results_file:
            json.dump(experiments, results_file)


01: Argentine football manager     http://caligraph.org/ontology/Argentine_football_manager
   01: Ariel Holan                    http://caligraph.org/resource/Ariel_Holan     SKIPPING
   02: César Luis Menotti             http://caligraph.org/resource/C%C3%A9sar_Luis_Menotti SKIPPING
   03: Horacio Bongiovanni            http://caligraph.org/resource/Horacio_Bongiovanni SKIPPING
   04: José García Pérez              http://caligraph.org/resource/Jos%C3%A9_Garc%C3%ADa_P%C3%A9rez SKIPPING
   05: José Varacka                   http://caligraph.org/resource/Jos%C3%A9_Varacka SKIPPING
   06: Lucas Ochandorena              http://caligraph.org/resource/Lucas_Ochandorena SKIPPING
   07: Hector Alfredo D'Angelo        http://caligraph.org/resource/Hector_Alfredo_D'Angelo SKIPPING
   08: Juan Brown                     http://caligraph.org/resource/Juan_Brown      SKIPPING
   09: Juan Quarterone                http://caligraph.org/resource/Juan_Quarterone SKIPPING
   10: Rubén Darío Insúa      