In [1]:
import json, pycm, pandas as pd
from nl_classifier import NaturalLanguageClassifer
from datetime import datetime

In [2]:
# Run configuration for this notebook.
# MODEL_NAME and TEMPERATURE are handed to the NaturalLanguageClassifer
# constructor; TEMPERATURE is presumably the LLM sampling temperature
# (NOTE(review): confirm against nl_classifier).
MODEL_NAME: str = "gpt-4"
TEMPERATURE: float = 0.1
# JSON file the experiments are read from and the results are written back to.
RESULTS_FILENAME: str = "caligraph_gpt-4_serialization_results.json"

In [3]:
# Load the list of experiment records from the results file. The context
# manager closes the file handle as soon as parsing finishes instead of
# leaving it open until garbage collection.
with open(RESULTS_FILENAME, 'r') as results_file:
    experiments = json.load(results_file)

In [4]:
def _save_experiments():
    """Write the whole `experiments` list back to RESULTS_FILENAME as JSON."""
    # The context manager guarantees the data is flushed and the handle is
    # closed after every save, rather than relying on garbage collection.
    with open(RESULTS_FILENAME, 'w') as results_file:
        json.dump(experiments, results_file)


# Classify every entity of every experiment that has not been finished yet.
# Progress is checkpointed to disk after each new classification so that an
# interrupted run can be resumed: entities that already carry "predicted" and
# experiments that already carry "created" are skipped.
for i, experiment in enumerate(experiments):
    cls = experiment["concept"]
    if "created" in experiment:
        print(f'{i+1:02}: {cls["label"]:30} {cls["id"]} SKIPPING')
        continue

    classifier = NaturalLanguageClassifer(cls["id"], cls["label"], "", MODEL_NAME, TEMPERATURE)
    print(f'{i+1:02}: {cls["label"]:30} {cls["id"]}')
    data = experiment["data"]
    for j, entity in enumerate(data):
        if "predicted" in entity:
            print(f'   {j+1:02}: {entity["label"]:30} {entity["id"]:45} SKIPPING')
            continue

        # end=" " keeps the cursor on the same line so the FN/FP marker
        # printed below lands next to the entity it belongs to.
        print(f'   {j+1:02}: {entity["label"]:30} {entity["id"]:45} {entity["actual"]:10} {classifier.tokens_used(entity["label"]):05} tokens ', end=" ")
        classification = classifier.classify(entity["label"], "")
        entity["predicted"] = classification["predicted"].lower()
        if entity["actual"] != entity["predicted"]:
            if entity["actual"] == "positive":
                print("FN")
            else:
                print("FP")
        else:
            print("")
        entity["rationale"] = classification["rationale"]
        # Checkpoint after each classification so no classifier result is lost.
        _save_experiments()

    if not data:
        # Nothing to evaluate; leave the experiment un-finalised.
        continue

    # Finalise only once every entity has a prediction. Doing this inside the
    # entity loop would build the matrix from partial data and set "created"
    # too early, causing a resumed run to skip an unfinished experiment.
    actual = [entity["actual"] for entity in data]
    predicted = [entity["predicted"] for entity in data]
    cm = pycm.ConfusionMatrix(actual, predicted, digit=2, classes=[ 'positive', 'negative' ])
    experiment["confusion_matrix"] = cm.matrix
    experiment["created"] = datetime.now().isoformat()
    _save_experiments()


01: Argentine football manager     http://caligraph.org/ontology/Argentine_football_manager SKIPPING
02: Populated place in Luxembourg  http://caligraph.org/ontology/Populated_place_in_Luxembourg SKIPPING
03: Single-camera television sitcom http://caligraph.org/ontology/Single-camera_television_sitcom SKIPPING
04: State leader in 2014           http://caligraph.org/ontology/State_leader_in_2014 SKIPPING
05: Hill station in India          http://caligraph.org/ontology/Hill_station_in_India SKIPPING
06: Newspaper in New South Wales   http://caligraph.org/ontology/Newspaper_in_New_South_Wales SKIPPING
07: FK Sarajevo player             http://caligraph.org/ontology/FK_Sarajevo_player SKIPPING
08: Medalist at the 2019 Summer Universiade http://caligraph.org/ontology/Medalist_at_the_2019_Summer_Universiade SKIPPING
09: Romania international rugby union player http://caligraph.org/ontology/Romania_international_rugby_union_player SKIPPING
10: 1921 film                      http://caligraph.o