In [1]:
import json, pycm, pandas as pd
from nl_classifier import NaturalLanguageClassifer
from datetime import datetime

In [2]:
# Run configuration: the model identifier and temperature are handed to the
# classifier constructor below; RESULTS_FILENAME is the JSON file the
# experiments are read from and written back to.
MODEL_NAME: str = 'gpt-4'
TEMPERATURE: float = 0.1
RESULTS_FILENAME: str = 'wd_gpt-4_description_from_wikipedia_results.json'

In [3]:
# Load the list of experiments. The context manager closes the file handle as
# soon as parsing finishes instead of leaving it open until garbage collection.
with open(RESULTS_FILENAME, 'r') as results_file:
    experiments = json.load(results_file)

In [8]:
# Run every experiment that has not been completed yet. An experiment is
# treated as done once it carries a "created" timestamp, so re-running this
# cell resumes where a previous run stopped.
for i, experiment in enumerate(experiments):
    cls = experiment["concept"]
    if "created" in experiment:
        print(f'{i+1:02}: {cls["label"]:30} {cls["id"]} SKIPPING')
    else:
        classifier = NaturalLanguageClassifer(cls["id"], cls["label"], cls["definition"], MODEL_NAME, TEMPERATURE)
        print(f'{i+1:02}: {cls["label"]:30} {cls["id"]}')
        data = experiment["data"]
        for j, entity in enumerate(data):
            # Progress line is printed before the classifier call and left
            # open (end=" ") so the FP/FN marker lands on the same row.
            token_count = classifier.tokens_used(entity["label"] + " " + entity["description"])
            print(f'   {j+1:02}: {entity["label"]:30} {entity["id"]:45} {entity["actual"]:10} {token_count:05} tokens ', end=" ")
            classification = classifier.classify(entity["label"], entity["description"])
            # Lower-case the prediction so it compares against the "actual" labels.
            entity["predicted"] = classification["predicted"].lower()
            if entity["actual"] != entity["predicted"]:
                if entity["actual"] == "positive":
                    print("FN")
                else:
                    print("FP")
            else:
                print("")
            entity["rationale"] = classification["rationale"]
        df_results = pd.DataFrame.from_records(data)
        cm = pycm.ConfusionMatrix(df_results["actual"].tolist(), df_results["predicted"].tolist(), digit=2, classes=[ 'positive', 'negative' ])
        experiment["confusion_matrix"] = cm.matrix
        experiment["created"] = datetime.now().isoformat()
        # Checkpoint after each finished experiment. The with-block guarantees
        # the file is flushed and closed before the next experiment starts, so
        # an interrupted run does not leave a half-written results file behind
        # from an unclosed handle.
        with open(RESULTS_FILENAME, 'w') as results_file:
            json.dump(experiments, results_file)


01: History of cities              http://www.wikidata.org/entity/Q30324006 SKIPPING
02: Embedded system                http://www.wikidata.org/entity/Q193040 SKIPPING
03: Irregular galaxy               http://www.wikidata.org/entity/Q190397 SKIPPING
04: Plant variety (law)            http://www.wikidata.org/entity/Q1363241 SKIPPING
05: Microcomputer                  http://www.wikidata.org/entity/Q32738 SKIPPING
06: Equestrian sport               http://www.wikidata.org/entity/Q902378 SKIPPING
07: Fraud                          http://www.wikidata.org/entity/Q28813 SKIPPING
08: Seaplane                       http://www.wikidata.org/entity/Q115940 SKIPPING
09: Beer in Belgium                http://www.wikidata.org/entity/Q348229 SKIPPING
10: Seyfert galaxy                 http://www.wikidata.org/entity/Q213930 SKIPPING
11: Cities of Japan                http://www.wikidata.org/entity/Q494721 SKIPPING
12: Catholic higher education      http://www.wikidata.org/entity/Q557206 SKIPPING
13: