In [1]:
from pathlib import Path
datapath = Path("../master-database-files/master-experimental/evaluate_free_associated_triple_ambiguity/")
assert datapath.exists()

In [2]:
from os import environ
from pathlib import Path
from json import loads, dumps
from random import choice
environ["OPENAI_API_KEY"] = Path("~/.openaiapikey").expanduser().read_text().strip()
from openai import OpenAI
from random import randint

openaiClient = OpenAI()
def gpt_3_5_turbo_completion(query, temperature = 1):
    answer = openaiClient.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": query
            }
        ],
        temperature = temperature,
        seed = randint(0, 1000000)
    )
    return answer.choices[0].message.content

def gpt_4_turbo_completion(query, temperature = 1):
    answer = openaiClient.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {
                "role": "system",
                "content": query
            }
        ],
        temperature = temperature,
        seed = randint(0, 1000000)
    )
    return answer.choices[0].message.content

def tryRecieveAnswer(query, completionFunction = gpt_3_5_turbo_completion, answerConversion = lambda x: True, maxTries = 10, temperature = 1):
    tryNumber = 0
    while tryNumber < maxTries:
        answer = completionFunction(query, temperature)
        try:
            answer = answerConversion(answer)
            return (answer, True)
        except:
            pass
        tryNumber += 1
    print(f"Failed to recieve answer for query: {query}")
    print(f"Answer: {answer}")
    return (None, False)

def listAnswerConversion(answer):
    result = loads(answer)
    assert isinstance(result, list)
    for item in result:
        assert isinstance(item, str)
    return result

In [3]:
from random import shuffle
def generateFreeAssociatedTriplesAmbiguityTest(nameOfTheDataset, numberOfChoices, numberOfSamples):
    topicsPath = Path(f"../master-database-files/master-adapted-subtopic-tree-generation/technical_terms.txt")
    topics = topicsPath.read_text().split("\n")
    objectIdentifyingTestPath = datapath / "object-identifying-tests" / nameOfTheDataset / "test.json"
    if objectIdentifyingTestPath.exists():
        objectIdentifyingTest = loads(objectIdentifyingTestPath.read_text())
    else:
        objectIdentifyingTest = []
    objectIdentifyingTestPath.parent.mkdir(parents = True, exist_ok = True)
    with objectIdentifyingTestPath.open("w") as f:
        f.write("[")
        f.write(",".join(["\n    " + dumps(test) for test in objectIdentifyingTest]))
        f.flush()
        i = numberOfSamples - len(objectIdentifyingTest)
        while i > 0:
            nextTerm = choice(topics)
            query = f'Semantic triples such as ["Star", "emits", "Light"] and ["Rocket", "can bring cargo to", "Space"] consists of a subject, a predicate, and an object. Give me {numberOfChoices} examples of semantic triples that contain "{nextTerm}" as subject and return them in an array formatted like [["sub1", "pred1", "obj1"], ["sub2", "pred2", "obj2"], ...]. Return nothing but the array without explanation.'
            def answerConversion(answer):
                arr = loads(answer)
                assert isinstance(arr, list)
                assert len(arr) == numberOfChoices
                for item in arr:
                    assert isinstance(item, list)
                    assert len(item) == 3
                    for subitem in item:
                        assert isinstance(subitem, str)
                return arr
            answer, success = tryRecieveAnswer(query, answerConversion = answerConversion)
            if not success:
                continue
            objectChoices = [o[2] for o in answer]
            if not len(set(objectChoices)) == len(objectChoices):
                print(f"Duplicate objects in the answer: {answer}, {objectChoices}")
                continue
            shuffle(answer)
            shuffle(objectChoices)
            randomTriple = answer[0]
            otherObjects = [o for o in objectChoices if o != randomTriple[2]]
            query = 'What is the missing object for the semantic triple ["' + randomTriple[0] + '", "' + randomTriple[1] + '" , ???]? Choose the correct object from the following choices: {' + ', '.join([f"{i + 1}: \"{objectChoices[i]}\"" for i in range(len(objectChoices))]) + '}. Return the number of the correct object as a single digit without explanation.'
            def answerConversion(answer):
                assert isinstance(answer, str)
                assert answer in [str(i + 1) for i in range(len(objectChoices))]
                return objectChoices[int(answer) - 1]
            selectedObject, success = tryRecieveAnswer(query, answerConversion = answerConversion)
            if not success:
                continue
            objectIdentifyingTest.append([randomTriple[0], randomTriple[1], randomTriple[2], otherObjects, selectedObject])
            f.write(",\n" if len(objectIdentifyingTest) > 1 else "")
            f.write("    " + dumps(objectIdentifyingTest[-1]))
            f.flush()
            i -= 1
        f.write("\n]")
            

In [4]:
from math import sqrt
from scipy.special import comb 
def calculateStatisticsForFreeAssociatedTriplesAmbiguityTest(nameOfTheDataset):
    objectIdentifyingTestPath = datapath / "object-identifying-tests" / nameOfTheDataset / "test.json"
    objectIdentifyingTest = loads(objectIdentifyingTestPath.read_text())
    falseObjectSelections = 0
    trueObjectSelections = 0
    numberOfChoices = len(objectIdentifyingTest[0][3]) + 1
    numberOfSamples = len(objectIdentifyingTest)
    for test in objectIdentifyingTest:
        if test[4] == test[2]:
            trueObjectSelections += 1
        else:
            falseObjectSelections += 1
    falseObjectSelectionPropotion = falseObjectSelections / numberOfSamples
    falseObjectSelectionVariance = 0
    for k in range(0, numberOfSamples + 1):
        falseObjectSelectionVariance += ((k - falseObjectSelections) ** 2) * comb(numberOfSamples, k, exact=True) * (falseObjectSelectionPropotion ** k) * ((1 - falseObjectSelectionPropotion) ** (numberOfSamples - k))
    falseObjectSelectionPropotionError = sqrt(falseObjectSelectionVariance) / numberOfSamples
    falseObjectSelectionPerCorrectObjectSelection = falseObjectSelections / trueObjectSelections if trueObjectSelections > 0 else None
    falseObjectSelectionPerCorrectObjectSelectionComparedToRandom = falseObjectSelectionPerCorrectObjectSelection / (numberOfChoices - 1)
    # See https://github.com/gratach/thoughts/blob/master/topics/master-thesis/equation/false-choices-per-correct-choice-compared-to-random.md
    falseObjectSelectionPerCorrectObjectSelectionComparedToRandomError = falseObjectSelectionPropotionError / (numberOfChoices - 1) / ((1 - falseObjectSelectionPropotion) ** 2)
    with (datapath / "object-identifying-tests" / nameOfTheDataset / "statistics.json").open("w") as f:
        statistics = {
            "falseObjectSelections": falseObjectSelections,
            "trueObjectSelections": trueObjectSelections,
            "falseObjectSelectionPropotion": falseObjectSelectionPropotion,
            "falseObjectSelectionVariance": falseObjectSelectionVariance,
            "falseObjectSelectionPropotionError": falseObjectSelectionPropotionError,
            "falseObjectSelectionPerCorrectObjectSelection": falseObjectSelectionPerCorrectObjectSelection,
            "falseObjectSelectionPerCorrectObjectSelectionComparedToRandom": falseObjectSelectionPerCorrectObjectSelectionComparedToRandom,
            "falseObjectSelectionPerCorrectObjectSelectionComparedToRandomError": falseObjectSelectionPerCorrectObjectSelectionComparedToRandomError
        }
        f.write(dumps(statistics, indent=4))
        print(dumps(statistics, indent=4))

In [5]:
generateFreeAssociatedTriplesAmbiguityTest("freeAssTr", 5, 500)

Duplicate objects in the answer: [['Spin (particle physics)', 'is a property of', 'particles'], ['Spin (particle physics)', 'can have', 'half-integer values'], ['Spin (particle physics)', 'plays a role in', 'quantum mechanics'], ['Spin (particle physics)', 'is analogous to', 'classical angular momentum'], ['Spin (particle physics)', 'is quantized in', 'quantum mechanics']], ['particles', 'half-integer values', 'quantum mechanics', 'classical angular momentum', 'quantum mechanics']
Duplicate objects in the answer: [['Work (thermodynamics)', 'is a form of', 'energy transfer'], ['Work (thermodynamics)', 'can be positive or negative', 'based on the direction of the force'], ['Work (thermodynamics)', 'is done when a force is applied over a distance', ''], ['Work (thermodynamics)', 'is represented by the equation W = F * d * cos(theta)', ''], ['Work (thermodynamics)', 'can be done on or by a system', '']], ['energy transfer', 'based on the direction of the force', '', '', '']
Duplicate objec

In [6]:
calculateStatisticsForFreeAssociatedTriplesAmbiguityTest("freeAssTr")

{
    "falseObjectSelections": 316,
    "trueObjectSelections": 184,
    "falseObjectSelectionPropotion": 0.632,
    "falseObjectSelectionVariance": 116.28800000000004,
    "falseObjectSelectionPropotionError": 0.02156738278048591,
    "falseObjectSelectionPerCorrectObjectSelection": 1.7173913043478262,
    "falseObjectSelectionPerCorrectObjectSelectionComparedToRandom": 0.42934782608695654,
    "falseObjectSelectionPerCorrectObjectSelectionComparedToRandomError": 0.039814550560620554
}


In [5]:
def listMostUsedTripleParts(filepath, outputPath):
    triples = [[data[0], data[1], data[2]] for data in loads(filepath.read_text())]
    subjects = {}
    predicates = {}
    objects = {}
    for part in triples:
        if part[0].lower() in subjects:
            subjects[part[0].lower()] += 1
        else:
            subjects[part[0].lower()] = 1
        if part[1].lower() in predicates:
            predicates[part[1].lower()] += 1
        else:
            predicates[part[1].lower()] = 1
        if part[2].lower() in objects:
            objects[part[2].lower()] += 1
        else:
            objects[part[2].lower()] = 1
    # Sort the parts by their frequency
    subjects = sorted(subjects.items(), key=lambda item: item[1], reverse=True)
    predicates = sorted(predicates.items(), key=lambda item: item[1], reverse=True)
    objects = sorted(objects.items(), key=lambda item: item[1], reverse=True)
    # Use only those parts that are used more than once
    subjects = [subject for subject in subjects if subject[1] > 1]
    predicates = [predicate for predicate in predicates if predicate[1] > 1]
    objects = [object for object in objects if object[1] > 1]
    outputPath.write_text(dumps({
        "subjects": subjects,
        "predicates": predicates,
        "objects": objects
    }, indent=2))

In [6]:
listMostUsedTripleParts(Path("../master-database-files/master-experimental/evaluate_free_associated_triple_ambiguity/object-identifying-tests/freeAssTr/test.json"), Path("../master-database-files/master-experimental/evaluate_free_associated_triple_ambiguity/object-identifying-tests/freeAssTr/most-used-parts.json"))

In [7]:
def printAveragePredicateWordCount(filepath):
    triples = [[data[0], data[1], data[2]] for data in loads(filepath.read_text())]
    predicateWordCount = 0
    for part in triples:
        predicateWordCount += len(part[1].split())
    averageWordLength = predicateWordCount / len(triples)
    averageWordLengthError = (sum([(len(triple[1].split()) - averageWordLength) ** 2 for triple in triples]) / len(triples) / (len(triples) - 1)) ** 0.5
    print(f"Average word count of predicates: {averageWordLength} ± {averageWordLengthError}")

In [8]:
printAveragePredicateWordCount(Path("../master-database-files/master-experimental/evaluate_free_associated_triple_ambiguity/object-identifying-tests/freeAssTr/test.json"))

Average word count of predicates: 2.432 ± 0.07319471930315548
