In [1]:
from csv import reader as csvReader
from json import loads, dumps
from os import environ
from pathlib import Path
from random import randint, choice

from openai import OpenAI
# Publish the API key stored in ~/.openaiapikey through the environment;
# OpenAI() below is constructed without an explicit key, so it relies on
# OPENAI_API_KEY being set before this point.
environ["OPENAI_API_KEY"] = (Path.home() / ".openaiapikey").read_text().strip()

# Single shared client used by every completion helper in this notebook.
openaiClient = OpenAI()
def _chatCompletion(model, query):
    """Send ``query`` to ``model`` as one system message and return the reply text.

    Args:
        model: Model name passed to the chat completions endpoint.
        query: Prompt text; sent as the content of a single "system" message.

    Returns:
        The ``content`` of the first choice in the response.
    """
    response = openaiClient.chat.completions.create(
        model=model,
        messages=[{"role": "system", "content": query}],
        # A new random seed per request, so retrying the same query is not
        # pinned to one seed.
        seed=randint(0, 1000000),
    )
    return response.choices[0].message.content


def gpt_3_5_turbo_completion(query):
    """Return gpt-3.5-turbo's reply to ``query`` (content of the first choice)."""
    return _chatCompletion("gpt-3.5-turbo", query)


def gpt_4_turbo_completion(query):
    """Return gpt-4-turbo's reply to ``query`` (content of the first choice)."""
    return _chatCompletion("gpt-4-turbo", query)

def tryRecieveAnswer(query, completionFunction = gpt_3_5_turbo_completion, answerConversion = lambda x: x, maxTries = 10):
    """Query the model repeatedly until an answer passes ``answerConversion``.

    Args:
        query: Prompt passed unchanged to ``completionFunction``.
        completionFunction: Callable taking the query and returning the raw answer.
        answerConversion: Callable that converts/validates the raw answer and
            raises on an unacceptable one. Defaults to the identity.
        maxTries: Maximum number of completion requests to make.

    Returns:
        ``(convertedAnswer, True)`` for the first answer that converts without
        raising, or ``(None, False)`` once ``maxTries`` attempts have failed
        (a failure message with the last raw answer is printed in that case).
    """
    # Pre-initialised so the failure message below also works when
    # maxTries <= 0 and no request is ever made.
    answer = None
    tryNumber = 0
    while tryNumber < maxTries:
        answer = completionFunction(query)
        try:
            return (answerConversion(answer), True)
        except Exception:
            # A rejected answer simply triggers another attempt. Only Exception
            # is caught so KeyboardInterrupt/SystemExit still abort the loop.
            pass
        tryNumber += 1
    print(f"Failed to recieve answer for query: {query}. Last answer: {answer}")
    return (None, False)

def listAnswerConversion(answer):
    """Parse ``answer`` as JSON and return it if it is a list of strings.

    Raises:
        json.JSONDecodeError: if ``answer`` is not valid JSON.
        AssertionError: if the parsed value is not a list, or if any of its
            items is not a string.
    """
    parsed = loads(answer)
    assert isinstance(parsed, list)
    assert all(isinstance(entry, str) for entry in parsed)
    return parsed

In [3]:
# Smoke test: ask gpt-4-turbo for five physics experiment names formatted as a
# bracketed list of quoted strings. The raw reply string is shown as the cell output.
gpt_4_turbo_completion('Give me the names of five experiments in physics. Return them formatted as ["experiment 1", "experiment 2", ...]. Return nothing but this list.')

'["Double-slit experiment", "Cavendish experiment", "Michelson-Morley experiment", "Rutherford gold foil experiment", "Franck-Hertz experiment"]'

In [5]:
# Directory that holds this notebook's input and output files. The path is
# relative, so it is resolved against the kernel's current working directory.
dataRootPath = Path("../master-database-files/master-experimental/experiments_in_physics")

In [10]:
def _yesNoAnswerConversion(answer):
    """Map a bare "Y"/"N" reply (any case, surrounding whitespace ignored) to a bool.

    Raises:
        AssertionError: if the reply is anything other than "y" or "n".
    """
    normalized = answer.strip().lower()
    assert normalized in ["y", "n"]
    return normalized == "y"


def filterWikidataExperiments():
    """Keep only the experiments the model classifies as physics experiments.

    Reads ``wikidata_experiments.csv`` from ``dataRootPath`` (first row is a
    header, the experiment name is the second column), asks the model a Y/N
    question per experiment via ``tryRecieveAnswer`` and writes the accepted
    names, one per line, to ``wikidata_physics_experiments.txt`` in the same
    directory. Does nothing if the CSV file does not exist.
    """
    csvPath = dataRootPath / "wikidata_experiments.csv"
    if not csvPath.exists():
        return
    # Parse with the csv module rather than str.split(","): quoted names that
    # contain commas stay intact, and newline="" is what csv.reader expects.
    with csvPath.open(newline="") as csvFile:
        rows = csvReader(csvFile)
        next(rows, None)  # skip the header row (no-op for an empty file)
        # Rows without a second column, or with an empty name, are skipped
        # instead of raising IndexError or producing an empty prompt.
        experiments = [row[1] for row in rows if len(row) > 1 and row[1]]
    physicsExperiments = []
    for experiment in experiments:
        query = f'Is the experiment "{experiment}" a physics experiment? Return Y or N without explanation.'
        answer, success = tryRecieveAnswer(query, answerConversion = _yesNoAnswerConversion)
        # Experiments whose query never produced a valid Y/N are dropped.
        if success and answer:
            physicsExperiments.append(experiment)
    physicsExperimentsPath = dataRootPath / "wikidata_physics_experiments.txt"
    physicsExperimentsPath.write_text("\n".join(physicsExperiments))

In [11]:
# Run the filter. This issues at least one model request per experiment in the
# CSV (up to 10 attempts each with the default maxTries) and overwrites
# wikidata_physics_experiments.txt under dataRootPath.
filterWikidataExperiments()