In [13]:
from os import environ
from pathlib import Path
from json import loads, dumps
from random import choice
environ["OPENAI_API_KEY"] = Path("~/.openaiapikey").expanduser().read_text().strip()

from openai import OpenAI
from random import randint

openaiClient = OpenAI()
def gpt_3_5_turbo_completion(query, temperature = 1):
    answer = openaiClient.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": query
            }
        ],
        temperature = temperature,
        seed = randint(0, 1000000)
    )
    return answer.choices[0].message.content

def gpt_4_turbo_completion(query, temperature = 1):
    answer = openaiClient.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {
                "role": "system",
                "content": query
            }
        ],
        temperature = temperature,
        seed = randint(0, 1000000)
    )
    return answer.choices[0].message.content

def tryRecieveAnswer(query, completionFunction = gpt_4_turbo_completion, answerConversion = lambda x: x, maxTries = 10, temperature = 1):
    tryNumber = 0
    while tryNumber < maxTries:
        answer = completionFunction(query, temperature)
        try:
            answer = answerConversion(answer)
            return (answer, True)
        except:
            pass
        tryNumber += 1
    print(f"Failed to recieve answer for query: {query}")
    return (None, False)

def listAnswerConversion(answer):
    result = loads(answer)
    assert isinstance(result, list)
    for item in result:
        assert isinstance(item, str)
    return result

In [9]:

data = "the electron is a subatomic particle with a negative one elementary electric charge.[13] Electrons belong to the first generation of the lepton particle family,[14] and are generally thought to be elementary particles because they have no known components or substructure.[1] The electron's mass is approximately 1/1836 that of the proton.[15] Quantum mechanical properties of the electron include an intrinsic angular momentum (spin) of a half-integer value, expressed in units of the reduced Planck constant, ħ. Being fermions, no two electrons can occupy the same quantum state, per the Pauli exclusion principle.[14] Like all elementary particles, electrons exhibit properties of both particles and waves: They can collide with other particles and can be diffracted like light. The wave properties of electrons are easier to observe with experiments than those of other particles like neutrons and protons because electrons have a lower mass and hence a longer de Broglie wavelength for a given energy. "
from nltk.tokenize import sent_tokenize
import nltk
nltk.download('punkt')
sentences = sent_tokenize(data)

[nltk_data] Downloading package punkt to /home/gratach/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
def markSentenceConcepts(sentence):
    query = f'The sentence "The earth is a plaet" containes the two concepts "earth" and "planet". They can be marked in the sentence by sorrounding them with curly braces: "The {{earth}} is a {{planet}}". In the same way the concepts are marked in the sentence "The {{electron}} is a {{subatomic particle}} with a {{negative electric charge}}. Please mark all concepts in the following sentence: "{sentence}". Return nothing but the marked sentence.'
    def answerConversion(answer):
        ret = []
        lastPosition = 0
        currentPosition = 0
        while currentPosition < len(answer):
            if answer[currentPosition] == '{':
                if lastPosition < currentPosition:
                    ret.append(answer[lastPosition:currentPosition])
                currentPosition += 1
                lastPosition = currentPosition
                while answer[currentPosition] != '}':
                    currentPosition += 1
                ret.append([answer[lastPosition:currentPosition]])
                lastPosition = currentPosition + 1
            currentPosition += 1
        if lastPosition < len(answer):
            ret.append(answer[lastPosition:])
        return ret
    return tryRecieveAnswer(query, answerConversion = answerConversion)[0]

In [5]:
markSentenceConcepts("The electron is a subatomic particle with a negative one elementary electric charge.")

['The ',
 ['electron'],
 ' is a ',
 ['subatomic particle'],
 ' with a ',
 ['negative one elementary electric charge'],
 '.']

In [7]:
def resolveCoreference(sentence, contextSentences):
    contextSentenceString = "[" + ", ".join([f'"{contextSentence}"' for contextSentence in contextSentences]) + "]"
    query = f'Example: The sentence "He gave her a book and she read it." with the context sentence ["John met Merry in the school"] can be resolved to "John gave Merry a book and Marry read the book". Please resolve the coreferences in the following sentence: "{sentence}" with the context sentences {contextSentenceString}. Return only the sentence with the coreference resolved.'
    return tryRecieveAnswer(query)[0]

In [10]:
def resolveSentencesCoreference(sentences):
    resolvedSentences = []
    for i in range(1, len(sentences)):
        context = sentences[i-2:i]if i > 1 else [sentences[0]] if i == 1 else []
        resolvedSentences.append(resolveCoreference(sentences[i], context))
    return resolvedSentences

In [14]:
resolvedSentences = resolveSentencesCoreference(sentences)
resolvedSentences

['Electrons belong to the first generation of the lepton particle family, and are generally thought to be elementary particles because electrons have no known components or substructure.',
 "The electron's mass is approximately 1/1836 that of the proton's mass.",
 'Quantum mechanical properties of the electron include an intrinsic angular momentum (spin) of a half-integer value, expressed in units of the reduced Planck constant, ħ.',
 'Being fermions, no two electrons can occupy the same quantum state, per the Pauli exclusion principle which states that particles of half-integer spin cannot occupy the same quantum state.',
 '"Like all elementary particles, electrons exhibit properties of both particles and waves: electrons can collide with other particles and electrons can be diffracted like light."',
 'The wave properties of electrons are easier to observe with experiments than the wave properties of other particles like neutrons and protons because electrons have a lower mass and hen

In [15]:
from json import dumps
def safeVisNetworkJSONToHTMLFile(jsonData, htmlFilePath):
    with open(htmlFilePath, "w") as htmlFile:
        htmlFile.write(
            f"""
            <!DOCTYPE html>
            <html lang="en-US">
            
            <head>
                <meta charset="UTF-8">
                <meta name="viewport" content="width=device-width, initial-scale=1">
                <title>Inline vis</title>
            </head>
            
            <body>
                <div id="mynetwork" style="width:100vw; height:100vh;"></div>
                <script src=" https://cdn.jsdelivr.net/npm/vis-data@7.1.9/peer/umd/vis-data.min.js "></script>
                <script src=" https://cdn.jsdelivr.net/npm/vis-network@9.1.9/peer/umd/vis-network.min.js "></script>
                <link href=" https://cdn.jsdelivr.net/npm/vis-network@9.1.9/styles/vis-network.min.css " rel="stylesheet">
                <script>
                    var jsonData = {dumps(jsonData)};
                    // create a network
                    var container = document.getElementById("mynetwork");
                    var options = {{}};
                    var network = new vis.Network(container, jsonData, options);
                </script>
            </body>
            
            </html>
            """
        )

In [16]:
def convertSentenceNetworkToVisNetworkJSON(sentenceNetwork):
    """
    Converts a sentence network to a vis network JSON object
    Format of sentenceNetwork:
    {
        <id string of the sentence>: [<sentence compount 1>, <sentence compount 2>, ...],
        ...
    }
    where sentence compount is eather a string,
        which means that it is not a concept but a predicate, adjective, etc.
    or a list of the form:
        [<id string of the concept>, <name string of the concept>] or
        [<id string of the concept>]
        which means that it is a concept
    """
    addedNodeIds = set()
    nodes = []
    edges = []
    internalIdCounter = 0
    def getNextInternalId():
        nonlocal internalIdCounter
        internalIdCounter += 1
        return str(internalIdCounter)
    # Create the sentence head nodes
    for sentenceID in sentenceNetwork.keys():
        sentenceID = "_" + sentenceID
        assert not sentenceID in addedNodeIds
        nodes.append({"id": sentenceID, "shape": "diamond", "size": 20, "color": {"background": "rgb(255, 70, 70)"}})
        addedNodeIds.add(sentenceID)
    # Create the sentence tails
    for sentenceID, sentenceCompounds in sentenceNetwork.items():
        lastCompointID = "_" + sentenceID
        for sentenceCompound in sentenceCompounds:
            if isinstance(sentenceCompound, str):
                id = getNextInternalId()
                nodes.append({"id": id, "label": sentenceCompound, "shape": "box", "color": {"background": "rgb(255, 230, 34)"}, "font": {"size": 15}})
            else:
                linkNodeId = getNextInternalId()
                nodes.append({"id": linkNodeId, "shape": "dot", "size": 10, "color": {"background": "rgb(255, 130, 70)"}})
                id, name, newDefined = (sentenceCompound + [None, None])[:3]
                id = "_" + id
                if not id in addedNodeIds:
                    nodes.append({"id": id, "label": name, "shape": "ellipse", "size": 40, "color": {"background": "rgb(90, 230, 255)", "border": "rgb(0, 0, 0)"}, "font": {"size": 20}})
                    addedNodeIds.add(id)
                else:
                    name = None
                if newDefined:
                    edges.append({"from": linkNodeId, "to": id, "arrows": "from", "color": {"color": "gray"}, "value" : 3})
                else:
                    edges.append({"from": linkNodeId, "to": id, "arrows": "from", "color": {"color": "gray"}})
                id = linkNodeId
            edges.append({"from": lastCompointID, "to": id, "value": 3, "arrows": "to", "color": {"color": "rgb(255, 180, 70)"}})
            lastCompointID = id
    return {"nodes": nodes, "edges": edges}

In [17]:
def saveSentenceNetworkToVisNetworkHTMLFile(sentenceNetwork, htmlFilePath):
    safeVisNetworkJSONToHTMLFile(convertSentenceNetworkToVisNetworkJSON(sentenceNetwork), htmlFilePath)

In [18]:
class SentecnceNetwork:
    def __init__(self):
        self.sentences = set()
        self.concepts = set()
        self.idCounter = 0
    def sentence(self, *sentenceCompounds):
        sentence = Sentence(self, sentenceCompounds, self.idCounter)
        self.idCounter += 1
        self.sentences.add(sentence)
        return sentence
    def concept(self, name, newDefined = False):
        concept = Concept(self, name, self.idCounter, newDefined)
        self.idCounter += 1
        self.concepts.add(concept)
        return concept
    def getNetwork(self):
        network = {}
        for sentence in self.sentences:
            network[str(sentence.sentenceID)] = [compound if isinstance(compound, str) else [str(compound.sentenceID)] if isinstance(compound, Sentence) else [str(compound.id), compound.name, compound.definingSentence == sentence] for compound in sentence.compounds]
        return network
        
class Sentence:
    def __init__(self, sentenceNetwork, sentenceCompounds, sentenceID):
        self.sentenceNetwork = sentenceNetwork
        self.sentenceID = sentenceID
        self.compounds = sentenceCompounds
        for compound in sentenceCompounds:
            if isinstance(compound, Concept):
                if compound.newDefined:
                    compound.definingSentence = self
                    compound.newDefined = False

class Concept:
    def __init__(self, sentenceNetwork, name, id, newDefined = False):
        self.sentenceNetwork = sentenceNetwork
        self.name = name
        self.id = id
        self.definingSentence = None
        self.newDefined = newDefined

In [19]:
def convertTextIntoSentenceNetwork(text):
    sentenceNetwork = SentecnceNetwork()
    sentences = sent_tokenize(text)
    sentences = resolveSentencesCoreference(sentences)
    conceptDict = {}
    for sentence in sentences:
        sentenceCompounts = markSentenceConcepts(sentence)
        for i in range(len(sentenceCompounts)):
            if isinstance(sentenceCompounts[i], list):
                conceptName = sentenceCompounts[i][0].lower()
                if not conceptName in conceptDict:
                    conceptDict[conceptName] = sentenceNetwork.concept(conceptName)
                sentenceCompounts[i] = conceptDict[conceptName]
        sentenceNetwork.sentence(*sentenceCompounts)
    return sentenceNetwork

In [20]:
sentenceNet = convertTextIntoSentenceNetwork(data)
saveSentenceNetworkToVisNetworkHTMLFile(sentenceNet.getNetwork(), "sentenceNetwork.html")