# Dgraph Walkthrough

In [None]:
%load_ext autoreload
%autoreload 2
import nltk
nltk.download('stopwords')
from graphrank.graphrank import GraphRank
from graphrank.utils import GraphUtils, TextPreprocess
from graphrank import dgraph as dg
from graphrank import build_meeting_graph as meet_graph
import networkx as nx
import text_preprocessing.preprocess as tp
import datetime
import json
import time

import pydgraph

### Load a sample meeting file

`build_meeting_graph` parses the JSON file and converts it into a graph object with a cleaner structure.

In [None]:
# Sample meeting file used throughout this walkthrough.
json_file = 'meeting.json'
# Whatever the project's read_json helper returns for that file.
meet_data = meet_graph.read_json(json_file)
# Meeting-level graph built from the same file; later cells read its edges
# through mg.edges.data().
mg = meet_graph.build_meeting_graph(json_file)
# Bare expression so the notebook displays the graph object.
mg

### Create a client stub for the Dgraph server and connect a client to it

In [None]:
def create_client_stub(addr='111.93.155.194:9080'):
    """Open a client stub to a Dgraph server.

    Args:
        addr: ``host:port`` of the server. Defaults to the address this
            walkthrough was written against, so existing zero-argument
            calls keep working.

    Returns:
        The ``pydgraph.DgraphClientStub`` connected to ``addr``.
    """
    return pydgraph.DgraphClientStub(addr)

def create_client(client_stub):
    """Wrap an already-created client stub in a ``pydgraph.DgraphClient``."""
    dgraph_client = pydgraph.DgraphClient(client_stub)
    return dgraph_client

# Build the stub and the client once; the cells that follow reuse `client`.
client_stub = create_client_stub()

client = create_client(client_stub)

### For testing purposes drop all the DB contents and set schema for Meeting Level Graph

In [None]:
def drop_all(client):
    """Send a ``drop_all`` alter operation through ``client``.

    Destructive: meant for test runs of this walkthrough only.
    Returns whatever ``client.alter`` returns.
    """
    wipe_operation = pydgraph.Operation(drop_all=True)
    return client.alter(wipe_operation)


def set_schema(client):
    """Install the meeting-level graph schema through ``client.alter``.

    The schema indexes the identifier predicates with ``@index(exact)`` so
    they can be matched with ``eq(...)`` in queries, and declares
    ``neighbour`` with ``@reverse``.

    Returns whatever ``client.alter`` returns.
    """
    # NOTE(review): the schema declares `segments`, while the ingestion cell
    # writes the predicate `segmentIds` -- confirm which name is intended.
    meeting_schema = """
    contextId: string @index(exact) .
    contextInstanceId: string @index(exact).
    contextInstanceIds: uid .
    segments: uid .
    segmentId: string @index(exact) .
    keyword: string @index(exact) .
    name: string @index(exact) .
    meetingid: string @index(exact) .
    score: int .
    neighbour: uid @reverse .
    user.id: string @index(exact) .
    """
    schema_operation = pydgraph.Operation(schema=meeting_schema)
    return client.alter(schema_operation)

# Start from an empty database (testing only), then install the schema.
drop_all(client)

set_schema(client)

### Using the meeting-level nx graph object, get all the nodes and edges, plus the contextId & contextInstanceId.

In [None]:
# Materialise the meeting graph's edges; each entry is indexed below as
# [0] source, [1] target, [2] attribute dict.
graph_obj = list(mg.edges.data())

# The first edge is treated as the context -> context-instance link.
contextId, contextInstanceId = graph_obj[0][0], graph_obj[0][1]

# Running totals printed once ingestion has finished.
nodes_count = edges_count = segments_count = 0

# Module-level flag read and cleared by write_node: True until the first
# keyword node has been linked to the context instance.
initial = True

In [None]:
# The below code uses Networkx package to build custom graph of the keywords

def build_custom_graph(input_text, window=2):
    """Build an undirected co-occurrence graph from ``(word, pos)`` pairs.

    Every word becomes a node. An edge joins two different words when both
    carry a truthy POS tag and the second one lies inside the look-ahead
    window that starts at the first (``window=2`` links direct neighbours).

    Args:
        input_text: sequence of ``(word, pos)`` pairs.
        window: size of the look-ahead window, counting the word itself.

    Returns:
        The populated ``nx.Graph``.
    """
    keyword_graph = nx.Graph()
    keyword_graph.add_nodes_from(pair[0] for pair in input_text)

    total = len(input_text)
    for left_idx, (left_word, left_pos) in enumerate(input_text):
        if left_pos:
            window_end = min(left_idx + window, total)
            for right_idx in range(left_idx + 1, window_end):
                right_word, right_pos = input_text[right_idx]
                if not right_pos or left_word == right_word:
                    continue
                keyword_graph.add_edge(left_word, right_word)
    return keyword_graph


# The function below creates a node (specifically a word-graph keyword node) in the graph DB if it does not already exist

def write_node(client, node, contextInstanceId, contextInstanceId_uid, position, seg_position):
    """Create the keyword node ``node`` for this meeting if it is missing.

    The node is looked up by ``name`` among nodes carrying the same
    ``contextInstanceId``. When absent it is created, and while the
    module-level ``initial`` flag is still true the new node is also attached
    to the context-instance node through a ``wordGraph`` edge, after which
    the flag is cleared.

    Errors are reported on stdout and swallowed (best effort), so one bad
    keyword does not abort the whole ingestion.

    Args:
        client: Dgraph client exposing ``query`` and ``txn``.
        node: keyword text.
        contextInstanceId: identifier of the meeting the keyword belongs to.
        contextInstanceId_uid: uid of the context-instance node.
        position: unused; kept for interface compatibility.
        seg_position: unused; kept for interface compatibility.

    Returns:
        Always ``True``.
    """
    global initial

    # Does a keyword with this name already exist within the same meeting?
    query = """query all($a: string, $b: string) {
                    all(func: eq(name, $a)) @filter(eq(contextInstanceId, $b)){
                            uid
                        }
                    }"""
    variables = {'$a': node, '$b': contextInstanceId}

    txn = None
    try:
        res = client.query(query, variables=variables)
        if json.loads(res.json)['all']:
            return True

        txn = client.txn()
        assigned = txn.mutate(
            set_obj={
                'uid': '_:' + node,
                'name': node,
                'contextInstanceId': contextInstanceId,
            },
            commit_now=True,
        )

        # A single blank node was submitted; take the uid reported for it.
        keyword_uid = None
        for blank_name in assigned.uids:
            keyword_uid = assigned.uids[blank_name]

        # Only the first keyword created is linked to the context instance.
        if initial and keyword_uid is not None:
            txn = client.txn()
            txn.mutate(
                set_obj={
                    'uid': contextInstanceId_uid,
                    'wordGraph': {'uid': keyword_uid},
                },
                commit_now=True,
            )
            initial = False
    except Exception as e:
        print('Error: {}'.format(e))
    finally:
        # Release the transaction instead of committing it: every write above
        # used commit_now=True, and committing a transaction that already
        # finished (for example after a failed mutate) would raise here and
        # defeat the error handling above.
        if txn is not None:
            txn.discard()
    return True


# The below function updates a relationship (specifically btw wordgraph nodes) in the Graph DB


def write_edge(client, node, contextInstanceId, contextInstanceId_uid, text):
    """Link two keyword nodes of the same meeting with a ``neighbour`` edge.

    Both endpoints are looked up by ``name`` among nodes carrying the same
    ``contextInstanceId``; the edge is written from the first to the second.
    Errors (including a missing endpoint) are reported on stdout and
    swallowed (best effort).

    Args:
        client: Dgraph client exposing ``query`` and ``txn``.
        node: pair ``(left_name, right_name)`` of keyword texts.
        contextInstanceId: identifier of the meeting both keywords belong to.
        contextInstanceId_uid: unused; kept for interface compatibility.
        text: unused; kept for interface compatibility.

    Returns:
        Always ``True``.
    """
    query = """query all($a: string, $b: string) {
                       all(func: eq(name, $a)) @filter(eq(contextInstanceId, $b)){
                           uid
                       }
                }"""

    def _keyword_uid(name):
        # Return the uid of the keyword called `name` in this meeting, or None.
        res = client.query(query, variables={'$a': name, '$b': contextInstanceId})
        matches = json.loads(res.json)['all']
        return matches[0]['uid'] if matches else None

    txn = None
    try:
        node_left = _keyword_uid(node[0])
        node_right = _keyword_uid(node[1])
        if node_left is None or node_right is None:
            print('Error: keyword node not found for edge {}'.format(node))
            return True

        txn = client.txn()
        txn.mutate(
            set_obj={
                'uid': node_left,
                'neighbour': [{'uid': node_right}],
            },
            commit_now=True,
        )
    except Exception as e:
        print('Error: {}'.format(e))
    finally:
        # Release rather than commit: the write used commit_now=True, and
        # committing a transaction that already finished (for example after a
        # failed mutate) would raise here and defeat the handling above.
        if txn is not None:
            txn.discard()
    return True



### Start the Db transaction & perform necessary updates & queries on the GraphDB

In [None]:
# Open a transaction object. Every mutation below is sent with
# commit_now=True, so after each write a fresh transaction is opened for the
# next one.
txn = client.txn()

# Indexed lookup of the context node: eq() matches on the `contextId`
# predicate and the query returns the uid of the matching node, if any.
query = """query all($a: string) {
               all(func: eq(contextId, $a)) {
                   uid
                }
           }"""

variables = {'$a': contextId }
res = client.query(query, variables=variables)
ppl = json.loads(res.json)


# Reuse the existing context node when the lookup found one.
if len(ppl['all'])>0 :
        contextId_uid = ppl['all'][0]['uid']

# Otherwise create the context node and ingest the meeting.
# NOTE(review): the context-instance lookup and the whole segment ingestion
# live inside this else branch, so nothing is written when the contextId
# already exists -- confirm that this is intended.
else:
    print ("Creating new ContextId")

    data = {
        'uid': '_:' + contextId,     # '_:<identifier>' is a blank node; Dgraph assigns it a new uid.
        'contextId' : contextId,
    }

    # mutate() sends the write; commit_now=True commits it immediately.
    assigned = txn.mutate(set_obj=data,commit_now=True)
    txn = client.txn()

    # Read back the uid assigned to the newly created context node.
    for i in assigned.uids:
        contextId_uid = assigned.uids[i]

    print ("Context ID: ", contextId_uid)


    # Same approach for contextInstanceId: look it up, create it if missing.

    query = """query all($a: string) {
                   all(func: eq(contextInstanceId, $a)) {
                       uid
                   }
            }"""

    variables = {'$a': contextInstanceId }
    res = client.query(query, variables=variables)
    ppl = json.loads(res.json)

    if len(ppl['all'])>0 :
        contextInstanceId_uid = ppl['all'][0]['uid']

    else:
        print ("Creating new ContextInstanceId")

        data = {
            'uid': '_:' + contextInstanceId,
            'contextInstanceId' : contextInstanceId
        }

        assigned = txn.mutate(set_obj=data,commit_now=True)
        txn = client.txn()

        for i in assigned.uids:
            contextInstanceId_uid = assigned.uids[i]

        print ("Creating a relation between context ID and Context Instance Id")

        # Edge (predicate) from the context node to the context-instance node.

        data = {
            'uid' : contextId_uid,
            'contextInstanceIds':[
                {
                    'uid' : contextInstanceId_uid
                }
            ]
        }

        assigned = txn.mutate(set_obj=data,commit_now=True)
        txn = client.txn()

    print ("Context Instance ID: ", contextInstanceId_uid)

    print ("\nCreating Segments: ")

    # Walk the remaining edges of the meeting graph (the first one is the
    # context -> context-instance link handled above).

    for seg_position, seg in enumerate(graph_obj[1:]):

        # Only "has_segment" edges describe a segment; skip everything else.
        if seg[2].get('rel') != "has_segment":
            continue

        print ("Processing Segment " + str(seg_position) )
        segments_count+=1
        segmentId = seg[1]

        # Find who transcribed and who spoke this segment. Both values are
        # reset for every segment so a segment without a speaker can never
        # inherit the previous segment's speaker, and the 'rel' lookup is done
        # on the edge being inspected (`s`), not on the segment edge.
        transcribedBy = ""
        spokenBy = None
        check1, check2 = False, False
        for s in graph_obj:
            if s[0] != segmentId:
                continue
            rel = s[2].get('rel')
            if rel=="transcribed_by":
                transcribedBy = s[1]
                check1 = True
            if rel=="spoken_by":
                spokenBy = s[1]
                check2 = True
            if check1 and check2:
                break

        # Look up (or create) the user node of the speaker, when there is one.

        user_uid = None
        if spokenBy is None:
            print ("No speaker found for segment " + str(segmentId))
        else:
            query = """query all($a: string) {
                       all(func: eq(user.id, $a)) {
                           uid
                       }
                    }"""

            variables = {'$a': spokenBy }
            res = client.query(query, variables=variables)
            ppl = json.loads(res.json)
            if len(ppl['all'])>0 :
                user_uid = ppl['all'][0]['uid']

            else:
                data = {
                        'uid' : '_:' + spokenBy,
                        'user.id' : spokenBy
                    }

                assigned = txn.mutate(set_obj=data,commit_now=True)
                txn = client.txn()

                for i in assigned.uids:
                    user_uid = assigned.uids[i]

        # Create the segment node; the spokenBy edge is only added when a
        # speaker node is available.

        data = {
            'uid': '_:' + segmentId,
            'segmentId' : segmentId,
            'transcriber' : transcribedBy,
        }
        if user_uid is not None:
            data['spokenBy'] = {
                'uid' : user_uid
            }

        assigned = txn.mutate(set_obj=data,commit_now=True)
        txn = client.txn()

        for i in assigned.uids:
            segmentId_uid = assigned.uids[i]

        # Edge (predicate) from the context-instance node to the segment node.

        data = {
            'uid' : contextInstanceId_uid,
            'segmentIds':[
                {
                    'uid' : segmentId_uid
                }
            ]
        }


        assigned = txn.mutate(set_obj=data,commit_now=True)
        txn = client.txn()


        # Build the keyword graph of this segment and extend the word graph
        # stored in Dgraph with its nodes and edges.

        segment_text = seg[2].get('text')
        if segment_text:

            # Text preprocessing with the project's own package; pos=True
            # also returns the POS-tagged sentences.
            text_sent, text_pos = tp.preprocess(segment_text, stop_words=False, lemma=False, pos=True)
            pos_text = [(word,pos) for sent in text_pos for word, pos in sent]

            # Co-occurrence graph over the (word, pos) pairs.
            g = build_custom_graph(pos_text,window=2)
            nodes = list(dict(g.nodes()).keys())
            edges = list(dict(g.edges()).keys())
            nodes_count+=len(nodes)
            edges_count+=len(edges)*2

            # One keyword node per word of the segment (created if missing).
            for position, node in enumerate(nodes):
                write_node(client, node, contextInstanceId, contextInstanceId_uid, position, seg_position)

            # One neighbour edge per co-occurring pair of keywords.
            for edge in edges:
                write_edge(client, edge, contextInstanceId, contextInstanceId_uid, segment_text)


# Every write above was already committed through commit_now=True; this only
# closes the last transaction object that was opened.

txn.commit()
print ("Word Graph Nodes Count: ", nodes_count, "Word Graph Edges Count: ", edges_count, "Segments Count: ", segments_count)



## The graph can be visualised by going to http://111.93.155.194:8000/, clicking "launch offline" and pasting the query below.
~~~~
{
 find(func: eq(contextId,"6baa3490-69d6-48fc-b5d4-3994e3e8fae0")) @recurse{
      expand(_all_){
        expand(_all_)
      }
    }
}
~~~~

![example Visualization](dgraph_test.png)

## or use the below code to see the json response

In [None]:
# Connect again so this cell can be run on its own.
client_stub = pydgraph.DgraphClientStub('111.93.155.194:9080')
client = pydgraph.DgraphClient(client_stub)
contextInstanceId = "b34997"

# Resolve the context instance to its uid.
query = """query all($a: string) {
                   all(func: eq(contextInstanceId, $a)) {
                       uid
                   }
            }"""

variables = {'$a': contextInstanceId }
res = client.query(query, variables=variables)
ppl = json.loads(res.json)

if len(ppl['all'])>0 :
    contextInstanceId_uid = ppl['all'][0]['uid']

    # Recursively expand everything reachable from the context instance.
    query = """ query all($a: string){
                find(func: uid($a)) @recurse{
                expand(_all_){
                    expand(_all_)
              }
            }
            }
        """

    variable = {'$a': contextInstanceId_uid}
    res = client.query(query, variables=variable)
    ppl = json.loads(res.json)
else:
    # Without a match there is no uid to expand; running the second query
    # anyway would use a stale or undefined contextInstanceId_uid.
    print ("No node found for contextInstanceId " + contextInstanceId)

# Bare expression so the notebook displays the JSON response.
ppl