# Construct a RAG based on a Knowledge Graph built from text using an LLM

## Part 2

1. Store the graph in a graph database


## Initialize graph database

In [117]:
################ Start KUZU Graph DB #################
# All imports used by this notebook live in this first cell so that
# "Restart Kernel -> Run All" works: later cells use json, re and pd.
import json
import re
import shutil

import kuzu
import pandas as pd

# Location of the on-disk Kuzu database, relative to the notebook.
DB_PATH = "./kuzu"

# Remove whatever a previous run left behind so the schema and data below are
# always created from scratch (ignore_errors: nothing to remove on a first run).
shutil.rmtree(DB_PATH, ignore_errors=True)

db = kuzu.Database(DB_PATH)
conn = kuzu.Connection(db)

In [118]:
# Schema of the knowledge graph: three node tables (Article, Chunk, Concept)
# followed by the relationship tables that connect them
# (Chunk -> Article, Concept -> Chunk, Concept -> Concept).
SCHEMA_STATEMENTS = [
    "CREATE NODE TABLE Article(page STRING, language STRING, PRIMARY KEY (page))",
    "CREATE NODE TABLE Chunk(uuid STRING, content STRING, PRIMARY KEY (uuid))",
    "CREATE NODE TABLE Concept(name STRING, importance INT32, category STRING, PRIMARY KEY (name))",
    "CREATE REL TABLE ChunkToArticle(FROM Chunk TO Article, relation_type STRING)",
    "CREATE REL TABLE ConceptToChunk(FROM Concept TO Chunk, relation_type STRING)",
    "CREATE REL TABLE ConceptToConcept(FROM Concept TO Concept, relation STRING, relation_type STRING, weight INT32, chunk_ids STRING)",
]

# Execute each DDL statement and close its result immediately, as the other
# cells of this notebook do. Looping also keeps the QueryResult repr of the
# last statement out of the cell output.
for ddl in SCHEMA_STATEMENTS:
    conn.execute(ddl).close()


<kuzu.query_result.QueryResult at 0x11fd38350>

In [119]:
# Collect the name of every table in the database; `tables` is reused by the
# next cell to inspect each table's columns.
tables = []
table_listing = conn.execute("CALL SHOW_TABLES() RETURN *")
while table_listing.has_next():
    table_row = table_listing.get_next()
    print(table_row)
    # The first column of each row is the table name.
    tables.append(table_row[0])
table_listing.close()

['ConceptToConcept', 'REL', '']
['ChunkToArticle', 'REL', '']
['Chunk', 'NODE', '']
['Article', 'NODE', '']
['Concept', 'NODE', '']
['ConceptToChunk', 'REL', '']


In [120]:
# Print the column layout of every table collected from SHOW_TABLES.
for table_name in tables:
    print(table_name)
    table_info = conn.execute(f"CALL TABLE_INFO('{table_name}') RETURN *")
    while table_info.has_next():
        column_row = table_info.get_next()
        print(column_row)
    table_info.close()

ConceptToConcept
[1, 'relation', 'STRING']
[2, 'relation_type', 'STRING']
[3, 'weight', 'INT32']
[4, 'chunk_ids', 'STRING']
ChunkToArticle
[1, 'relation_type', 'STRING']
Chunk
[0, 'uuid', 'STRING', True]
[1, 'content', 'STRING', False]
Article
[0, 'page', 'STRING', True]
[1, 'language', 'STRING', False]
Concept
[0, 'name', 'STRING', True]
[1, 'importance', 'INT32', False]
[2, 'category', 'STRING', False]
ConceptToChunk
[1, 'relation_type', 'STRING']


In [121]:
######## Insert the article node into the database #########
create_article = "CREATE (u:Article {page: 'Albert Einstein', language: 'en'})"
conn.execute(create_article).close()

In [122]:
####### Insert chunks into the database and link them to article "Albert Einstein"
import json
import re

# Read explicitly as UTF-8 so loading does not depend on the platform's
# default encoding.
with open("./output/db/chunks.json", "r", encoding="utf-8") as f:
    chunks = json.load(f)
print(len(chunks))

for chunk in chunks:
    uuid = chunk['kwargs']['metadata']['uuid']
    # Collapse every run of characters other than ASCII letters and '.' into a
    # single space. This is lossy (digits and punctuation are dropped), but it
    # guarantees the text contains no quote that could break the string-built
    # statement below.
    content = re.sub('[^A-Z.a-z]+', ' ', chunk['kwargs']['page_content'])

    # Create the chunk node.
    # NOTE(review): uuid is interpolated unescaped - assumes it is a plain UUID string.
    statement = f"CREATE (u:Chunk {{uuid: '{uuid}', content: '{content}'}})"
    results = conn.execute(statement)
    results.close()

    # Attach the chunk that was just created to the article node.
    statement = f"MATCH (u1:Chunk), (u2:Article) WHERE u1.uuid = '{uuid}' AND u2.page = 'Albert Einstein' CREATE (u1)-[:ChunkToArticle {{relation_type: 'belongsTo'}}]->(u2)"
    results = conn.execute(statement)
    results.close()

97


In [123]:
# Sanity check: show a few Chunk -> Article links as a DataFrame.
chunk_article_query = (
    'MATCH (a:Chunk)-[f:ChunkToArticle]->(b:Article) '
    'RETURN a.uuid as uuid, f.relation_type AS relation, b.page as page;'
)
results = conn.execute(chunk_article_query).get_as_df()
results.head()

Unnamed: 0,uuid,relation,page
0,0af897c6-3391-496e-a93c-a5a18b350f50,belongsTo,Albert Einstein
1,4a87bc98-7290-4788-a482-0af3c1767dbf,belongsTo,Albert Einstein
2,a92380ed-183b-412b-953e-4e7039f9b30d,belongsTo,Albert Einstein
3,7f9dbafe-f39b-4ae5-96d1-f58e72415df2,belongsTo,Albert Einstein
4,57187cee-a334-4c5f-bfab-4551cf726c95,belongsTo,Albert Einstein


In [124]:
# Create one Concept node per distinct (sanitized) entity name and link it to
# every chunk listed for it in concepts.json.
# `concept_ids` maps the sanitized name to the first record seen for it; the
# cell that inserts concept-to-concept relations uses it as a membership check.
concept_ids = {}
with open("./output/db/concepts.json", "r", encoding="utf-8") as f:
    concepts = json.load(f)
print(len(concepts))

for concept in concepts:
    # Replace every run of characters other than ASCII letters and '.' with
    # '_' so the name is safe inside the quoted literals below.
    name = re.sub('[^A-Z.a-z]+', '_', concept['entity'])
    category = concept['category']
    importance = concept['importance']
    uuid = concept['uuid']

    if name not in concept_ids:
        concept_ids[name] = concept
        properties = [f"name: '{name}'", f"category: '{category}'"]
        # The Concept table declares `importance INT32`, so persist it. The
        # value of the first record seen for this name is the one stored.
        try:
            properties.append(f"importance: {int(importance)}")
        except (TypeError, ValueError):
            # Not convertible to an integer: leave the property unset rather
            # than emit an invalid statement.
            pass
        property_map = ", ".join(properties)
        statement = "CREATE (u:Concept {" + property_map + "})"
        results = conn.execute(statement)
        results.close()

    # Every record (not only the first per name) links its concept to a chunk.
    statement = f"MATCH (u1:Concept), (u2:Chunk) WHERE u1.name = '{name}' AND u2.uuid = '{uuid}' CREATE (u1)-[:ConceptToChunk {{relation_type: 'occures'}}]->(u2)"
    results = conn.execute(statement)
    results.close()

680


In [125]:
# Sanity check: show a few Concept -> Chunk links as a DataFrame.
concept_chunk_query = (
    'MATCH (a:Concept)-[f:ConceptToChunk]->(b:Chunk) '
    'RETURN a.name as name, f.relation_type AS relation, b.uuid as uuid;'
)
results = conn.execute(concept_chunk_query).get_as_df()
results.head()

Unnamed: 0,name,relation,uuid
0,Albert_Einstein,occures,0af897c6-3391-496e-a93c-a5a18b350f50
1,Albert_Einstein,occures,10326035-baa9-45c3-ad5f-58de2098b98a
2,Albert_Einstein,occures,54dc8c74-0933-4c47-9228-9b40070a6adc
3,Albert_Einstein,occures,fe7f8933-eaf1-4080-97bf-ab18cadff6de
4,Albert_Einstein,occures,3a198290-8dcd-4840-bb95-003e1be1b1c5


In [126]:
# Load the cleaned edge list; only the columns needed to build the
# concept-to-concept relations are read.
edge_columns = ["source", "target", "relation", "weight", "uuids"]
df = pd.read_csv("./output/db/einstein_graph_data_cleaned.csv", usecols=edge_columns)
df.head()

Unnamed: 0,source,target,relation,weight,uuids
0,Albert Einstein,theory of relativity,developed the theory of relativity,1,['0af897c6-3391-496e-a93c-a5a18b350f50']
1,Albert Einstein,quantum mechanics,made important contributions to quantum mechanics,1,['0af897c6-3391-496e-a93c-a5a18b350f50']
2,Albert Einstein,mass–energy equivalence formula E=mc2,developed the mass–energy equivalence formula ...,1,['0af897c6-3391-496e-a93c-a5a18b350f50']
3,Albert Einstein,photoelectric effect,discovered the law of the photoelectric effect,1,['0af897c6-3391-496e-a93c-a5a18b350f50']
4,Albert Einstein,quantum theory,took a pivotal step in the development of quan...,1,['0af897c6-3391-496e-a93c-a5a18b350f50']


In [145]:
# Create a ConceptToConcept relationship for every CSV row whose source and
# target both exist as Concept nodes.
import ast  # local import: only this cell needs it

for _, row in df.iterrows():
    relation = str(row['relation'])
    # Force an integer literal for the INT32 `weight` column.
    weight = int(row['weight'])

    # The `uuids` column holds the text form of a Python list of strings
    # (e.g. "['0af8...']"); parse it properly instead of slicing the text.
    chunk_uuids = ast.literal_eval(row['uuids'])
    # Store the list comma-separated in the `chunk_ids` STRING column. Strip
    # anything that is not a letter, digit, comma or hyphen so the value is
    # safe inside the quoted literal.
    chunk_ids = re.sub('[^0-9A-Za-z,-]+', '', ','.join(str(u) for u in chunk_uuids))

    # Same sanitization as used for the Concept names, so the lookups match.
    source = re.sub('[^A-Z.a-z]+', '_', row['source'])
    target = re.sub('[^A-Z.a-z]+', '_', row['target'])
    relation = re.sub('[^A-Z.a-z]+', '_', relation)

    if source in concept_ids and target in concept_ids:
        statement = (
            f"MATCH (u1:Concept), (u2:Concept) WHERE u1.name = '{source}' AND u2.name = '{target}' "
            f"CREATE (u1)-[:ConceptToConcept {{relation_type: 'relatesTo', relation: '{relation}', "
            f"weight: {weight}, chunk_ids: '{chunk_ids}'}}]->(u2)"
        )
        results = conn.execute(statement)
        results.close()
        

In [146]:
# Sanity check: show a few Concept -> Concept relations as a DataFrame.
concept_relation_query = (
    'MATCH (a:Concept)-[f:ConceptToConcept]->(b:Concept) '
    'RETURN a.name as source, f.relation AS relation, b.name as target;'
)
results = conn.execute(concept_relation_query).get_as_df()
results.head()

Unnamed: 0,source,relation,target
0,Albert_Einstein,developed the theory of relativity,theory_of_relativity
1,Albert_Einstein,made important contributions to quantum mechanics,quantum_mechanics
2,Albert_Einstein,made important contributions to quantum mechanics,quantum_mechanics
3,Albert_Einstein,discovered the law of the photoelectric effect,photoelectric_effect
4,Albert_Einstein,took a pivotal step in the development of quan...,quantum_theory


In [None]:
#####################  Backup #################

### Text2GraphQuery/ Text2Cypher

#### What is Cypher?

```cypher
// Pattern of vertex/node
(foo:tagA)

// Pattern of edge/relationship
()-[e:type_x]->()

// Pattern Matching
MATCH p=(foo:tagA{name: "Jerry"})-[:type_x]->()
RETURN p LIMIT 5

// Pattern Matching with WHERE clause, NebulaGraph Cypher
MATCH (e1:entity)-[r:relationship]->(e2:entity)
WHERE id(e1) == 'James Gunn'
RETURN e2.entity.name

```