# Graph analysis using `memgraph`

ref: [nvidia-blog](https://developer.nvidia.com/blog/running-large-scale-graph-analytics-with-memgraph-and-nvidia-cugraph-algorithms/)

Please note: in order to read the CSV files inside the Memgraph Docker container, I had to change the ownership of the `.csv` files to `101:101` (user:group). This is not recommended, but it is the only way I have found so far.

In [None]:
from gqlalchemy import Memgraph

# Open the connection to the Memgraph instance listening on 127.0.0.1:7687.
memgraph = Memgraph(host="127.0.0.1", port=7687)
# Uncomment to drop the existing database contents before re-importing:
# memgraph.drop_database()

In [None]:
# Storage mode switches (on-disk vs. in-memory transactional); ignore them for now.
# memgraph.execute("STORAGE MODE ON_DISK_TRANSACTIONAL;")
# memgraph.execute("STORAGE MODE IN_MEMORY_TRANSACTIONAL;")

In [None]:
# Fetch the server's storage statistics and show every returned row
# as the cell output.
list(memgraph.execute_and_fetch("SHOW STORAGE INFO;"))

## Add original data into the `memgraph`

Memgraph runs import queries faster when the matched properties are indexed, so create indexes for all node labels before loading the data.

In [None]:
# Label-property indexes on the keys that the import queries below look up:
# Project.id, Lender.id and Tag.name. Created in the same order as before.
for index_statement in (
    "CREATE INDEX ON :Project(id);",
    "CREATE INDEX ON :Lender(id);",
    "CREATE INDEX ON :Tag(name);",
):
    memgraph.execute(index_statement)

In [None]:
# Create one :Tag node per row of tags.csv; the tag name is read from the
# 'name:ID' column.
load_tags_query = """
LOAD CSV FROM "/csv_data/tags.csv" WITH HEADER AS row
CREATE (:Tag {name: row['name:ID']});
"""
memgraph.execute(load_tags_query)

In [None]:
# Create one :Lender node per row of lenders.csv. The id is converted to an
# integer; name and publicId are stored as read from the file.
load_lenders_query = """
LOAD CSV FROM "/csv_data/lenders.csv" WITH HEADER AS row
CREATE (:Lender {id: toInteger(row['lender_id']), name: row['lender_name'], publicId: row['lender_publicId']});
"""
memgraph.execute(load_lenders_query)

In [None]:
# Create one :Project node per row of projects.csv. The project, sector and
# activity ids are converted to integers; the name is stored as read.
load_projects_query = """
LOAD CSV FROM "/csv_data/projects.csv" WITH HEADER AS ROW
CREATE (:Project {id: toInteger(ROW['project_id']), name: ROW['project_name'], sector_id: toInteger(ROW['sector_id']), activity_id: toInteger(ROW['activity_id'])})
"""
memgraph.execute(load_projects_query)

In [None]:
# For each row of lender_project.csv, look up the lender and the project by
# their integer ids and connect them with a LEND relationship carrying the
# loan amount (as a float) and the loan date (as read from the file).
# Rows whose lender or project is not matched create nothing.
load_lend_query = """
LOAD CSV FROM "/csv_data/lender_project.csv" WITH HEADER AS ROW
MATCH (project:Project {id: toInteger(ROW[':END_ID(Project-ID)'])}), (lender:Lender {id: toInteger(ROW[':START_ID(Lender-ID)'])})
CREATE (lender)-[:LEND {amount: toFloat(ROW['loan_shareAmount']), date: ROW['loan_date']}]->(project)
"""
memgraph.execute(load_lend_query)

In [None]:
# For each row of project_tags.csv, look up the project by integer id and the
# tag by name, then connect them with a TAGGED_WITH relationship.
# Rows whose project or tag is not matched create nothing.
load_project_tags_query = """
LOAD CSV FROM "/csv_data/project_tags.csv" WITH HEADER AS ROW
MATCH (project:Project {id: toInteger(ROW['project_id'])}), (tag:Tag {name: ROW[':END_ID']})
CREATE (project)-[:TAGGED_WITH]->(tag)
"""
memgraph.execute(load_project_tags_query)

## Create new relationships between nodes

### Create `COMMON_LOAN` relationships between `Tag` nodes

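The following query helps to build the projection of the `project-tag` graph onto the `tag` nodes: for every pair of tags that share at least one project, it returns the per-tag project counts together with the intersection and union counts.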

In [None]:
# Read-only preview of the tag-to-tag projection. For each pair of tags that
# share at least one project (ID(t1) > ID(t2) keeps each unordered pair once)
# the query returns:
#   - t1pro / t2pro: number of distinct projects carrying each tag,
#   - intersection_count: distinct projects carrying both tags,
#   - union_count: t1pro + t2pro - intersection_count.
tag_overlap_query = """
MATCH (t1:Tag)<-[:TAGGED_WITH]-(p:Project)-[:TAGGED_WITH]->(t2:Tag)
WHERE ID(t1) > ID(t2)
WITH t1, t2, COUNT(DISTINCT p) as intersection_count
CALL {WITH t1 MATCH (p1:Project)-[:TAGGED_WITH]->(t1) RETURN COUNT(DISTINCT p1) as t1pro}
CALL {WITH t2 MATCH (p2:Project)-[:TAGGED_WITH]->(t2) RETURN COUNT(DISTINCT p2) as t2pro}
WITH t1, t2, t1pro, t2pro, intersection_count, t1pro + t2pro - intersection_count as union_count
RETURN t1.name, t2.name, t1pro, t2pro, intersection_count, union_count;
"""
results = memgraph.execute_and_fetch(tag_overlap_query)
# Drain the lazy result iterator so the rows show up as the cell output.
list(results)

Create the relationships, with the weight defined as intersection over union.

In [None]:
# Materialise the tag-to-tag projection: one COMMON_LOAN relationship per pair
# of tags that share at least one project (ID(t1) > ID(t2) keeps each
# unordered pair once). The weight is intersection over union of the two
# tags' project sets; `* 1.0` forces floating-point division.
#
# The query ends with `RETURN COUNT(i)`. `memgraph.execute` discards returned
# rows, so `execute_and_fetch` is used instead and drained with `list`, which
# both runs the lazy query to completion and shows the created-edge count as
# the cell output.
#
# NOTE: the query uses CREATE, so re-running this cell adds duplicate
# COMMON_LOAN relationships.
common_loan_counts = list(
    memgraph.execute_and_fetch(
        """
MATCH (t1:Tag)<-[:TAGGED_WITH]-(p:Project)-[:TAGGED_WITH]->(t2:Tag)
WHERE ID(t1) > ID(t2)
WITH t1, t2, COUNT(DISTINCT p) as intersection_count
CALL {WITH t1 MATCH (p1:Project)-[:TAGGED_WITH]->(t1) RETURN COUNT(DISTINCT p1) as t1pro}
CALL {WITH t2 MATCH (p2:Project)-[:TAGGED_WITH]->(t2) RETURN COUNT(DISTINCT p2) as t2pro}
WITH t1, t2, intersection_count, t1pro + t2pro - intersection_count as union_count, 
      intersection_count * 1.0 / (t1pro + t2pro - intersection_count) as theweight
CREATE (t1)-[i:COMMON_LOAN {weight: theweight}]->(t2)
RETURN COUNT(i);
"""
    )
)
common_loan_counts

In [None]:
%%script false --no-raise-error
# Disabled cell: the `%%script false` magic hands the cell body to the `false`
# command instead of running it as Python, so nothing below is executed.
# This simpler projection weights each tag pair by the raw number of distinct
# shared projects. Ignore for now.
# NOTE(review): this query writes `COMMON_LOANS`, while the intersection-over-union
# cell in this notebook creates `COMMON_LOAN` - confirm which name is intended
# before re-enabling.

memgraph.execute(
    """
MATCH (t1:Tag)<-[:TAGGED_WITH]-(loan:Project)-[:TAGGED_WITH]->(t2:Tag)
WHERE ID(t1) > ID(t2)
WITH collect(DISTINCT loan) as loans, t1, t2
MERGE (t1)-[:COMMON_LOANS {weight: size(loans)}]->(t2)
"""
)

### Create `INTEREST` relationship between `Lender` and `Tag`

In [None]:
# Build the Lender -> Tag INTEREST relationships.
# Every Lender-[:LEND]->Project-[:TAGGED_WITH]->Tag path contributes to a
# single INTEREST edge per (lender, tag) pair: the edge is created with
# weight 1 on the first path and incremented by 1 for each further path.
# NOTE: re-running this cell increments the existing weights again.
interest_query = """
MATCH (lender:Lender)-[lend:LEND]->(loan:Project)-[t:TAGGED_WITH]->(tag:Tag)
MERGE (lender)-[i:INTEREST]->(tag)
ON CREATE SET i.weight = 1
ON MATCH SET i.weight = i.weight + 1
"""
memgraph.execute(interest_query)