In [13]:
from neo4j import GraphDatabase

# URI examples: "neo4j://localhost", "neo4j+s://xxx.databases.neo4j.io"


In [14]:
URI = "bolt://localhost:7687"
AUTH = ("neo4j", "azerty92")

with GraphDatabase.driver(URI, auth=AUTH) as driver: 
    driver.verify_connectivity() 

In [33]:
def count_row(tx, fn):  
    result = tx.run(
        f"""
        LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
        RETURN COUNT(row)
        """ 
    )
    count = result.fetch(1)
    summary = result.consume()
    return count, summary

def exec_write(query, csv_filename, db):
    # Execute a write query in the provided DB from a csv
    with driver.session(database=db) as session:
        summary = session.run(query.format(fn=csv_filename)).consume()

        print("Created {nodes_created} nodes and {relationship_created} relationships and set {properties_set} properties in {time} ms.".format(
            nodes_created=summary.counters.nodes_created,
            properties_set=summary.counters.properties_set,
            relationship_created=summary.counters.relationships_created,
            time=summary.result_available_after
        ))

In [43]:
create_country = """
LOAD CSV WITH HEADERS FROM 'file:///{fn}' AS row 
WITH row WHERE row.FIPS IS NOT NULL 
MERGE (c:Country {{FIPS: row.FIPS, name: row.Name}});  
"""
exec_write(create_country, "fips.csv", "gdeltMono")

Created 279 nodes and 0 relationships and set 558 properties in 217 ms.


# Create Events and actors

In [17]:
# Count lines in csv
with driver.session(database="gdeltMono") as session:
    count, summary = session.execute_write(count_row, fn='20230101000000.translation.export.csv')  

    print("Created {nodes_created} nodes in {time} ms.".format(
        nodes_created=summary.counters.nodes_created,
        time=summary.result_available_after
    ))
    print(count)

Created 0 nodes in 1059 ms.
[<Record COUNT(row)=332>]


In [18]:
# Create event nodes
create_event = """
LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
CALL {{
    WITH row
    MERGE (event:Event {{GlobalEventID:row[0]}})
    ON CREATE
    SET
        event.Date = Datetime(row[1]),
        event.Type = row[26]
}} IN TRANSACTIONS OF 1000 ROWS;
"""

exec_write(create_event, "20230101000000.translation.export.csv", "gdeltMono")

Created 332 nodes and 0 relationships in 1600 ms.


In [46]:
# Create relationship between event and country

create_event_country = """
            LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row[53] IS NOT NULL
            MATCH (event:Event {{GlobalEventID : row[0]}})
            MATCH (country:Country {{FIPS : row[53]}})
            CALL {{
                WITH row, event, country
                MERGE (event)-[:TAKES_PLACE {{Lat: coalesce(toFloat(row[56]), null), Lon: coalesce(toFloat(row[57]), null)}}]->(country)
            }} IN TRANSACTIONS OF 1000 ROWS;
        """
exec_write(create_event_country, "20230101000000.translation.export.csv", "gdeltMono")

Created 0 nodes and 311 relationships and set 622 properties in 482 ms.


In [19]:
# Create actor1 node

# actor1Name idex: 6
# actor2Name idex: 16
create_actor1 = """ 
            LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row[6] IS NOT NULL
            CALL {{
                WITH row
                MERGE (:Actor {{Name:row[6]}})
            }} IN TRANSACTIONS OF 1000 ROWS;
        """
exec_write(create_actor1, "20230101000000.translation.export.csv", "gdeltMono")

Created 113 nodes and 0 relationships in 290 ms.


In [20]:
# Create actor2 node

# actor1Name idex: 6
# actor2Name idex: 16
create_actor2 = """ 
            LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row[16] IS NOT NULL
            CALL {{
                WITH row
                MERGE (:Actor {{Name:row[16]}})
            }} IN TRANSACTIONS OF 1000 ROWS;
        """
exec_write(create_actor2, "20230101000000.translation.export.csv", "gdeltMono")

Created 27 nodes and 0 relationships in 304 ms.


In [21]:
# Create relationship between actor1 and event

# actor1Name idex: 6
# actor2Name idex: 16
create_actor1_event = """
            LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row[6] IS NOT NULL
            MATCH (event:Event {{GlobalEventID : row[0]}})
            MATCH (actor:Actor {{Name : row[6]}})
            CALL {{
                WITH event, actor
                MERGE (actor)-[:GENERATES]->(event)
            }} IN TRANSACTIONS OF 1000 ROWS;
        """
exec_write(create_actor1_event, "20230101000000.translation.export.csv", "gdeltMono")

Created 0 nodes and 280 relationships in 1075 ms.


In [22]:
# Create relationship between event and actor2

# actor1Name idex: 6
# actor2Name idex: 16
create_event_actor2 = """ 
            LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row[16] IS NOT NULL
            MATCH (event:Event {{GlobalEventID : row[0]}})
            MATCH (actor:Actor {{Name : row[16]}})
            CALL {{
                WITH event, actor
                MERGE (event)-[:IMPACTS]->(actor)
            }} IN TRANSACTIONS OF 1000 ROWS;
        """
exec_write(create_event_actor2, "20230101000000.translation.export.csv", "gdeltMono")

Created 0 nodes and 198 relationships in 507 ms.


# Create Mentions and Resources

In [30]:
create_resource = """ 
            LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row[5] IS NOT NULL
            CALL {{
                WITH row
                MERGE (:Resource {{OriginalID:row[5]}})
            }} IN TRANSACTIONS OF 1000 ROWS;
        """
exec_write(create_resource, "20230101000000.translation.mentions.csv", "gdeltMono")

Created 127 nodes and 0 relationships in 121 ms.


In [31]:
create_resource_event = """ 
            LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row[5] IS NOT NULL
            CALL {{
                WITH row
                MATCH (event:Event {{GlobalEventID : row[0]}})
                MATCH (resource:Resource {{OriginalID : row[5]}})
                MERGE (resource)-[:MENTIONS {{Confidence:coalesce(toInteger(row[11]), 0)}}]->(event)
            }} IN TRANSACTIONS OF 1000 ROWS;
        """
exec_write(create_resource_event, "20230101000000.translation.mentions.csv", "gdeltMono")

Created 0 nodes and 357 relationships in 451 ms.


In [14]:
create_resource_h = """ 
            LOAD CSV WITH HEADERS FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row.MentionIdentifier IS NOT NULL
            CALL {{
                WITH row
                MERGE (:Resource {{OriginalID:row.MentionIdentifier}})
            }} IN TRANSACTIONS OF 1000 ROWS;
        """

In [17]:
exec_write(create_resource_h, "batch_2021-01-01_000000_2021-03-01_010000_mentions.csv", "gdelt")

Created 0 nodes and 0 relationships in 7453 ms.


# GKG

In [25]:
create_themes = """ 
            LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row[7] IS NOT NULL
            CALL {{
                WITH row
                UNWIND split(row[7], ';') as theme
                MERGE (:Theme {{Name:theme}})
            }} IN TRANSACTIONS OF 1000 ROWS;
        """
exec_write(create_themes, "20230101000000.translation.gkg.csv", "gdeltMono")

Created 1657 nodes and 0 relationships in 5867 ms.


In [27]:
create_sources = """ 
            LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row[3] IS NOT NULL
            CALL {{
                WITH row
                MERGE (:Source {{name:row[2], type:row[3]}})
            }} IN TRANSACTIONS OF 1000 ROWS;
        """
exec_write(create_sources, "20230101000000.translation.gkg.csv", "gdeltMono")

Created 152 nodes and 0 relationships in 335 ms.


In [41]:
update_resource = """ 
            LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row[4] IS NOT NULL
            CALL {{
                WITH row
                MATCH (r:Resource {{OriginalID:row[4]}})
                SET r += {{Date: Datetime(substring(row[1], 0, 8)), OriginalLanguage:coalesce(row[-2], "ENG"), Tone:coalesce(toFloat(split(row[15], ',')[0]), 0.0)}}
            }} IN TRANSACTIONS OF 1000 ROWS;
        """
exec_write(update_resource, "20230101000000.translation.gkg.csv", "gdeltMono")

Created 0 nodes and 0 relationships and set 378 properties in 403 ms.


In [40]:
create_resource_themes = """ 
            LOAD CSV FROM 'file:///{fn}' AS row FIELDTERMINATOR '\t'
            WITH row WHERE row[7] IS NOT NULL AND row[4] IS NOT NULL
            CALL {{
                WITH row
                UNWIND split(row[7], ';') as theme
                MATCH (t:Theme {{Name:theme}})
                MATCH (resource:Resource {{OriginalID : row[4]}})
                MERGE (resource)-[:HAS]->(t)
            }} IN TRANSACTIONS OF 1000 ROWS;
        """
exec_write(create_resource_themes, "20230101000000.translation.gkg.csv", "gdeltMono")

Created 0 nodes and 4260 relationships and set 0 properties in 16799 ms.
