In [1]:
import json
import os
import sys

import pandas as pd

# Make the repository root importable before loading the project modules.
sys.path.append("../../")

from ingestion.generate_ingest import IngestionGenerator
from pyingest.pyingest import PyIngestForStreamlit

In [2]:
# Connection settings for the target Neo4j instance.
# Each value can be overridden through an environment variable so that real
# credentials never have to be written into (or committed with) the notebook.
# The fallbacks are the previous hard-coded local-development values, so the
# notebook behaves exactly as before when no variable is set.
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "password")
database = os.environ.get("NEO4J_DATABASE", "neo4j")
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")

In [3]:
# JSON text describing the graph data model: a list of node labels (each with
# its properties and unique constraints) and a list of relationship types
# (each with a source and target label). It is parsed with json.loads when
# the IngestionGenerator is constructed.
# NOTE(review): BattleOutcome, BattleType and MajorEvent declare a unique
# constraint on "name" while their "properties" lists are empty - confirm
# that this is intended.
data_model = """
{"nodes": [{"label": "Battle", "properties": ["name", "year", "battle_number", "attacker_king", "defender_king", "attacker_1", "attacker_2", "attacker_3", "attacker_4", "defender_1", "defender_2", "defender_3", "defender_4", "attacker_outcome", "battle_type", "major_death", "major_capture", "attacker_size", "defender_size", "attacker_commander", "defender_commander", "summer", "location", "region", "note"], "unique_constraints": ["name"]}, {"label": "King", "properties": ["name"], "unique_constraints": ["name"]}, {"label": "Commander", "properties": ["name"], "unique_constraints": ["name"]}, {"label": "Location", "properties": ["name"], "unique_constraints": ["name"]}, {"label": "Region", "properties": ["name"], "unique_constraints": ["name"]}, {"label": "BattleOutcome", "properties": [], "unique_constraints": ["name"]}, {"label": "BattleType", "properties": [], "unique_constraints": ["name"]}, {"label": "MajorEvent", "properties": [], "unique_constraints": ["name"]}], "relationships": [{"type": "ATTACKED", "properties": [], "unique_constraints": [], "source": "Battle", "target": "King"}, {"type": "DEFENDED", "properties": [], "unique_constraints": [], "source": "Battle", "target": "King"}, {"type": "ATTACKED_WITH", "properties": [], "unique_constraints": [], "source": "Battle", "target": "Commander"}, {"type": "DEFENDED_WITH", "properties": [], "unique_constraints": [], "source": "Battle", "target": "Commander"}, {"type": "TOOK_PLACE_IN", "properties": [], "unique_constraints": [], "source": "Battle", "target": "Location"}, {"type": "LOCATED_IN", "properties": [], "unique_constraints": [], "source": "Location", "target": "Region"}, {"type": "HAD_OUTCOME", "properties": [], "unique_constraints": [], "source": "Battle", "target": "BattleOutcome"}, {"type": "OF_TYPE", "properties": [], "unique_constraints": [], "source": "Battle", "target": "BattleType"}, {"type": "EXPERIENCED_EVENT", "properties": [], "unique_constraints": [], "source": "Battle", "target": 
"MajorEvent"}]}
"""

In [4]:
# Source CSV, kept as a separate relative directory and file name because
# both pieces are passed on individually further down.
csv_dir, csv_name = "../../data/csv/", "got-battles.csv"

In [5]:
# Load the source CSV into a DataFrame; the path is the directory string
# immediately followed by the file name.
input_dataframe = pd.read_csv(f"{csv_dir}{csv_name}")

In [6]:
# Optional: to build the ingest from a model version other than the most
# recent one, uncomment the two lines below and choose the version number.
# NOTE(review): `summarizer` is not defined in the cells shown here, and the
# `[]` placeholder has to be replaced with an integer before the
# `model_version_to_use-1` index can work.
# model_version_to_use = []
# model_to_use = summarizer.model_history[model_version_to_use-1].dict

# Build the generator from the connection settings, the CSV location and
# the parsed data model.
gen = IngestionGenerator(
    uri=uri,
    database=database,
    username=username,
    password=password,
    csv_dir=csv_dir,
    csv_name=csv_name,
    data_model=json.loads(data_model),
)

In [7]:
# Display the generator's config entries; the output shows one dict per entry
# with 'url', 'cql' and 'chunk_size' keys.
gen.config_files_list

[{'url': '$BASE/../../data/csv/got-battles.csv',
  'cql': 'WITH $dict.rows AS rows\nUNWIND rows AS row\nMERGE (n:Battle {name: row.name})\nSET n.attacker_commander = row.attacker_commander, n.major_death = row.major_death, n.attacker_size = row.attacker_size, n.battle_type = row.battle_type, n.location = row.location, n.region = row.region, n.defender_size = row.defender_size, n.attacker_2 = row.attacker_2, n.battle_number = row.battle_number, n.defender_2 = row.defender_2, n.attacker_4 = row.attacker_4, n.note = row.note, n.attacker_1 = row.attacker_1, n.defender_king = row.defender_king, n.attacker_outcome = row.attacker_outcome, n.major_capture = row.major_capture, n.attacker_3 = row.attacker_3, n.defender_4 = row.defender_4, n.attacker_king = row.attacker_king, n.defender_3 = row.defender_3, n.year = row.year, n.summer = row.summer, n.defender_1 = row.defender_1, n.defender_commander = row.defender_commander',
  'chunk_size': 100},
 {'url': '$BASE/../../data/csv/got-battles.csv',
 

In [8]:
# Render the ingest configuration as a single YAML string.
yaml_string = gen.generate_pyingest_yaml_string()

In [9]:
# Print the generated YAML for inspection.
# NOTE(review): the printed config contains the admin password in plain
# text - clear this cell's output before sharing the notebook.
print(yaml_string)

server_uri: bolt://localhost:7687
admin_user: neo4j
admin_pass: password
database: neo4j
basepath: file:./

pre_ingest:
  - CREATE CONSTRAINT battle_name IF NOT EXISTS FOR (n:Battle) REQUIRE n.name IS UNIQUE;
  - CREATE CONSTRAINT king_name IF NOT EXISTS FOR (n:King) REQUIRE n.name IS UNIQUE;
  - CREATE CONSTRAINT commander_name IF NOT EXISTS FOR (n:Commander) REQUIRE n.name IS UNIQUE;
  - CREATE CONSTRAINT location_name IF NOT EXISTS FOR (n:Location) REQUIRE n.name IS UNIQUE;
  - CREATE CONSTRAINT region_name IF NOT EXISTS FOR (n:Region) REQUIRE n.name IS UNIQUE;
  - CREATE CONSTRAINT battleoutcome_name IF NOT EXISTS FOR (n:BattleOutcome) REQUIRE n.name IS UNIQUE;
  - CREATE CONSTRAINT battletype_name IF NOT EXISTS FOR (n:BattleType) REQUIRE n.name IS UNIQUE;
  - CREATE CONSTRAINT majorevent_name IF NOT EXISTS FOR (n:MajorEvent) REQUIRE n.name IS UNIQUE;
files:
- chunk_size: 100
  cql: |-
    WITH $dict.rows AS rows
    UNWIND rows AS row
    MERGE (n:Battle {name: row.name})
    SET 

In [10]:
# Run the ingest and collect every value the PyIngestForStreamlit iterator
# yields. extend() appends items as they arrive, so values yielded before
# an error are still kept in `outputs`.
outputs = []
outputs.extend(PyIngestForStreamlit(yaml_string=yaml_string))

{} : Reading file 2024-03-02 15:36:46.503699
File {} file:.//../../data/csv/got-battles.csv
file:.//../../data/csv/got-battles.csv 0 2024-03-02 15:36:46.505308


{} : Completed file 2024-03-02 15:36:46.594753
{} : Reading file 2024-03-02 15:36:46.595354
File {} file:.//../../data/csv/got-battles.csv
file:.//../../data/csv/got-battles.csv 0 2024-03-02 15:36:46.596843
{} : Completed file 2024-03-02 15:36:46.602816
{} : Reading file 2024-03-02 15:36:46.603117
File {} file:.//../../data/csv/got-battles.csv
file:.//../../data/csv/got-battles.csv 0 2024-03-02 15:36:46.604184
{} : Completed file 2024-03-02 15:36:46.609417
{} : Reading file 2024-03-02 15:36:46.609671
File {} file:.//../../data/csv/got-battles.csv
file:.//../../data/csv/got-battles.csv 0 2024-03-02 15:36:46.610770
{} : Completed file 2024-03-02 15:36:46.616242
{} : Reading file 2024-03-02 15:36:46.616495
File {} file:.//../../data/csv/got-battles.csv
file:.//../../data/csv/got-battles.csv 0 2024-03-02 15:36:46.617556
{} : Completed file 2024-03-02 15:36:46.623007
{} : Reading file 2024-03-02 15:36:46.623266
File {} file:.//../../data/csv/got-battles.csv
file:.//../../data/csv/got-battle

In [11]:
# Display the values collected during the ingest run; in the recorded output
# they are fractions increasing up to 1.0.
outputs

[0.06666666666666667,
 0.13333333333333333,
 0.2,
 0.26666666666666666,
 0.3333333333333333,
 0.4,
 0.4666666666666667,
 0.5333333333333333,
 0.6,
 0.6666666666666666,
 0.7333333333333333,
 0.8,
 0.8666666666666667,
 0.9333333333333333,
 1.0]