## Preparing the input files

In [1]:
abstracts = "D:/CSE498R_Resources/D3N/Dengue-Drug-Discovery-Network-D3N/Data/d3n_unfiltered/unsegmented_unfiltered.csv" #Abstract
sentences = "D:/CSE498R_Resources/D3N/Dengue-Drug-Discovery-Network-D3N/Data/d3n_unfiltered/segmented_unfiltered.csv" # Sentences
relations = "D:/CSE498R_Resources/D3N/Dengue-Drug-Discovery-Network-D3N/Data/d3n_processed_data/filtered.csv" # Relations
entities = "D:/CSE498R_Resources/D3N/Dengue-Drug-Discovery-Network-D3N/Data/d3n_processed_data/entities.csv" # Entities
# den = "D:/CSE498R_Resources/D3N/Dengue-Drug-Discovery-Network-D3N/Data/d3n_filtered/d3n.json"
output = "D:/CSE498R_Resources/D3N/Dengue-Drug-Discovery-Network-D3N/Data/d3n_KG_ready/"

In [None]:
from typing import List, Dict, Optional, Union, Any
from dataclasses import dataclass
import pandas as pd
from tqdm import tqdm
import hashlib
from datetime import datetime
import re

# Database Imports
from neo4j import GraphDatabase
from typing import Dict, Any, Optional
from datetime import datetime
import logging
import os
from logging.handlers import RotatingFileHandler

In [3]:
def generate_id(*args: Optional[Union[str,int]]) -> str:
    id_string = "-".join(str(arg) for arg in args if arg is not None)
    
    # Generate SHA-256 hash of the concatenated string
    return hashlib.sha256(id_string.encode()).hexdigest()

def clean_mesh(mesh: str) -> List[str]:
    # Remove square brackets and extra quotes
    mesh_cleaned = mesh.strip("[]").replace("'", "")
    # Split by commas and strip whitespace around each term
    mesh_terms = [term.strip() for term in mesh_cleaned.split(',')]
    
    return mesh_terms

def clean_entity_type(type:str) -> str:
    if type.startswith('@'):
        return type[1:]
    else:
        return type

def parse_date(date_str):
    try:
        # Try full format with day (e.g., "2020 Jan 15")
        return datetime.strptime(date_str, "%Y %b %d")
    except ValueError:
        try:
            # Try year and month only (e.g., "2020 Jan")
            return datetime.strptime(date_str, "%Y %b")
        except ValueError:
            try:
                # Try year only (e.g., "2020")
                return datetime.strptime(date_str, "%Y")
            except ValueError:
                # If format is still invalid, use regex to extract the year only
                match = re.match(r"(\d{4})", date_str)
                if match:
                    return datetime.strptime(match.group(1), "%Y")
                else:
                    raise ValueError(f"Invalid date format for date_of_publication: {date_str}")

### Reinterpreting Relation Data

In [6]:
relations_Df = pd.read_csv(relations)
relations_df = relations_Df[:1000]
relations_df.to_csv("D:/CSE498R_Resources/D3N/Dengue-Drug-Discovery-Network-D3N/Data/relation_split.csv", index=True)

# Pushing Data to KG

In [14]:
class LoggerSetup:
    """
    Configures logging with both console and file outputs.
    Supports log rotation and custom formatting.
    """
    
    @staticmethod
    def setup_logger(
        logger_name: str,
        log_dir: str,
        max_file_size_mb: int = 10,
        backup_count: int = 5,
        log_level: int = logging.INFO,
        console_output: bool = True
    ) -> logging.Logger:
        """
        Sets up a logger with both file and optional console output.
        
        Args:
            logger_name: Name of the logger
            log_dir: Directory where log files will be stored
            max_file_size_mb: Maximum size of each log file in MB
            backup_count: Number of backup files to keep
            log_level: Logging level (e.g., logging.INFO, logging.DEBUG)
            console_output: Whether to also output logs to console
            
        Returns:
            logging.Logger: Configured logger instance
        """
        # Create logger
        logger = logging.getLogger(logger_name)
        logger.setLevel(log_level)
        
        # Create logs directory if it doesn't exist
        os.makedirs(log_dir, exist_ok=True)
        
        # Create formatters
        file_formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        
        console_formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        
        # Create and configure file handler with rotation
        log_file_path = os.path.join(log_dir, f'{logger_name}_{datetime.now().strftime("%Y%m%d")}.log')
        file_handler = RotatingFileHandler(
            filename=log_file_path,
            maxBytes=max_file_size_mb * 1024 * 1024,  # Convert MB to bytes
            backupCount=backup_count,
            encoding='utf-8'
        )
        file_handler.setFormatter(file_formatter)
        file_handler.setLevel(log_level)
        logger.addHandler(file_handler)
        
        # Add console handler if requested
        if console_output:
            console_handler = logging.StreamHandler()
            console_handler.setFormatter(console_formatter)
            console_handler.setLevel(log_level)
            logger.addHandler(console_handler)
            
        return logger

In [15]:
def setup_logger(logger_name: str, log_dir: str, log_level: int = logging.WARNING, console_output: bool = True) -> logging.Logger:
    """Set up a logger with both file and optional console output."""
    logger = logging.getLogger(logger_name)
    logger.setLevel(log_level)

    # Ensure log directory exists
    os.makedirs(log_dir, exist_ok=True)
    
    # Define log format
    file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
    console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

    # Set up file handler with rotation
    log_file_path = os.path.join(log_dir, f'{logger_name}_{datetime.now().strftime("%Y%m%d")}.log')
    file_handler = RotatingFileHandler(log_file_path, maxBytes=10 * 1024 * 1024, backupCount=5)
    file_handler.setFormatter(file_formatter)
    logger.addHandler(file_handler)

    # Optionally add console output
    if console_output:
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(console_formatter)
        logger.addHandler(console_handler)

    return logger

In [16]:
def setup_database(uri: str, username: str, password: str, database_name: str, 
                  log_dir: str, initial_data: Optional[Dict[str, Any]] = None, 
                  delete_if_exists: bool = False) -> None:
    logger = setup_logger(log_dir, 'GraphDB_Setup')
    logger.info(f"Starting database setup for {database_name}")
    
    try:
        driver = GraphDatabase.driver(uri, auth=(username, password))
        
        with driver.session() as session:
            session.run("RETURN 1")
            logger.info("Database connection verified")
            
        with driver.session(database="system") as session:
            result = session.run(
                "SHOW DATABASES WHERE name = $name",
                name=database_name
            )
            db_exists = bool(result.single())
            
            if db_exists and delete_if_exists:
                logger.warning(f"Dropping existing database: {database_name}")
                session.run("DROP DATABASE $name IF EXISTS", name=database_name)
                
            if not db_exists or delete_if_exists:
                logger.info(f"Creating new database: {database_name}")
                session.run("CREATE DATABASE $name IF NOT EXISTS", name=database_name)
                
                if initial_data:
                    logger.info("Initializing database with provided data")
                    with driver.session(database=database_name) as db_session:
                        if 'constraints' in initial_data:
                            for constraint in initial_data['constraints']:
                                db_session.run(constraint)
                                logger.debug(f"Created constraint: {constraint}")
                        
                        if 'nodes' in initial_data:
                            for node_query in initial_data['nodes']:
                                db_session.run(node_query)
                                logger.debug("Created node batch")
                        
                        if 'relationships' in initial_data:
                            for rel_query in initial_data['relationships']:
                                db_session.run(rel_query)
                                logger.debug("Created relationship batch")
    
    except Exception as e:
        logger.error(f"Database setup failed: {str(e)}")
        raise
    finally:
        if 'driver' in locals():
            driver.close()
            logger.info("Database connection closed")

### Entity Knowledge Graph

In [10]:
class EntityKnowledgeGraph:
    def __init__(self, uri: str, username: str, password: str, database_name: str, log_dir: str):
        self.logger = setup_logger(log_dir, 'EntityKnowledgeGraph')
        self.logger.info(f"Initializing EntityKnowledgeGraph with database: {database_name}")
        
        try:
            self.driver = GraphDatabase.driver(uri, auth=(username, password))
            self.uri = uri
            self.username = username
            self.database_name = database_name
            self.logger.info("Successfully established database connection")
        except Exception as e:
            self.logger.error(f"Failed to initialize database connection: {str(e)}")
            raise

    def initialize_database(self, initial_data: Optional[Dict[str, Any]] = None, force_recreate: bool = False) -> None:
        self.logger.info(f"Initializing database. Force recreate: {force_recreate}")
        try:
            setup_database(
                uri=self.uri,
                username=self.username,
                password=self.password,
                database_name=self.database_name,
                initial_data=initial_data,
                delete_if_exists=force_recreate
            )
            self.logger.info("Database initialization completed successfully")
        except Exception as e:
            self.logger.error(f"Database initialization failed: {str(e)}")
            raise

    def close(self):
        self.logger.info("Closing database connection")
        try:
            self.driver.close()
            self.logger.info("Database connection closed successfully")
        except Exception as e:
            self.logger.error(f"Error closing database connection: {str(e)}")
            raise

    def create_or_update_paper(self, paper_data):
        self.logger.info(f"Creating/updating paper with PMID: {paper_data.get('pmid')}")
        try:
            with self.driver.session() as session:
                session.execute_write(self._create_or_update_paper, paper_data)
            self.logger.info(f"Successfully created/updated paper {paper_data.get('pmid')}")
        except Exception as e:
            self.logger.error(f"Failed to create/update paper: {str(e)}")
            raise

    @staticmethod
    def _create_or_update_paper(tx, paper_data):
        pmid = paper_data.get('pmid')
        title = paper_data.get('title')
        date_of_publication = paper_data.get('date_of_publication')
        mesh_keywords = paper_data.get('mesh_keywords', [])
        
        query = """
        MERGE (p:Paper {pmid: $pmid})
        ON CREATE SET p.title = $title, p.date_of_publication = $date_of_publication, p.mesh_keywords = $mesh_keywords
        ON MATCH SET p.title = $title, p.date_of_publication = $date_of_publication, p.mesh_keywords = $mesh_keywords
        """
        tx.run(query, pmid=pmid, title=title, date_of_publication=date_of_publication, mesh_keywords=mesh_keywords)

        
    def create_or_update_sentence(self, pmid, sentence):
        self.logger.info(f"Creating/updating sentence {sentence.get('sentence_id')} for paper {pmid}")
        try:
            with self.driver.session() as session:
                session.execute_write(self._create_or_update_sentence, pmid, sentence)
            self.logger.info(f"Successfully created/updated sentence {sentence.get('sentence_id')}")
        except Exception as e:
            self.logger.error(f"Failed to create/update sentence: {str(e)}")
            raise

    @staticmethod
    def _create_or_update_sentence(tx, pmid, sentence):
        query = """
        MATCH (p:Paper {pmid: $pmid})
        MERGE (s:Sentence {sentence_id: $sentence_id, text: $text})
        MERGE (p)-[:HAS_ENTITIES]->(s)  
        """
        tx.run(query, pmid=pmid, sentence_id=sentence['sentence_id'], text=sentence['text'])

    def create_or_update_entity(self, sentence_id, entity):
        self.logger.info(f"Creating/updating entity {entity.get('entity_id')} for sentence {sentence_id}")
        try:
            with self.driver.session() as session:
                session.execute_write(self._create_or_update_entity, sentence_id, entity)
            self.logger.info(f"Successfully created/updated entity {entity.get('entity_id')}")
        except Exception as e:
            self.logger.error(f"Failed to create/update entity: {str(e)}")
            raise

    @staticmethod
    def _create_or_update_entity(tx, sentence_id, entity):
        clean_type = entity['type'][1:] if entity['type'].startswith('@') else entity['type']
        query = """
        MATCH (s:Sentence {sentence_id: $sentence_id})
        MERGE (e:Entity {entity_id: $entity_id, name: $name})
        MERGE (s)-[r:%s {sent_ent_edge_id: $sent_ent_edge_id}]->(e)  
        """ % clean_type
        tx.run(query, 
            sentence_id=sentence_id, 
            entity_id=entity['entity_id'], 
            name=entity['name'],
            sent_ent_edge_id=entity['sent_ent_edge_id'])

    def create_or_update_relationship(self, source_id, target_id, relation_type, relation_id):
        self.logger.info(f"Creating/updating relationship {relation_type} between entities {source_id} and {target_id} with relation_id {relation_id}")
        try:
            with self.driver.session() as session:
                session.execute_write(self._create_or_update_relationship, source_id, target_id, relation_type, relation_id)
            self.logger.info(f"Successfully created/updated relationship {relation_type}")
        except Exception as e:
            self.logger.error(f"Failed to create/update relationship: {str(e)}")
            raise

    @staticmethod
    def _create_or_update_relationship(tx, source_id, target_id, relation_type, relation_id):
        query = """
        MATCH (e1:Entity {entity_id: $source_id})
        MATCH (e2:Entity {entity_id: $target_id})
        MERGE (e1)-[r:%s {relation_id: $relation_id}]->(e2)
        """ % relation_type
        tx.run(query, 
            source_id=source_id, 
            target_id=target_id, 
            relation_id=relation_id)

    # Delete methods with logging
    def delete_paper(self, pmid):
        self.logger.info(f"Deleting paper with PMID: {pmid}")
        try:
            with self.driver.session() as session:
                session.execute_write(self._delete_paper, pmid)
            self.logger.info(f"Successfully deleted paper {pmid}")
        except Exception as e:
            self.logger.error(f"Failed to delete paper: {str(e)}")
            raise

    # Delete methods with logging
    def delete_paper(self, pmid):
        self.logger.info(f"Deleting paper with PMID: {pmid}")
        try:
            with self.driver.session() as session:
                session.execute_write(self._delete_paper, pmid)
            self.logger.info(f"Successfully deleted paper {pmid}")
        except Exception as e:
            self.logger.error(f"Failed to delete paper: {str(e)}")
            raise

    @staticmethod
    def _delete_paper(tx, pmid):
        query = """
        MATCH (p:Paper {pmid: $pmid})
        DETACH DELETE p
        """
        tx.run(query, pmid=pmid)

    def delete_sentence(self, sentence_id):
        self.logger.info(f"Deleting sentence: {sentence_id}")
        try:
            with self.driver.session() as session:
                session.execute_write(self._delete_sentence, sentence_id)
            self.logger.info(f"Successfully deleted sentence {sentence_id}")
        except Exception as e:
            self.logger.error(f"Failed to delete sentence: {str(e)}")
            raise

    @staticmethod
    def _delete_sentence(tx, sentence_id):
        query = """
        MATCH (s:Sentence {sentence_id: $sentence_id})
        DETACH DELETE s
        """
        tx.run(query, sentence_id=sentence_id)

    def delete_entity(self, entity_id):
        self.logger.info(f"Deleting entity: {entity_id}")
        try:
            with self.driver.session() as session:
                session.execute_write(self._delete_entity, entity_id)
            self.logger.info(f"Successfully deleted entity {entity_id}")
        except Exception as e:
            self.logger.error(f"Failed to delete entity: {str(e)}")
            raise

    @staticmethod
    def _delete_entity(tx, entity_id):
        query = """
        MATCH (e:Entity {entity_id: $entity_id})
        DETACH DELETE e
        """
        tx.run(query, entity_id=entity_id)

    def delete_relationship(self, source_id, target_id, relation_type):
        self.logger.info(f"Deleting relationship {relation_type} between entities {source_id} and {target_id}")
        try:
            with self.driver.session() as session:
                session.execute_write(self._delete_relationship, source_id, target_id, relation_type)
            self.logger.info(f"Successfully deleted relationship {relation_type}")
        except Exception as e:
            self.logger.error(f"Failed to delete relationship: {str(e)}")
            raise

    @staticmethod
    def _delete_relationship(tx, source_id, target_id, relation_type):
        query = """
        MATCH (e1:Entity {entity_id: $source_id})-[r:ENTITY_RELATION {type: $relation_type}]->(e2:Entity {entity_id: $target_id})
        DELETE r
        """
        tx.run(query, 
            source_id=source_id, 
            target_id=target_id, 
            relation_type=relation_type)

In [None]:
def convert_to_samples(csv_path1, csv_path2, csv_path3, csv_path4):
    # Step 1: Read the CSV files into dataframes
    df_abstracts = pd.read_csv(csv_path1)  # Contains abstract level information
    df_sentences = pd.read_csv(csv_path2)  # Contains sentence level information
    df_entities = pd.read_csv(csv_path3)   # Contains entity and relation information
    df_relations = pd.read_csv(csv_path4)
    
    # Step 2: Initialize the samples list
    samples = []
    
    # Step 3: Iterate over abstracts to create abstract dicts
    for _, abstract_row in tqdm(df_abstracts.iterrows(), total=len(df_abstracts), desc="Processing Abstracts"):
        dop_str = parse_date(date_str=abstract_row['Date of Publication'])
        pmid_int = int(abstract_row['PMID'])
        abstract = {
            "pmid": pmid_int,  # Example column in csv1
            "title": abstract_row['Title'],  # Example column in csv1
            "date_of_publication": dop_str,  # Example column in csv1
            "mesh_keywords": clean_mesh(str(abstract_row['MeSH'])),  # Example column in csv1
            "sentences": []  # List to store sentence dicts
        }
        # abstracts.append(abstracts)
        abstract_sentences = df_sentences[df_sentences['pmid'] == abstract_row['PMID']]
        
        for _, sentence_row in abstract_sentences.iterrows():
            sent_id = generate_id(pmid_int,int(sentence_row['sent_no']))
            sentence = {
                "sentence_id": sent_id,  # Example column in csv2
                "text": sentence_row['sentence'],  # Example column in csv2
                "entities": [],  # List to store entity dicts
                "relationships": []  # List to store relation dicts
            }
            
            # Step 5: Get entities and relationships for this sentence
            sentence_entities = df_entities[(df_entities['pmid'] == abstract_row['PMID']) & 
                                            (df_entities['sentence_no'] == sentence_row['sent_no'])]

            for _, entity_row in sentence_entities.iterrows():
                entity_type = clean_entity_type(entity_row['entity_type'])
                sent_ent_id = generate_id(sent_id,entity_row['entity_name'],entity_type)
                entity = {
                    "entity_id": generate_id(entity_row['entity_name'],entity_type),  # Example column in csv3
                    "name": entity_row['entity_name'],  # Example column in csv3
                    "type": entity_type,  # Example column in csv3
                    "sent_ent_edge_id":sent_ent_id
                }
                sentence['entities'].append(entity)
                
            # Step 6: Get relations based on entities
            sentence_relations = df_relations[(df_relations['pmid'] == abstract_row['PMID']) & 
                                              (df_relations['sent_no'] == sentence_row['sent_no'])]

            if not sentence_relations.empty:
                for _, relation_row in sentence_relations.iterrows():
                    entity_1_type = clean_entity_type(relation_row['entity_1_type'])
                    entity_2_type = clean_entity_type(relation_row['entity_2_type'])
                    sei = generate_id(relation_row['entity_1'],entity_1_type)
                    tei = generate_id(relation_row['entity_2'],entity_2_type)
                    relation = {
                        "source_entity_id": sei,
                        "target_entity_id": tei,
                        "relation_id": generate_id(sei,tei, relation_row['relation_type']),
                        "source_entity_name": relation_row['entity_1'],
                        "target_entity_name": relation_row['entity_2'],
                        "relation_type": relation_row['relation_type']
                    }
                    sentence['relationships'].append(relation)
            else:
                # Append an empty dictionary with attributes but no values if no relations are found
                relation = {
                    "source_entity_id": None,
                    "target_entity_id": None,
                    "relation_type": None
                }
                sentence['relationships'].append(relation)    
            
            # Append the sentence dict to the abstract's sentences list
            abstract['sentences'].append(sentence)
        
        # Append the abstract dict to the samples list
        samples.append(abstract)
    
    return samples

samples = convert_to_samples(abstracts,sentences,entities,relations)

In [None]:
s = samples[1600:1700]
s[30]

{'pmid': 30496157,
 'title': 'Outbreak of Dengue Virus Type 2 - American Samoa, November 2016-October 2018.',
 'date_of_publication': datetime.datetime(2018, 11, 30, 0, 0),
 'mesh_keywords': ['Adolescent',
  'Adult',
  'Aged',
  'Aged',
  '80 and over',
  'American Samoa/epidemiology',
  'Child',
  'Child',
  'Preschool',
  'Dengue/*epidemiology/*virology',
  'Dengue Virus/classification/genetics/*isolation & purification',
  '*Disease Outbreaks',
  'Female',
  'Humans',
  'Infant',
  'Infant',
  'Newborn',
  'Male',
  'Middle Aged',
  'Young Adult'],
 'sentences': [{'sentence_id': 'c6e0a01936b45a227524431f868924f0469861cbf54b9645945a2c6f7c47c027',
   'text': 'The U.S. territory of American Samoa has experienced recent outbreaks of illnesses caused by viruses transmitted by Aedes species mosquitoes, including dengue, chikungunya, and Zika virus.',
   'entities': [],
   'relationships': [{'source_entity_id': None,
     'target_entity_id': None,
     'relation_type': None}]},
  {'sentenc

In [15]:
len(s)

100

In [11]:
from typing import List

def create_D3N(papers_data: List[Dict[str, Any]], graph: 'EntityKnowledgeGraph', log_dir: str) -> None:
    """Create or update a paper with sentences, entities, and relationships in a Neo4j graph database."""
    
    # Initialize logger
    logger = setup_logger('D3N_Creator', log_dir)
    
    for data in tqdm(papers_data,total=len(papers_data), desc="Pushing to the KG"):  # Directly iterate over the list of Document instances
        try:
            pmid = int(data.get('pmid')) # Access pmid directly from the Document instance
            logger.info(f"Processing paper with PMID: {pmid}")

            # Create or update paper entry in the graph
            graph.create_or_update_paper({
                'pmid': pmid,
                'title': data.get('title'),
                'date_of_publication': data.get('date_of_publication'),
                'mesh_keywords': data.get('mesh_keywords', [])
            })

            # Step 2: Create or update sentences, entities, and relationships
            for sentence in data.get('sentences', []):  # Access sentences directly
                sentence_id = sentence.get('sentence_id')
                if not sentence_id or not sentence.get('entities'):
                    continue

                # Update sentence with primary key `pmid` and sentence data
                graph.create_or_update_sentence(pmid, {
                    'sentence_id': sentence_id,
                    'text': sentence.get('text')
                })

                # Process entities and relationships within the sentence
                for entity in sentence.get('entities', []):  # Iterate over entities in the sentence
                    graph.create_or_update_entity(sentence_id, {
                        'entity_id': entity.get('entity_id'),
                        'name': entity.get('name'),
                        'type': entity.get('type'),
                        'sent_ent_edge_id': entity.get('sent_ent_edge_id')
                    })

                # Process relationships between entities
                for relationship in sentence.get('relationships', []):  # Iterate over relationships in the sentence
                    source_id = relationship.get('source_entity_id')
                    target_id = relationship.get('target_entity_id')
                    relation_type = relationship.get('relation_type')
                    relation_id = relationship.get('relation_id')
                    
                    if source_id and target_id and relation_type and relation_id:
                        graph.create_or_update_relationship(
                            source_id=source_id,
                            target_id=target_id,
                            relation_type=relation_type,
                            relation_id=relation_id
                        )

            logger.info(f"Successfully processed paper with PMID: {pmid}")

        except Exception as e:
            logger.error(f"Failed to process paper with PMID {data.get('pmid')}: {str(e)}")
        
    logger.info("Data push to knowledge graph completed.")


### This is the second

In [18]:
"""
DO NOT USE IT IF YOU HAVE ALREADY EXECUTED THE PREVIOUS CELL.
"""
s = samples[8000:]
print(len(s))
s[30]

1559


{'pmid': 34316455,
 'title': 'Study of the Langat virus RNA-dependent RNA polymerase through homology modeling.',
 'date_of_publication': datetime.datetime(2021, 1, 1, 0, 0),
 'mesh_keywords': ['nan'],
 'sentences': [{'sentence_id': 'd0571027b27d97ff346ec55ffbd6b1710489518a847da54c33f098fc7e39a77b',
   'text': 'Langat virus is a member of the Flaviviridae family and a close relative of a group of important tick-borne viruses that cause human encephalitis.',
   'entities': [],
   'relationships': [{'source_entity_id': None,
     'target_entity_id': None,
     'relation_type': None}]},
  {'sentence_id': 'c111ebbccb62d1048e17c2e3e1d2254b7091e807b240654ff7ffa821c7b76244',
   'text': 'RNA-dependent RNA polymerase is a significant component of the replication mechanism of the Flaviviridae viral family.',
   'entities': [{'entity_id': 'da25e8463473555a7534576ff559ff7c96ed9fcba082c871c95815b56bbb6684',
     'name': 'RNA-dependent RNA polymerase',
     'type': 'PROTEIN',
     'sent_ent_edge_id'

In [19]:
# Initialize components with logging
log_dir = "D:/CSE498R_Resources/D3N/Dengue-Drug-Discovery-Network-D3N/Logs/"
setup_database(uri="bolt://localhost:7687", 
              username="neo4j",
              password="adminPassword",
              database_name="D3N",
              log_dir=log_dir)
# Initialize graph
graph = EntityKnowledgeGraph(uri="bolt://localhost:7687", 
              username="neo4j",
              password="adminPassword",
              database_name="D3N",
              log_dir=log_dir)

create_D3N(s, graph,log_dir)

Pushing to the KG: 100%|██████████| 1559/1559 [20:39<00:00,  1.26it/s]
