In [2]:
import csv
import spacy
from collections import defaultdict
from datetime import datetime

# Load spaCy's English language model
nlp = spacy.load('en_core_web_sm')

# Read the articles from TOIFarmersProtest.csv
articles_file_path = 'TOI_FarmersProtests.csv'

# Dictionary to hold entity pairs with date ranges and weights
entity_pairs = defaultdict(lambda: defaultdict(int))

with open(articles_file_path, 'r', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    for row in reader:
        date_str = row['Date']
        # Parse the date
        date_obj = datetime.strptime(date_str, '%d-%m-%Y')
        # Convert date to month-year format (e.g., 'Jan-2020')
        date_range = date_obj.strftime('%b-%Y')

        # Extract entities from the article text
        text = row['Text']
        doc = nlp(text)
        entities = [ent.text.strip() for ent in doc.ents if ent.label_ in ('PERSON', 'ORG', 'GPE', 'LOC')]

        # Remove duplicates and sort entities
        unique_entities = sorted(set(entities))

        # Create all possible pairs of entities
        for i in range(len(unique_entities)):
            for j in range(i+1, len(unique_entities)):
                e1 = unique_entities[i]
                e2 = unique_entities[j]

                # Increment the weight for this pair in this date range
                entity_pairs[(e1, e2)][date_range] += 1

# Now, create a list of dictionaries to write to CSV
output_data = []

for (e1, e2), date_dict in entity_pairs.items():
    for date_range, weight in date_dict.items():
        output_data.append({
            'entity1': e1,
            'entity2': e2,
            'date_range': date_range,
            'weight': weight
        })

# Write the output data to a CSV file
output_csv_file = 'entity_pairs_with_dates.csv'
fieldnames = ['entity1', 'entity2', 'date_range', 'weight']

with open(output_csv_file, 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(output_data)


In [6]:
from neo4j import GraphDatabase
import csv

uri = "neo4j://localhost:7687"  # Change this to your Neo4j instance URI
username = "neo4j"             # Replace with your username
password = "mediagraph"        # Replace with your password
driver = GraphDatabase.driver(uri, auth=(username, password))

def add_timestamped_relationship(tx, entity1, entity2, date_range, weight):
    query = """
    MERGE (e1:Entity {name: $entity1})
    MERGE (e2:Entity {name: $entity2})
    MERGE (e1)-[r:CO_OCCURS_WITH {date_range: $date_range}]->(e2)
    ON CREATE SET r.weight = $weight
    ON MATCH SET r.weight = r.weight + $weight
    """
    tx.run(query, entity1=entity1, entity2=entity2, date_range=date_range, weight=weight)


csv_file_path = 'entity_pairs_with_dates.csv'

with open(csv_file_path, 'r', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    data = [row for row in reader]

with driver.session() as session:
    for row in data:
        entity1 = row['entity1']
        entity2 = row['entity2']
        date_range = row['date_range']
        weight = int(row['weight'])
        
        session.write_transaction(add_timestamped_relationship, entity1, entity2, date_range, weight)

driver.close()


  session.write_transaction(add_timestamped_relationship, entity1, entity2, date_range, weight)
