In [1]:
# Reload imported modules automatically before each cell runs, so edits to
# local source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import logging 
import sys
# Add the parent directory to the import path. This must run before any import
# that depends on it (presumably the local `atom` package used further down —
# confirm against the repository layout).
sys.path.append("..")
import pandas as pd

# Logger named after this module/notebook.
logger = logging.getLogger(__name__)

# Importing the LLM models and the necessary modules

In [None]:
import getpass
import os

from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Keep the secret out of the notebook: read it from the OPENAI_API_KEY
# environment variable and only fall back to an interactive prompt when the
# variable is missing or empty.
openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

# Chat model handed to the graph builder below. `None` for max_tokens/timeout
# leaves those settings at the client's defaults.
openai_llm_model = ChatOpenAI(
    api_key=openai_api_key,
    model="gpt-4.1-2025-04-14",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# Embedding model handed to the graph builder below.
openai_embeddings_model = OpenAIEmbeddings(
    api_key=openai_api_key,
    model="text-embedding-3-large",
)


# Loading the datasets

In [9]:
# Load the prepared news table (NYT COVID articles, judging by the file name).
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
df_nyt = pd.read_pickle("../datasets/nyt_news/2020_nyt_COVID_last_version_ready.pkl")

# Atomic Facts

The atomic facts are already available in our dataset (the `factoids_g_truth` column), so we use them directly.

In [10]:
import ast 

def to_dictionary(df):
    """Group the atomic facts in ``df`` by date.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``date`` column and a ``factoids_g_truth`` column whose
        cells are either lists of facts or the string representation of such
        lists (as produced when the column was serialised as text).

    Returns
    -------
    dict
        ``{str(date): [fact, ...]}``. Dates appear in ascending order and the
        facts of rows sharing a date are concatenated in row order. Rows whose
        date is missing are skipped (pandas ``groupby`` default).

    Notes
    -----
    The input frame is never modified, so the function can be re-run on the
    same frame with identical results. Cells are decoded individually, so the
    function also works on an empty frame, on a frame whose index does not
    contain the label ``0``, and on columns mixing strings and lists.
    """

    def _parse(cell):
        # literal_eval only evaluates Python literals, so it is safe to use on
        # the stringified lists; already-parsed cells pass through untouched.
        return ast.literal_eval(cell) if isinstance(cell, str) else cell

    facts_per_row = df["factoids_g_truth"].map(_parse)

    facts_by_date = {}
    # groupby sorts the keys and keeps the row order inside each group.
    for date, rows in facts_per_row.groupby(df["date"]):
        facts_by_date[str(date)] = [fact for row in rows for fact in row]
    return facts_by_date
# Map each date (as a string) to the list of atomic facts recorded for it.
news = to_dictionary(df_nyt)

In [13]:
# Preview the first five dates together with their atomic facts.
{date: news[date] for date in list(news)[:5]}

{'2020-01-09 00:00:00': ['Chinese researchers identified a new virus behind an illness',
  'The new virus infected dozens of people across Asia',
  'The outbreak of the new virus raised fears in Asia',
  'Asia experienced a deadly epidemic 17 years before January 09 2020.'],
 '2020-01-23 00:00:00': ['A mysterious respiratory virus was first found in Wuhan in December 2019.',
  'Authorities limited travel in cities in China, including Wuhan, due to the spread of a mysterious respiratory virus.',
  'The mysterious respiratory virus spread across China',
  'The mysterious respiratory virus spread to at least 10 other countries.'],
 '2020-01-27 00:00:00': ['U.S. stock futures declined sharply on January 27, 2020.',
  'The decline in U.S. stock futures on January 27,2020, was due to fears about the coronavirus outbreak.'],
 '2020-01-28 00:00:00': ["President Trump's lawyers began their defense on the third day on January 28, 2020.",
  'The Republican-controlled Senate faced the question of 

In [None]:
from atom import Atom

atom = Atom(llm_model=openai_llm_model, embeddings_model=openai_embeddings_model)

kg = await atom.build_graph_from_different_obs_times(atomic_facts_with_obs_timestamps=dict(list(news.items())[:5]))

In [17]:
# Display the relationships held by the graph built above.
kg.relationships

[Relationship(name='raise_fears_in', startEntity=Entity(name='outbreak of the new virus', label='event', properties=EntityProperties(embeddings=array([-0.02740944,  0.00179138,  0.01016376, ...,  0.00646361,
        -0.00670968, -0.01349134], shape=(3072,)))), endEntity=Entity(name='asia', label='location', properties=EntityProperties(embeddings=array([ 0.00331179,  0.02424994,  0.0002105 , ...,  0.0215823 ,
        -0.00640898,  0.01664826], shape=(3072,)))), properties=RelationshipProperties(embeddings=array([-0.01960878, -0.01421717, -0.00448499, ...,  0.00588103,
        -0.00034425,  0.02127761], shape=(3072,)), sources=['The outbreak of the new virus raised fears in Asia'], timestamps=[1578524400.0], t_start=[], t_end=[])),
 Relationship(name='experiences', startEntity=Entity(name='asia', label='location', properties=EntityProperties(embeddings=array([ 0.00331179,  0.02424994,  0.0002105 , ...,  0.0215823 ,
        -0.00640898,  0.01664826], shape=(3072,)))), endEntity=Entity(nam

# Draw the DTKG in Neo4j

In [None]:
import getpass
import os

from atom.graph_integration import Neo4jStorage

# Connection settings. Environment variables take precedence over the local
# defaults so that credentials never have to be written into the notebook.
URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
# Prompt interactively only when NEO4J_PASSWORD is missing or empty.
PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")

# Send the graph built above to the Neo4j instance for visualisation.
Neo4jStorage(uri=URI, username=USERNAME, password=PASSWORD).visualize_graph(knowledge_graph=kg)