In [1]:
import pandas as pd
import dagster as dg
from datetime import datetime, timedelta, timezone
import os
from pathlib import Path
from typing import Optional
from dotenv import load_dotenv
from eventregistry import EventRegistry, QueryArticlesIter, QueryItems

load_dotenv()

True

In [2]:
er = EventRegistry(apiKey=os.getenv("NEWSAPI_KEY"))

In [3]:
q = QueryArticlesIter(
	categoryUri=QueryItems.AND(["dmoz/Computers/Artificial_Intelligence", "dmoz/Business/Marketing_and_Advertising"]),
	lang="eng",
	dateStart=(datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d")
)
articles = []
for article in q.execQuery(er, sortBy="date", maxItems=100):
	articles.append(article)

In [4]:
df = pd.DataFrame(articles)
df

Unnamed: 0,uri,lang,isDuplicate,date,time,dateTime,dateTimePub,dataType,sim,url,title,body,source,authors,image,eventUri,sentiment,wgt,relevance
0,2025-12-925823602,eng,False,2025-12-17,14:43:31,2025-12-17T14:43:31Z,2025-12-17T14:16:49Z,news,0.00000,https://martechseries.com/predictive-ai/ai-pla...,Akii Launches AI Engage to Train AI Search Eng...,AI Engage enables brands to systematically edu...,"{'uri': 'martechseries.com', 'dataType': 'news...",[],https://martechseries.com/wp-content/uploads/2...,,0.301961,503678611,7
1,2025-12-925799680,eng,True,2025-12-17,14:17:30,2025-12-17T14:17:30Z,2025-12-17T13:34:08Z,news,0.00000,https://za.investing.com/news/stock-market-new...,Airbnb is 'an increasingly attractive brand mo...,Investing.com -- RBC Capital upgraded Airbnb t...,"{'uri': 'za.investing.com', 'dataType': 'news'...",[],https://i-invdn-com.investing.com/news/LYNXMPE...,,0.466667,503677050,4
2,2025-12-925798781,eng,True,2025-12-17,14:15:58,2025-12-17T14:15:58Z,2025-12-17T14:04:08Z,news,0.00000,https://au.finance.yahoo.com/news/airbnb-incre...,Airbnb is 'an increasingly attractive brand mo...,Investing.com -- RBC Capital upgraded Airbnb t...,"{'uri': 'au.finance.yahoo.com', 'dataType': 'n...","[{'uri': 'sam_boughedda@au.finance.yahoo.com',...",https://s.yimg.com/cv/apiv2/cv/apiv2/social/im...,,0.466667,503676958,4
3,2025-12-925792312,eng,True,2025-12-17,14:10:14,2025-12-17T14:10:14Z,2025-12-17T14:10:02Z,news,0.00000,https://www.streetinsider.com/Investing/Airbnb...,Airbnb is 'an increasingly attractive brand mo...,Investing.com -- RBC Capital upgraded Airbnb t...,"{'uri': 'streetinsider.com', 'dataType': 'news...",[],http://www.streetinsider.com/images/news2/257/...,,0.466667,503676614,4
4,2025-12-925792327,eng,False,2025-12-17,14:10:12,2025-12-17T14:10:12Z,2025-12-17T14:10:02Z,news,0.47451,https://www.streetinsider.com/Business+Wire/Ne...,"NexStrat AI, the First Enterprise-Grade AI Man...","Built by former Bain, BCG, Deloitte, and PwC c...","{'uri': 'streetinsider.com', 'dataType': 'news...",[],http://www.streetinsider.com/images/silogo-new...,eng-11235366,0.333333,503676612,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2025-12-923409648,eng,False,2025-12-15,12:12:42,2025-12-15T12:12:42Z,2025-12-15T12:06:00Z,news,0.00000,https://aithority.com/machine-learning/voksha-...,Voksha Launches AI-Powered Call Intelligence P...,Revolutionary System Transforms Every Phone Ca...,"{'uri': 'aithority.com', 'dataType': 'news', '...","[{'uri': 'ein_presswire@aithority.com', 'name'...",https://aithority.com/wp-content/uploads/2025/...,,0.294118,503496762,7
96,2025-12-923389975,eng,False,2025-12-15,11:53:43,2025-12-15T11:53:43Z,2025-12-15T11:21:36Z,news,0.00000,https://ididthat.co/i-did-that-guest-judge-mel...,I DID THAT Guest Judge: Melusi Mhlungu - IDIDT...,Melusi Mhlungu is an award-winning creative wi...,"{'uri': 'ididthat.co', 'dataType': 'news', 'ti...","[{'uri': 'amisha_zanetti@ididthat.co', 'name':...",https://ididthat.co/wp-content/uploads/2020/02...,,0.396078,503495623,3
97,2025-12-923370210,eng,False,2025-12-15,11:35:18,2025-12-15T11:35:18Z,2025-12-15T11:25:52Z,news,0.00000,https://finance.yahoo.com/news/wpp-crashes-60-...,WPP Crashes 60%--But a Shock AI Twist Could Fl...,This article first appeared on GuruFocus.\n\nM...,"{'uri': 'finance.yahoo.com', 'dataType': 'news...","[{'uri': 'khac_phu_nguyen@finance.yahoo.com', ...",https://media.zenfs.com/en/us.finance.gurufocu...,,-0.035294,503494518,6
98,2025-12-923367927,eng,True,2025-12-15,11:32:00,2025-12-15T11:32:00Z,2025-12-15T11:00:28Z,news,0.00000,https://uk.finance.yahoo.com/video/investors-a...,Investors' AI strategies should include these ...,Charles Schwab director and senior investment ...,"{'uri': 'uk.finance.yahoo.com', 'dataType': 'n...",[{'uri': 'yahoo_finance_video@uk.finance.yahoo...,https://s.yimg.com/ny/api/res/1.2/FCHvzZlBTIkU...,,0.239216,503494320,6


In [5]:
# Add metadata columns
df["fetched_at"] = datetime.now()

# Map eventregistry fields to consistent schema
if "uri" in df.columns:
	df["article_id"] = df["uri"]
if "url" not in df.columns and "uri" in df.columns:
	df["url"] = df["uri"]
if "dateTime" in df.columns:
	df["publishedAt"] = pd.to_datetime(df["dateTime"])
elif "date" in df.columns:
	df["publishedAt"] = pd.to_datetime(df["date"])

# Handle source information
if "source" in df.columns:
	df["source_name"] = df["source"].apply(
		lambda x: x.get("title")
		if isinstance(x, dict)
		else str(x)
		if x
		else None
	)
	df["source_uri"] = df["source"].apply(
		lambda x: x.get("uri") if isinstance(x, dict) else None
	)

In [6]:
df

Unnamed: 0,uri,lang,isDuplicate,date,time,dateTime,dateTimePub,dataType,sim,url,...,image,eventUri,sentiment,wgt,relevance,fetched_at,article_id,publishedAt,source_name,source_uri
0,2025-12-925823602,eng,False,2025-12-17,14:43:31,2025-12-17T14:43:31Z,2025-12-17T14:16:49Z,news,0.00000,https://martechseries.com/predictive-ai/ai-pla...,...,https://martechseries.com/wp-content/uploads/2...,,0.301961,503678611,7,2025-12-17 16:18:20.997146,2025-12-925823602,2025-12-17 14:43:31+00:00,MarTech Series,martechseries.com
1,2025-12-925799680,eng,True,2025-12-17,14:17:30,2025-12-17T14:17:30Z,2025-12-17T13:34:08Z,news,0.00000,https://za.investing.com/news/stock-market-new...,...,https://i-invdn-com.investing.com/news/LYNXMPE...,,0.466667,503677050,4,2025-12-17 16:18:20.997146,2025-12-925799680,2025-12-17 14:17:30+00:00,Investing.com South Africa,za.investing.com
2,2025-12-925798781,eng,True,2025-12-17,14:15:58,2025-12-17T14:15:58Z,2025-12-17T14:04:08Z,news,0.00000,https://au.finance.yahoo.com/news/airbnb-incre...,...,https://s.yimg.com/cv/apiv2/cv/apiv2/social/im...,,0.466667,503676958,4,2025-12-17 16:18:20.997146,2025-12-925798781,2025-12-17 14:15:58+00:00,Yahoo7 Finance,au.finance.yahoo.com
3,2025-12-925792312,eng,True,2025-12-17,14:10:14,2025-12-17T14:10:14Z,2025-12-17T14:10:02Z,news,0.00000,https://www.streetinsider.com/Investing/Airbnb...,...,http://www.streetinsider.com/images/news2/257/...,,0.466667,503676614,4,2025-12-17 16:18:20.997146,2025-12-925792312,2025-12-17 14:10:14+00:00,StreetInsider.com,streetinsider.com
4,2025-12-925792327,eng,False,2025-12-17,14:10:12,2025-12-17T14:10:12Z,2025-12-17T14:10:02Z,news,0.47451,https://www.streetinsider.com/Business+Wire/Ne...,...,http://www.streetinsider.com/images/silogo-new...,eng-11235366,0.333333,503676612,7,2025-12-17 16:18:20.997146,2025-12-925792327,2025-12-17 14:10:12+00:00,StreetInsider.com,streetinsider.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2025-12-923409648,eng,False,2025-12-15,12:12:42,2025-12-15T12:12:42Z,2025-12-15T12:06:00Z,news,0.00000,https://aithority.com/machine-learning/voksha-...,...,https://aithority.com/wp-content/uploads/2025/...,,0.294118,503496762,7,2025-12-17 16:18:20.997146,2025-12-923409648,2025-12-15 12:12:42+00:00,AiThority,aithority.com
96,2025-12-923389975,eng,False,2025-12-15,11:53:43,2025-12-15T11:53:43Z,2025-12-15T11:21:36Z,news,0.00000,https://ididthat.co/i-did-that-guest-judge-mel...,...,https://ididthat.co/wp-content/uploads/2020/02...,,0.396078,503495623,3,2025-12-17 16:18:20.997146,2025-12-923389975,2025-12-15 11:53:43+00:00,IDIDTHAT.co,ididthat.co
97,2025-12-923370210,eng,False,2025-12-15,11:35:18,2025-12-15T11:35:18Z,2025-12-15T11:25:52Z,news,0.00000,https://finance.yahoo.com/news/wpp-crashes-60-...,...,https://media.zenfs.com/en/us.finance.gurufocu...,,-0.035294,503494518,6,2025-12-17 16:18:20.997146,2025-12-923370210,2025-12-15 11:35:18+00:00,Yahoo! Finance,finance.yahoo.com
98,2025-12-923367927,eng,True,2025-12-15,11:32:00,2025-12-15T11:32:00Z,2025-12-15T11:00:28Z,news,0.00000,https://uk.finance.yahoo.com/video/investors-a...,...,https://s.yimg.com/ny/api/res/1.2/FCHvzZlBTIkU...,,0.239216,503494320,6,2025-12-17 16:18:20.997146,2025-12-923367927,2025-12-15 11:32:00+00:00,Yahoo! Finance,uk.finance.yahoo.com


In [7]:

{
	"num_articles": len(df),
	"last_fetch_timestamp": dg.MetadataValue.timestamp(datetime.now(timezone.utc)),
	"preview": dg.MetadataValue.md(
		df[["title", "source_name", "publishedAt"]].head(5).to_markdown()
		if not df.empty
		and all(
			col in df.columns
			for col in ["title", "source_name", "publishedAt"]
		)
		else df.head(5).to_markdown()
		if not df.empty
		else "No articles"
	),
}

{'num_articles': 100,
 'last_fetch_timestamp': TimestampMetadataValue(value=1765984702.788),
 'preview': MarkdownMetadataValue(md_str="|    | title                                                                                                                      | source_name                | publishedAt               |\n|---:|:---------------------------------------------------------------------------------------------------------------------------|:---------------------------|:--------------------------|\n|  0 | Akii Launches AI Engage to Train AI Search Engines on Brand Content                                                        | MarTech Series             | 2025-12-17 14:43:31+00:00 |\n|  1 | Airbnb is 'an increasingly attractive brand monetization story' says RBC By Investing.com                                  | Investing.com South Africa | 2025-12-17 14:17:30+00:00 |\n|  2 | Airbnb is 'an increasingly attractive brand monetization story' says RBC                          