In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply matplotlib's "seaborn-v0_8" style sheet to the figures drawn in this notebook.
plt.style.use("seaborn-v0_8")

In [7]:
from pathlib import Path

# Resolve project root (works regardless of notebook kernel cwd)
def get_project_root(start=None, markers=("environment.yml", "README.md"), max_depth=6):
    """Find the project root by walking upward until a marker file is found.

    Args:
        start: Directory to begin the search from. Defaults to the current
            working directory, which is what a notebook kernel usually wants.
        markers: File names whose presence identifies the project root.
        max_depth: Maximum number of directories to inspect, counting
            ``start`` itself.

    Returns:
        The first directory (``start`` or one of its ancestors) that contains
        any of ``markers``. If none is found within ``max_depth`` levels, the
        starting directory is returned unchanged as a best-effort fallback.
    """
    origin = Path.cwd() if start is None else Path(start).resolve()

    # ``parents`` stops at the filesystem root, so the walk can never loop
    # forever; slicing enforces the depth limit.
    candidates = (origin, *origin.parents)[:max_depth]
    for candidate in candidates:
        if any((candidate / marker).exists() for marker in markers):
            return candidate

    # No marker found: fall back to where the search started.
    return origin

# Locate the processed-data folder relative to the detected project root
PROJECT_ROOT = get_project_root()
DATA_DIR = PROJECT_ROOT.joinpath("data", "processed")

# Load both accumulated datasets, parsing their timestamp columns on read
gdelt = pd.read_csv(
    DATA_DIR.joinpath("gdelt_articles_accumulated.csv"),
    parse_dates=["seendate"],
)
ohlcv = pd.read_csv(
    DATA_DIR.joinpath("prices_daily_accumulated.csv"),
    parse_dates=["date"],
)

# Report (rows, columns) for each frame
for label, frame in (("GDELT:", gdelt), ("OHLCV:", ohlcv)):
    print(label, frame.shape)

GDELT: (131, 8)
OHLCV: (140, 8)


In [8]:
# Sanity-check the article timestamps and count how many distinct days they span.
# Unparseable values are coerced to NaT so they show up in the null count below.
gdelt["seendate"] = pd.to_datetime(gdelt["seendate"], errors="coerce")

seendate_summary = {
    "rows:": gdelt.shape[0],
    "null seendate:": gdelt["seendate"].isnull().sum(),
    "min:": gdelt["seendate"].min(),
    "max:": gdelt["seendate"].max(),
}
for label, value in seendate_summary.items():
    print(label, value)

# Truncate every timestamp to midnight so articles group by calendar day
days = gdelt["seendate"].dt.floor("D")
print("unique days:", days.nunique())

# Ten busiest days, most articles first
busiest_days = days.value_counts().head(10)
print(busiest_days)

rows: 131
null seendate: 0
min: 2026-01-27 05:15:00+00:00
max: 2026-01-27 21:00:00+00:00
unique days: 1
seendate
2026-01-27 00:00:00+00:00    131
Name: count, dtype: int64


In [4]:
# gdelt.groupby(gdelt["seendate"].dt.to_period("D")).size().plot(
#     figsize=(14, 4),
#     title="GDELT Articles per Day"
# )

# gdelt.groupby(gdelt["seendate"].dt.to_period("D")).size() \
#      .rename_axis("date") \
#      .reset_index(name="count") \
#      .assign(date=lambda df: df["date"].dt.to_timestamp()) \
#      .set_index("date")["count"] \
#      .plot(figsize=(14, 4))


In [None]:
# Daily article counts. The data may cover a single day, so point markers are
# drawn to keep a one-point series visible.
articles_by_time = gdelt.set_index("seendate")
ts = articles_by_time.resample("D").size()

fig, ax = plt.subplots(figsize=(14, 4))
ts.plot(ax=ax, marker="o", title="GDELT Articles per Day")
ax.set_ylabel("articles")
