In [8]:
# pip install pygooglenews feedparser python-dateutil pandas
from pygooglenews import GoogleNews
from dateutil import tz
from datetime import datetime
import hashlib, os, re
import pandas as pd
from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode
import time

In [9]:
# Search expressions sent to Google News. run_pull() iterates over this list and
# passes each element, unchanged, to gn.search() together with the WHEN window.
# Adjacent string literals with no comma between them are concatenated by Python,
# so every multi-line group below is a single query string.
ALL_QUERIES = [

    # --- ENTITY QUERIES ---
    '("BNY Mellon" OR "Bank of New York Mellon" OR "BNYM" OR BK OR Pershing OR Dreyfus) -gossip -celebrity',

    # --- CROSS-INDUSTRY RISK SIGNALS ---
    # One query: an OR-group of risk phrases followed by a list of "-" exclusion phrases.
    '("bankruptcy" OR "default" OR "insolvency" OR "credit downgrade" OR "ratings action" OR '
    '"profit warning" OR "earnings miss" OR "liquidity crisis" OR '
    '"regulatory action" OR "SEC investigation" OR "DOJ" OR "fraud charges" OR "antitrust" OR '
    '"cyber attack" OR "data breach" OR "ransomware" OR '
    '"labor strike" OR "union action" OR "walkout" OR '
    '"supply chain disruption" OR "plant shutdown" OR "recall" OR '
    '"sanctions" OR "export controls" OR "geopolitical tension") '
    '-"final score" -"game recap" -"match highlights" -"fantasy sports" -"transfer rumor" -"trade rumor" -"player injury update" -"celebrity athlete"',

    # --- SECTOR COVERAGE ---
    # One query; unlike the two groups above it carries no "-" exclusion terms.
    '(auto OR automotive OR EV OR semiconductor OR technology OR AI OR energy OR utilities OR '
    '"real estate" OR construction OR manufacturing OR pharmaceuticals OR biotechnology OR '
    'retail OR airlines OR shipping OR telecom OR banking OR finance OR media)',

    # --- MARKET / FINANCIAL RISK THEMES ---
    '("interest rates" OR "credit spreads" OR volatility OR "FX markets" OR '
    '"repo market" OR liquidity OR derivatives OR funding)',

    # --- THEMATIC FINANCE + REGULATORY (FROM ORIGINAL THEME_QUERIES) ---
    # Five separate queries (each line ends with a comma, except the last element).
    '"asset servicing" OR custody OR "securities services"',
    'FX OR "repo market" OR "liquidity management" OR "securities lending"',
    '"Basel III" OR "Basel endgame" OR "capital requirements" OR "operational resilience"',
    '"rating action" OR "credit watch" OR "outlook revised"',
    '"tokenized assets" OR "market infrastructure" OR "T+1"'
]

In [10]:
# ---- Run configuration -------------------------------------------------------
LANG = "en"
COUNTRY = "US"
WHEN = "1d"   # rolling window for continuous pulls; use from_/to_ in backfill()
# Timezone-aware timestamp for this run; it names the raw output folder/file and
# is stored (ISO format) in the fetched_at / first_seen_at columns.
RUN_DT = datetime.now(tz=tz.tzlocal())
DIR = 'data'
# Output dirs
RAW_DIR = f"raw/{RUN_DT:%Y-%m-%d}"
STATE_DIR = "state"
MASTER_DIR = "master"
# Both persistent files live under DIR (not under STATE_DIR / MASTER_DIR).
MASTER_PATH = f"{DIR}/realtime_data_feed2.csv"
SEEN_PATH = f"{DIR}/gnews_seen_ids.csv"

# DIR must exist as well: MASTER_PATH and SEEN_PATH are written inside it, and
# appending to a CSV in a missing directory fails.
for _out_dir in (DIR, RAW_DIR, STATE_DIR, MASTER_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# -------- Helpers --------
def clean_url(u: str) -> str:
    """Remove common tracking params, keep a stable canonical-ish URL.

    Query parameters whose name starts with ``utm_`` or equals ``gclid`` /
    ``fbclid`` (case-insensitive) are dropped; every other parameter is kept in
    its original order. The URL fragment and path parameters are discarded.

    Args:
        u: URL to normalise.

    Returns:
        The cleaned URL, or ``u`` unchanged when it has no scheme or cannot be
        parsed (best effort: this function never raises).
    """
    try:
        p = urlparse(u)
        if not p.scheme:
            return u
        # The previous pattern '^(utm_|gclid|fbclid)$' only matched a key that was
        # literally "utm_", so utm_source / utm_medium / ... were never removed.
        q = [(k, v) for k, v in parse_qsl(p.query, keep_blank_values=True)
             if not re.match(r'^(utm_.*|gclid|fbclid)$', k, re.I)]
        return urlunparse((p.scheme, p.netloc, p.path, "", urlencode(q), ""))
    except Exception:
        return u

def make_id(title: str, canonical_link: str) -> str:
    """Return a SHA-256 hex digest identifying an article.

    The digest is taken over ``"<title>|<canonical_link>"`` (UTF-8), where each
    part is stripped of surrounding whitespace and a falsy value counts as "".
    """
    parts = [(field or "").strip() for field in (title, canonical_link)]
    digest = hashlib.sha256()
    digest.update("|".join(parts).encode("utf-8"))
    return digest.hexdigest()

def load_seen(path: str) -> set:
    """Load the set of already-seen article ids from the CSV at ``path``.

    Returns an empty set when the file does not exist, or when it cannot be
    read or has no ``id`` column (any exception is swallowed). Ids are returned
    as strings.
    """
    seen_ids = set()
    if os.path.exists(path):
        try:
            id_column = pd.read_csv(path)["id"]
            seen_ids = set(id_column.astype(str).tolist())
        except Exception:
            # Best effort: an unreadable state file is treated as "nothing seen".
            seen_ids = set()
    return seen_ids

def append_seen(path: str, new_ids: list[str], run_ts: str) -> None:
    """Append ``new_ids`` to the seen-ids CSV at ``path``.

    Every id is written with the same ``first_seen_at`` value (``run_ts``). The
    header row is emitted only when the file does not exist yet. Nothing is
    written when ``new_ids`` is empty.
    """
    if new_ids:
        # Decide on the header before writing: only a brand-new file gets one.
        file_is_new = not os.path.exists(path)
        seen_frame = pd.DataFrame({"id": new_ids, "first_seen_at": run_ts})
        seen_frame.to_csv(path, mode="a", header=file_is_new, index=False)

def write_master(rows: list[dict]) -> None:
    """Build the master-feed frame for ``rows``; the actual write is disabled.

    For non-empty ``rows`` a DataFrame is constructed and the header decision
    for MASTER_PATH is computed, but the append itself is commented out, so the
    function currently has no effect on disk. Returns None in every case.
    """
    if rows:
        master_frame = pd.DataFrame(rows)
        needs_header = not os.path.exists(MASTER_PATH)
        # Disabled append (kept for reference):
        # master_frame.to_csv(MASTER_PATH, mode="a", header=needs_header, index=False)
    

# -------- Main pull --------
def run_pull():
    """Run every query in ALL_QUERIES and return the articles not seen before.

    For each query the Google News RSS search is called with the WHEN window.
    Every returned entry is recorded in the raw CSV for this run (under
    RAW_DIR); entries whose id (see make_id) is not yet known are collected for
    the returned frame and their ids are appended to the seen-file at SEEN_PATH.

    Returns:
        pandas.DataFrame with one row per new article and the columns
        id, title, link, published, source, summary, query, fetched_at.
        An article matched by several queries appears once, attributed to the
        first query that returned it.
    """
    # The class imported at the top of the notebook is pygooglenews.GoogleNews;
    # the name `GNews` used here before is not defined by any visible import.
    gn = GoogleNews(lang=LANG, country=COUNTRY)
    seen = load_seen(SEEN_PATH)
    run_iso = RUN_DT.isoformat()
    raw_out = os.path.join(RAW_DIR, f"google_news_raw_{RUN_DT:%Y%m%d_%H%M}.csv")

    raw_rows, master_rows, new_ids = [], [], []

    for q in ALL_QUERIES:
        res = gn.search(q, when=WHEN)  # swap to from_/to_ in backfill()
        entries = (res or {}).get("entries", []) or []
        for e in entries:
            title = e.get("title", "")
            link = e.get("link", "")
            src = (e.get("source", {}) or {}).get("title", "")
            summary = e.get("summary", "")
            published = e.get("published", "")
            canonical = clean_url(link)
            _id = make_id(title, canonical)

            row = {
                "id": _id,
                "title": title,
                "link": canonical,
                "published": published,
                "source": src,
                "summary": summary,
                "query": q,
                "fetched_at": run_iso
            }
            # The raw file keeps every hit, including repeats across queries.
            raw_rows.append(row)

            if _id not in seen:
                # Remember the id immediately so the same article returned by a
                # later query in this run is not emitted (or recorded) twice.
                seen.add(_id)
                master_rows.append(row)
                new_ids.append(_id)

    # Store raw data pull
    pd.DataFrame(raw_rows).to_csv(raw_out, index=False)

    # Persist the new ids BEFORE returning. This call used to sit after the
    # return statement and was therefore never executed.
    seen_dir = os.path.dirname(SEEN_PATH)
    if seen_dir:
        os.makedirs(seen_dir, exist_ok=True)
    append_seen(SEEN_PATH, new_ids, run_iso)

    # Return filtered df
    return pd.DataFrame(master_rows)

In [11]:
## Main Script
# Wall-clock start time for the run; nothing in this cell reads it back yet.
start = time.time()

# run_pull() executes all queries and returns only the rows whose id was not
# already present in the seen-file.
df = run_pull()
# `header` is only consumed by the commented-out to_csv below, so it is currently unused.
header = not os.path.exists(MASTER_PATH)
# df.to_csv(MASTER_PATH, mode="a", header=header, index=False)
# NOTE(review): new_ids and run_iso are locals of run_pull(); un-commenting the
# next line as-is would raise NameError at notebook scope.
# append_seen(SEEN_PATH, new_ids, run_iso)

# fetch = time.time()


In [20]:
# NOTE(review): raw_out is a local variable of run_pull(), so it does not exist
# at notebook scope; that is the cause of the NameError shown below.
raw_out

NameError: name 'raw_out' is not defined

In [12]:
df

Unnamed: 0,id,title,link,published,source,summary,query,fetched_at
0,5bc55c5c7e34e8b58cd7a8cc961e46c23dfdbe3db7288e...,Bank of New York Mellon Stock Rises on Earning...,https://news.google.com/rss/articles/CBMigwNBV...,"Tue, 13 Jan 2026 15:50:00 GMT",Barron's,"<a href=""https://news.google.com/rss/articles/...","(""BNY Mellon"" OR ""Bank of New York Mellon"" OR ...",2026-01-13T14:51:35.540052-05:00
1,c70561ad0ba56c37bd19752874cc9f8fdabb7b84a358b1...,Bank of New York Mellon Q4 Earnings Call Highl...,https://news.google.com/rss/articles/CBMif0FVX...,"Tue, 13 Jan 2026 19:06:54 GMT",Yahoo Finance,"<a href=""https://news.google.com/rss/articles/...","(""BNY Mellon"" OR ""Bank of New York Mellon"" OR ...",2026-01-13T14:51:35.540052-05:00
2,f5def2c6cf9adbda0db93d7e8888ff0df68b9057058bff...,"FINRA fines BNY Mellon Capital Markets $60,000...",https://news.google.com/rss/articles/CBMisgFBV...,"Tue, 13 Jan 2026 18:41:00 GMT",Bond Buyer,"<a href=""https://news.google.com/rss/articles/...","(""BNY Mellon"" OR ""Bank of New York Mellon"" OR ...",2026-01-13T14:51:35.540052-05:00
3,44d28c16208c8af49845e96af478bbd515993b4c20bf90...,Earnings call transcript: Bank of New York Mel...,https://news.google.com/rss/articles/CBMizgFBV...,"Tue, 13 Jan 2026 18:06:45 GMT",Investing.com,"<a href=""https://news.google.com/rss/articles/...","(""BNY Mellon"" OR ""Bank of New York Mellon"" OR ...",2026-01-13T14:51:35.540052-05:00
4,9a7d93e03efc22a1f5f98f97e747ab40e00a97c9e959f3...,"Bank Of New York Mellon: Solid Q4, But Margin ...",https://news.google.com/rss/articles/CBMiqgFBV...,"Tue, 13 Jan 2026 15:52:09 GMT",Seeking Alpha,"<a href=""https://news.google.com/rss/articles/...","(""BNY Mellon"" OR ""Bank of New York Mellon"" OR ...",2026-01-13T14:51:35.540052-05:00
...,...,...,...,...,...,...,...,...
684,7cb673a91bd817e5ed042d1c7ef5f57911c3d78577bd31...,Bloomberg defers inclusion of Indian bonds in ...,https://news.google.com/rss/articles/CBMiyAFBV...,"Tue, 13 Jan 2026 07:42:50 GMT",ET Now,"<a href=""https://news.google.com/rss/articles/...","""tokenized assets"" OR ""market infrastructure"" ...",2026-01-13T14:51:35.540052-05:00
685,fe40746055e860d67455e32f7b7acfe8de263cb5c86dd6...,Bloomberg Index Services Postpones Indian Bond...,https://news.google.com/rss/articles/CBMi0gFBV...,"Tue, 13 Jan 2026 06:59:48 GMT",scanx.trade,"<a href=""https://news.google.com/rss/articles/...","""tokenized assets"" OR ""market infrastructure"" ...",2026-01-13T14:51:35.540052-05:00
686,1240a4d280bc3da87183832d9f8362ac73ba6f2d1f7dd9...,India bonds fall after Bloomberg index entry s...,https://news.google.com/rss/articles/CBMiVkFVX...,"Tue, 13 Jan 2026 12:35:30 GMT",Business Recorder,"<a href=""https://news.google.com/rss/articles/...","""tokenized assets"" OR ""market infrastructure"" ...",2026-01-13T14:51:35.540052-05:00
687,797242bb275685307d4420e4131f349f17527fc9ae7721...,Bloomberg defers Inclusion of Indian govt bond...,https://news.google.com/rss/articles/CBMiZEFVX...,"Tue, 13 Jan 2026 06:48:31 GMT",Daijiworld,"<a href=""https://news.google.com/rss/articles/...","""tokenized assets"" OR ""market infrastructure"" ...",2026-01-13T14:51:35.540052-05:00


In [13]:
# import google.generativeai as genai

# genai.configure(api_key=os.environ["GOOGLE_API_KEY"])  # key redacted: never keep credentials in the notebook
# model = genai.GenerativeModel("models/gemini-2.5-flash")

# llm_c = 0
# def llm_counter(link):
#     global llm_c
#     llm_c += 1
#     return model.generate_content(f"For the article in \
#     this link, {link}, provide me a summary of the \
#     article. 2-3 sentences. If the link is not found or \
#     if unable to, only output: <none>").text
    
# datafetch = time.time()
# df["summary"] = df["link"].apply(
#     lambda x: llm_counter(x)
# )
# llm_summary_time = time.time()

In [14]:
import time, json, math
import google.generativeai as genai
from typing import List, Dict

# SECURITY: the API key used to be hardcoded in this cell. Keys must not live in
# a notebook (they leak through version control and shared outputs); the old key
# should be treated as exposed and revoked. The key is now read from the
# environment and the cell fails fast with a clear message when it is missing.
_GEMINI_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not _GEMINI_API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this cell."
    )
genai.configure(api_key=_GEMINI_API_KEY)
MODEL_NAME = "models/gemini-2.5-flash"

# Instruction text prepended to every batch prompt (see build_prompt).
# Two fixes compared with the previous wording:
#   * each sentence now ends with a space, so the concatenated text no longer
#     reads "...explanations.If the link...";
#   * the fallback asks for summary == "<none>" inside the JSON object. The old
#     "only output: <none>" contradicted the JSON-only contract and would make
#     the whole response unparseable.
SYSTEM_INSTRUCTIONS = (
    "You will receive a list of items, each with an 'id' and a 'url'. "
    "For each url, write a concise 2-3 sentence summary of the article at that link. "
    "Return ONLY a JSON array where each element is an object with keys: "
    "id (copied from input) and summary (a string). "
    "Do not include any other keys. Do not include any text outside the JSON. "
    "Do not include comments, markdown, or explanations. "
    "If the link is not found or you are unable to summarize it, "
    "set that item's summary to \"<none>\"."
)

def build_prompt(batch_items: List[Dict]) -> str:
    """
    Assemble the prompt text for one batch.

    The result is SYSTEM_INSTRUCTIONS, a blank line, the marker line
    "INPUT_JSON:", and then batch_items serialized as JSON (non-ASCII
    characters are kept as-is). The batch size is not checked here; callers
    decide how many items go into one prompt.
    """
    serialized_items = json.dumps(batch_items, ensure_ascii=False)
    return "".join((SYSTEM_INSTRUCTIONS, "\n\nINPUT_JSON:\n", serialized_items))

def _strip_code_fence(text: str) -> str:
    """Return text with a surrounding Markdown code fence (``` or ```json) removed."""
    stripped = text.strip()
    if stripped.startswith("```"):
        stripped = stripped.strip("`").strip()
        first_line, newline, remainder = stripped.partition("\n")
        # Drop the first line only when it is a language hint such as "json";
        # otherwise it is already part of the payload and must be kept.
        if newline and first_line.strip().isalnum():
            stripped = remainder
    return stripped


def call_gemini_json(prompt: str, max_retries: int = 3, base_sleep: float = 8.0):
    """
    Call Gemini and parse the reply as JSON, retrying on failure.

    Args:
        prompt: Full prompt text sent to the model.
        max_retries: Maximum number of attempts. With a value < 1 no call is
            made and None is returned.
        base_sleep: Base delay in seconds; after a failed API call on attempt k
            the function sleeps base_sleep * k before the next attempt.

    Returns:
        The Python object decoded from the model's JSON text.

    Raises:
        Whatever the last attempt raised: the API/response error, or
        json.JSONDecodeError when the reply could not be parsed even after
        removing a Markdown code fence.

    Notes:
        Any API exception is retried with the linear backoff above (quota/429
        errors are not distinguished from other failures). Unparseable replies
        are retried immediately. No sleep happens before the final raise.
    """
    model = genai.GenerativeModel(MODEL_NAME)
    for attempt in range(1, max_retries + 1):
        is_last_attempt = attempt == max_retries

        try:
            resp = model.generate_content(prompt)
            text = resp.text or ""
        except Exception:
            # Previously the backoff sleep ran even on the last attempt, delaying
            # the inevitable raise by base_sleep * max_retries seconds.
            if is_last_attempt:
                raise
            time.sleep(base_sleep * attempt)
            continue

        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass  # fall through to the fence-stripping salvage below

        try:
            return json.loads(_strip_code_fence(text))
        except json.JSONDecodeError:
            if is_last_attempt:
                raise

# def summarize_links_in_batches(
#     df,
#     link_col: str = "link",
#     out_col: str = "summary",
#     batch_size: int = 40,
#     max_calls: int = 10,
# ) -> None:
#     """
#     Summarize links in df[link_col] using Gemini in batches of <= batch_size.
#     Writes results to df[out_col]. Makes at most max_calls API calls.
#     Partially fills df[out_col] even if later calls fail.
#     """

#     if out_col not in df.columns:
#         df[out_col] = None

#     # Select rows that still need summaries
#     todo_idx = df.index[df[out_col].isna() | (df[out_col] == "")].tolist()
#     if not todo_idx:
#         return  # nothing to do

#     # Cap total items by batch_size * max_calls
#     max_items = batch_size * max_calls
#     todo_idx = todo_idx[:max_items]

#     # Create batches
#     batches = [
#         todo_idx[i : i + batch_size] for i in range(0, len(todo_idx), batch_size)
#     ]

#     calls_made = 0
#     for batch_indices in batches:
#         if calls_made >= max_calls:
#             break

#         batch_items = [{"id": int(i), "url": str(df.at[i, link_col])} for i in batch_indices]
#         prompt = build_prompt(batch_items)

#         try:
#             result = call_gemini_json(prompt)
#         except Exception as e:
#             # Keep partial progress, then stop further calls (stay within max_calls intent)
#             # You could log/print e here if desired.
#             break

#         # Validate and write back
#         # Expecting: [{"id": <df_index>, "summary": "..."} ...]
#         if isinstance(result, list):
#             for obj in result:
#                 try:
#                     row_id = obj["id"]
#                     summary = obj["summary"]
#                     if row_id in df.index:
#                         df.at[row_id, out_col] = summary
#                 except Exception:
#                     # Skip malformed objects; continue with others
#                     continue

#         calls_made += 1

#         # Optional throttle to respect 10 RPM even if something else calls us rapidly.
#         # With max_calls=10 this caps at ~1 minute total if you keep it enabled.
#         # time.sleep(6)

#     # Done. df[out_col] now contains summaries for processed rows.
#     # Unprocessed rows remain None/"" and can be handled in a later run.

# # ---------------------------
# # Example usage
# # ---------------------------
# # summarize_links_in_batches(df, link_col="link", out_col="summary",
# #                            batch_size=40, max_calls=10)
# # df.head()


In [15]:
def summarize_links_in_batches(
    df,
    link_col: str = "link",
    out_col: str = "summary",
    batch_size: int = 40,
    max_calls: int = 10,
) -> list:
    """
    Ask Gemini to summarize the leading rows of ``df`` in batches.

    At most ``batch_size * max_calls`` rows (400 with the defaults) are sent,
    starting from the first row; frames with fewer rows are processed in full
    instead of being rejected. Each batch is one build_prompt +
    call_gemini_json round trip, and every item carries the row's position as
    its "id" and the value of ``link_col`` (as a string) as its "url".

    Args:
        df: DataFrame holding the links. NOTE: its index is reset IN PLACE to
            0..n-1 so that the returned ids line up with ``df.index``.
        link_col: Name of the column containing the URLs.
        out_col: Unused. Kept for backward compatibility; this function does not
            write summaries into ``df``. Callers merge the returned batches.
        batch_size: Number of links per API call.
        max_calls: Maximum number of API calls.

    Returns:
        A list with one element per successful call, each being the list the
        model returned (expected shape: [{"id": ..., "summary": ...}, ...]).
        Processing stops at the first failed call; batches collected so far are
        still returned.

    Raises:
        ValueError: if ``batch_size`` or ``max_calls`` is smaller than 1.
    """
    import warnings

    if batch_size < 1 or max_calls < 1:
        raise ValueError("batch_size and max_calls must be positive integers.")

    # Kept from the original implementation: callers map results back through
    # df.index, which only works when the index is positional.
    df.reset_index(drop=True, inplace=True)

    limit = min(len(df), batch_size * max_calls)
    urls = [str(value) for value in df[link_col].iloc[:limit]]

    results = []
    for batch_start in range(0, limit, batch_size):
        batch_end = min(batch_start + batch_size, limit)
        batch_items = [{"id": i, "url": urls[i]} for i in range(batch_start, batch_end)]
        prompt = build_prompt(batch_items)

        try:
            result = call_gemini_json(prompt)
        except Exception as exc:
            # Keep partial progress and stop further calls, but say so instead of
            # failing silently.
            warnings.warn(
                f"Gemini call failed for rows {batch_start}-{batch_end - 1}; "
                f"stopping early: {exc!r}"
            )
            break

        if isinstance(result, list):
            results.append(result)
        else:
            # A non-list payload cannot be flattened by the caller; skip it.
            warnings.warn(
                f"Ignoring non-list response for rows {batch_start}-{batch_end - 1}."
            )
    return results

In [16]:
results = summarize_links_in_batches(df)

In [19]:
# NOTE(review): `result` only exists as a local inside summarize_links_in_batches();
# the notebook-level variable assigned from that call is `results`, hence the
# NameError shown below.
result

NameError: name 'result' is not defined

In [17]:
# Flatten the per-batch lists returned by summarize_links_in_batches().
res_flat = [item for batch in results if isinstance(batch, list) for item in batch]

# Build {row position -> summary}. Ids are coerced to int because a model may echo
# them back as JSON strings ("0"), which would never match the integer index and
# leave the row's summary as NaN. Malformed items are skipped instead of aborting
# the whole cell.
summary_map = {}
for item in res_flat:
    try:
        summary_map[int(item["id"])] = item["summary"]
    except (KeyError, TypeError, ValueError):
        continue

# NOTE: this overwrites the RSS "summary" column; rows without a generated summary
# (e.g. beyond the summarized range) become NaN.
df["summary"] = df.index.map(summary_map)
df

Unnamed: 0,id,title,link,published,source,summary,query,fetched_at
0,5bc55c5c7e34e8b58cd7a8cc961e46c23dfdbe3db7288e...,Bank of New York Mellon Stock Rises on Earning...,https://news.google.com/rss/articles/CBMigwNBV...,"Tue, 13 Jan 2026 15:50:00 GMT",Barron's,,"(""BNY Mellon"" OR ""Bank of New York Mellon"" OR ...",2026-01-13T14:51:35.540052-05:00
1,c70561ad0ba56c37bd19752874cc9f8fdabb7b84a358b1...,Bank of New York Mellon Q4 Earnings Call Highl...,https://news.google.com/rss/articles/CBMif0FVX...,"Tue, 13 Jan 2026 19:06:54 GMT",Yahoo Finance,,"(""BNY Mellon"" OR ""Bank of New York Mellon"" OR ...",2026-01-13T14:51:35.540052-05:00
2,f5def2c6cf9adbda0db93d7e8888ff0df68b9057058bff...,"FINRA fines BNY Mellon Capital Markets $60,000...",https://news.google.com/rss/articles/CBMisgFBV...,"Tue, 13 Jan 2026 18:41:00 GMT",Bond Buyer,,"(""BNY Mellon"" OR ""Bank of New York Mellon"" OR ...",2026-01-13T14:51:35.540052-05:00
3,44d28c16208c8af49845e96af478bbd515993b4c20bf90...,Earnings call transcript: Bank of New York Mel...,https://news.google.com/rss/articles/CBMizgFBV...,"Tue, 13 Jan 2026 18:06:45 GMT",Investing.com,,"(""BNY Mellon"" OR ""Bank of New York Mellon"" OR ...",2026-01-13T14:51:35.540052-05:00
4,9a7d93e03efc22a1f5f98f97e747ab40e00a97c9e959f3...,"Bank Of New York Mellon: Solid Q4, But Margin ...",https://news.google.com/rss/articles/CBMiqgFBV...,"Tue, 13 Jan 2026 15:52:09 GMT",Seeking Alpha,,"(""BNY Mellon"" OR ""Bank of New York Mellon"" OR ...",2026-01-13T14:51:35.540052-05:00
...,...,...,...,...,...,...,...,...
684,7cb673a91bd817e5ed042d1c7ef5f57911c3d78577bd31...,Bloomberg defers inclusion of Indian bonds in ...,https://news.google.com/rss/articles/CBMiyAFBV...,"Tue, 13 Jan 2026 07:42:50 GMT",ET Now,,"""tokenized assets"" OR ""market infrastructure"" ...",2026-01-13T14:51:35.540052-05:00
685,fe40746055e860d67455e32f7b7acfe8de263cb5c86dd6...,Bloomberg Index Services Postpones Indian Bond...,https://news.google.com/rss/articles/CBMi0gFBV...,"Tue, 13 Jan 2026 06:59:48 GMT",scanx.trade,,"""tokenized assets"" OR ""market infrastructure"" ...",2026-01-13T14:51:35.540052-05:00
686,1240a4d280bc3da87183832d9f8362ac73ba6f2d1f7dd9...,India bonds fall after Bloomberg index entry s...,https://news.google.com/rss/articles/CBMiVkFVX...,"Tue, 13 Jan 2026 12:35:30 GMT",Business Recorder,,"""tokenized assets"" OR ""market infrastructure"" ...",2026-01-13T14:51:35.540052-05:00
687,797242bb275685307d4420e4131f349f17527fc9ae7721...,Bloomberg defers Inclusion of Indian govt bond...,https://news.google.com/rss/articles/CBMiZEFVX...,"Tue, 13 Jan 2026 06:48:31 GMT",Daijiworld,,"""tokenized assets"" OR ""market infrastructure"" ...",2026-01-13T14:51:35.540052-05:00


In [27]:
import json, os, csv
OUT_CSV = "data/gnews2.csv"
# The file is opened in append mode, so the header must be written only once.
# With header=True every run inserted another header line in the middle of the
# data. Same rule as append_seen(): header only for a new (or empty) file.
write_header = not os.path.exists(OUT_CSV) or os.path.getsize(OUT_CSV) == 0
df.to_csv(OUT_CSV, mode="a", header=write_header, index=False, quoting=csv.QUOTE_MINIMAL)

In [184]:
# Spot-check of one generated summary (row label 100).
# NOTE(review): the text shown below is generic and does not obviously describe a
# specific article; the "link" values are Google News redirect URLs. Confirm that
# the model can actually open these links before trusting the summaries.
df.summary.loc[100]

'Advances in artificial intelligence and machine learning are rapidly transforming various industries, from healthcare to finance. These technologies are enabling new capabilities, automating complex tasks, and generating unprecedented insights. Ethical considerations and regulatory frameworks are being developed to guide their responsible deployment.'

In [97]:
import pandas as pd

# One-off maintenance: drop seen-ids first recorded on/after the cutoff so those
# articles are treated as new again on the next pull.
# NOTE(review): this path differs from SEEN_PATH ("data/gnews_seen_ids.csv") used
# by run_pull(); confirm which file is the live one.
SEEN_STATE_CSV = 'data/state/gnews_seen_ids.csv'
PRUNE_CUTOFF = pd.Timestamp('2026-01-19', tz='US/Eastern')

# NOTE: `df` is reused here and replaces the news DataFrame from earlier cells.
df = pd.read_csv(SEEN_STATE_CSV)
# Rows carry different UTC offsets (e.g. -04:00 and -05:00 across a DST change);
# utc=True parses them into a single tz-aware dtype so the comparison is well
# defined. The parsed values are used only as a filter, which leaves the stored
# first_seen_at strings exactly as append_seen() wrote them.
first_seen_utc = pd.to_datetime(df['first_seen_at'], utc=True)
df = df[first_seen_utc < PRUNE_CUTOFF]
# Keep the header row: load_seen() reads the "id" column by name. Writing with
# header=False turned the first data row into the header on the next read, so
# load_seen() hit its exception path and silently returned an empty set.
df.to_csv(SEEN_STATE_CSV, index=False)

  df['first_seen_at'] = pd.to_datetime(df['first_seen_at'])


In [96]:
df

Unnamed: 0,id,first_seen_at
0,6f35d0ec9f74a710e5ed1f731f183df52bbeeb35f733d6...,2025-10-30 15:54:29.313964-04:00
1,06b7fbccc392f86179d2864b4b6e783e40552a29c978c8...,2025-10-30 15:54:29.313964-04:00
2,40f4333e49070177b7cc47fe5de1e9cb2758c8f28791f0...,2025-10-30 15:54:29.313964-04:00
3,e23983d5e0ebdceed1238069f96ece0f795674d24d29c1...,2025-10-30 15:54:29.313964-04:00
4,e7b62633d3a48594585b81c740311e428b20ea296941c3...,2025-10-30 15:54:29.313964-04:00
...,...,...
363,b5e1e1c3e2c2de826b251f1081c89b6c83b58736535f02...,2025-10-30 15:54:29.313964-04:00
364,313259dd7ab4a1d5f36dcc0f06123fbfb0fbe472d7ed81...,2025-10-30 15:54:29.313964-04:00
365,43b808a4b85228d184313cbdbb4e4ba65a41ea091ca877...,2025-10-30 15:54:29.313964-04:00
366,5c8b76d093f45b6df38e2a64a34fc8a7fd3c49ff4e729a...,2025-10-30 15:54:29.313964-04:00


In [77]:
print(f'\t something')



	 something


In [45]:
# Exploratory backfill for a single day starting at START_DATE.
# NOTE(review): build_entity_queries, ENTITIES and timedelta are not defined or
# imported in any cell shown in this notebook; this cell relies on kernel state
# from elsewhere. Confirm before Restart & Run All.
START_DATE = datetime(2025, 4, 11)
queries = build_entity_queries(ENTITIES, topics=None)
# This loop only prints a preview (first 80 characters) of each query.
for i, q in enumerate(queries, 1):
    print(f"  Query {i}/{len(queries)}: {q[:80]}...")
from_date = START_DATE.strftime('%m/%d/%Y')
to_date = (START_DATE + timedelta(days=1)).strftime('%m/%d/%Y')
# NOTE(review): set_time_range()/results() are not the search(..., when=/from_=/to_=)
# interface used with pygooglenews elsewhere in this notebook; presumably a
# different GoogleNews class was imported when this ran. Verify the import.
gn = GoogleNews(lang='en')
gn.set_time_range(from_date, to_date)  # format: 'MM/DD/YYYY'
# `q` is the loop variable left over from the preview loop above, so only the
# LAST query is searched here.
gn.search(q)
# NOTE: rebinds the notebook-level name `results` (also used for the Gemini batches).
# NOTE(review): the results displayed further down are dated 2026-01-19, not inside
# the requested 04/11/2025-04/12/2025 range, so the time range looks ignored; confirm.
results = gn.results()

  Query 1/10: ("Boeing" OR "BA") -gossip -celebrity -"fantasy sports" -"game recap"...
  Query 2/10: ("Reddit" OR "RDDT") -gossip -celebrity -"fantasy sports" -"game recap"...
  Query 3/10: ("Apple" OR "AAPL") -gossip -celebrity -"fantasy sports" -"game recap"...
  Query 4/10: ("Novo Nordisk" OR "NVO") -gossip -celebrity -"fantasy sports" -"game recap"...
  Query 5/10: ("Trump Media" OR "DJT" OR "Truth Social") -gossip -celebrity -"fantasy sports" ...
  Query 6/10: ("Tyson Foods" OR "TSN") -gossip -celebrity -"fantasy sports" -"game recap"...
  Query 7/10: ("Nvidia" OR "NVDA") -gossip -celebrity -"fantasy sports" -"game recap"...
  Query 8/10: ("NYCB" OR "New York Community Bank") -gossip -celebrity -"fantasy sports" -"gam...
  Query 9/10: ("Chipotle" OR "CMG") -gossip -celebrity -"fantasy sports" -"game recap"...
  Query 10/10: ("Tesla" OR "TSLA") -gossip -celebrity -"fantasy sports" -"game recap"...


In [46]:
print(f'from={from_date}, to={to_date}')

from=04/11/2025, to=04/12/2025


In [47]:
results

[{'title': 'Barclays says Tesla’s (TSLA) fundamentals are taking a back seat',
  'media': 'MSN',
  'date': '1 minutes ago',
  'datetime': datetime.datetime(2026, 1, 19, 16, 8, 21, 771667),
  'desc': 'Tesla, Inc. (NASDAQ:TSLA) is one of the AI Stocks Analysts Are Watching Closely. On January 16, Barclays reiterated Tesla and Rivian as “Equal Weight,”...',
  'link': 'https://www.msn.com/en-us/money/topstocks/barclays-says-tesla-s-tsla-fundamentals-are-taking-a-back-seat/ar-AA1UwJqF%3Focid%3Dfinance-verthp-feeds&ved=2ahUKEwjttNzWwZiSAxUXklYBHUo0G_EQxfQBegQICBAC&usg=AOvVaw0Dij-0o4HdivHoAfQg-Wdr',
  'img': 'data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw=='},
 {'title': 'Tesla’s New Cybercab Finally Solves a Huge Winter Problem',
  'media': 'TeslaNorth.com',
  'date': '6 minutes ago',
  'datetime': datetime.datetime(2026, 1, 19, 16, 3, 21, 773196),
  'desc': "Tesla's upcoming Cybercab may be designed to drive itself, but one newly spotted feature suggests it

In [48]:
from pygooglenews import GoogleNews
gn = GoogleNews(lang='en')
results = gn.search('Tesla', from_='2024-03-11', to_='2024-03-12')

In [54]:
results['entries'][0]

{'title': '1st Generation Tesla Model 3 Still Keeping It Together At 100,000 Miles - CleanTechnica',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': '',
  'value': '1st Generation Tesla Model 3 Still Keeping It Together At 100,000 Miles - CleanTechnica'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'https://news.google.com/rss/articles/CBMirAFBVV95cUxNZ044LTRWcmdDSHRIZlFWVWtKX1Q1MEJZckRkb1lnR0F3VUlVRWFfdlBOdTlKQngtakZxbzIxLWdmMHl1cHM5eG85b19xWl9ubmY5aWpGUWJyNF9fbGVyaWM2a180QTlCX1RSc1dWWHJFUWpydXVvZXhWUjU2dlRrYWVXbzJpcExDaG5yRDBnbnhBeXJKeXNISUE2VVhHbEFTWVlRdTVFV1VIRHBp0gGyAUFVX3lxTE92S2lKdDZZbDJnbmxKZENjdE1HRnZxUDRpeUNId1h3N2V4eFk3czVYazlSVHBtY1h4M1NpekxrT21iOHA3WEhwSVBRcmNoTllTT2EwQ2JGSU4xWkZMTm9iWkdsR3BUUVAyb0cyWmZRaHJZb0kwajdjTE1tdUEzQWFpdFRqMUdYTzhaNHE0UFlDMDB1QWFCMk41emt4MWNpQVNfakQxa0xnRXVNdWctR0tMbHc?oc=5'}],
 'link': 'https://news.google.com/rss/articles/CBMirAFBVV95cUxNZ044LTRWcmdDSHRIZlFWVWtKX1Q1MEJZckRkb1lnR0F3VUlVRWFfdlBOdTlKQngta

In [57]:
print(results['feed'].keys())

dict_keys(['generator_detail', 'generator', 'title', 'title_detail', 'links', 'link', 'language', 'publisher', 'publisher_detail', 'rights', 'rights_detail', 'updated', 'updated_parsed', 'image', 'subtitle', 'subtitle_detail'])


In [67]:
# NOTE(review): in the cells shown, raw_rows is only ever assigned inside run_pull()
# as a local variable; this display relies on a notebook-level raw_rows created by
# code that is not visible here. Confirm before Restart & Run All.
raw_rows[0]

{'id': 'a020a7adf400ffa1f8e9c4dfe778bd68faeae34627280681a48059692b66692b',
 'title': '1st Generation Tesla Model 3 Still Keeping It Together At 100,000 Miles - CleanTechnica',
 'link': 'https://news.google.com/rss/articles/CBMirAFBVV95cUxNZ044LTRWcmdDSHRIZlFWVWtKX1Q1MEJZckRkb1lnR0F3VUlVRWFfdlBOdTlKQngtakZxbzIxLWdmMHl1cHM5eG85b19xWl9ubmY5aWpGUWJyNF9fbGVyaWM2a180QTlCX1RSc1dWWHJFUWpydXVvZXhWUjU2dlRrYWVXbzJpcExDaG5yRDBnbnhBeXJKeXNISUE2VVhHbEFTWVlRdTVFV1VIRHBp0gGyAUFVX3lxTE92S2lKdDZZbDJnbmxKZENjdE1HRnZxUDRpeUNId1h3N2V4eFk3czVYazlSVHBtY1h4M1NpekxrT21iOHA3WEhwSVBRcmNoTllTT2EwQ2JGSU4xWkZMTm9iWkdsR3BUUVAyb0cyWmZRaHJZb0kwajdjTE1tdUEzQWFpdFRqMUdYTzhaNHE0UFlDMDB1QWFCMk41emt4MWNpQVNfakQxa0xnRXVNdWctR0tMbHc?oc=5',
 'published': 'Mon, 11 Mar 2024 07:00:00 GMT',
 'source': 'CleanTechnica',
 'summary': '<a href="https://news.google.com/rss/articles/CBMirAFBVV95cUxNZ044LTRWcmdDSHRIZlFWVWtKX1Q1MEJZckRkb1lnR0F3VUlVRWFfdlBOdTlKQngtakZxbzIxLWdmMHl1cHM5eG85b19xWl9ubmY5aWpGUWJyNF9fbGVyaWM2a180QTlCX1RSc1dWWHJF

In [None]:
{'title': '1st Generation Tesla Model 3 Still Keeping It Together At 100,000 Miles - CleanTechnica',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': '',
  'value': '1st Generation Tesla Model 3 Still Keeping It Together At 100,000 Miles - CleanTechnica'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'https://news.google.com/rss/articles/CBMirAFBVV95cUxNZ044LTRWcmdDSHRIZlFWVWtKX1Q1MEJZckRkb1lnR0F3VUlVRWFfdlBOdTlKQngtakZxbzIxLWdmMHl1cHM5eG85b19xWl9ubmY5aWpGUWJyNF9fbGVyaWM2a180QTlCX1RSc1dWWHJFUWpydXVvZXhWUjU2dlRrYWVXbzJpcExDaG5yRDBnbnhBeXJKeXNISUE2VVhHbEFTWVlRdTVFV1VIRHBp0gGyAUFVX3lxTE92S2lKdDZZbDJnbmxKZENjdE1HRnZxUDRpeUNId1h3N2V4eFk3czVYazlSVHBtY1h4M1NpekxrT21iOHA3WEhwSVBRcmNoTllTT2EwQ2JGSU4xWkZMTm9iWkdsR3BUUVAyb0cyWmZRaHJZb0kwajdjTE1tdUEzQWFpdFRqMUdYTzhaNHE0UFlDMDB1QWFCMk41emt4MWNpQVNfakQxa0xnRXVNdWctR0tMbHc?oc=5'}],
 'link': 'https://news.google.com/rss/articles/CBMirAFBVV95cUxNZ044LTRWcmdDSHRIZlFWVWtKX1Q1MEJZckRkb1lnR0F3VUlVRWFfdlBOdTlKQngtakZxbzIxLWdmMHl1cHM5eG85b19xWl9ubmY5aWpGUWJyNF9fbGVyaWM2a180QTlCX1RSc1dWWHJFUWpydXVvZXhWUjU2dlRrYWVXbzJpcExDaG5yRDBnbnhBeXJKeXNISUE2VVhHbEFTWVlRdTVFV1VIRHBp0gGyAUFVX3lxTE92S2lKdDZZbDJnbmxKZENjdE1HRnZxUDRpeUNId1h3N2V4eFk3czVYazlSVHBtY1h4M1NpekxrT21iOHA3WEhwSVBRcmNoTllTT2EwQ2JGSU4xWkZMTm9iWkdsR3BUUVAyb0cyWmZRaHJZb0kwajdjTE1tdUEzQWFpdFRqMUdYTzhaNHE0UFlDMDB1QWFCMk41emt4MWNpQVNfakQxa0xnRXVNdWctR0tMbHc?oc=5',
 'id': 'CBMirAFBVV95cUxNZ044LTRWcmdDSHRIZlFWVWtKX1Q1MEJZckRkb1lnR0F3VUlVRWFfdlBOdTlKQngtakZxbzIxLWdmMHl1cHM5eG85b19xWl9ubmY5aWpGUWJyNF9fbGVyaWM2a180QTlCX1RSc1dWWHJFUWpydXVvZXhWUjU2dlRrYWVXbzJpcExDaG5yRDBnbnhBeXJKeXNISUE2VVhHbEFTWVlRdTVFV1VIRHBp0gGyAUFVX3lxTE92S2lKdDZZbDJnbmxKZENjdE1HRnZxUDRpeUNId1h3N2V4eFk3czVYazlSVHBtY1h4M1NpekxrT21iOHA3WEhwSVBRcmNoTllTT2EwQ2JGSU4xWkZMTm9iWkdsR3BUUVAyb0cyWmZRaHJZb0kwajdjTE1tdUEzQWFpdFRqMUdYTzhaNHE0UFlDMDB1QWFCMk41emt4MWNpQVNfakQxa0xnRXVNdWctR0tMbHc',
 'guidislink': False,
 'published': 'Mon, 11 Mar 2024 07:00:00 GMT',
 'published_parsed': time.struct_time(tm_year=2024, tm_mon=3, tm_mday=11, tm_hour=7, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=71, tm_isdst=0),
 'summary': '<a href="https://news.google.com/rss/articles/CBMirAFBVV95cUxNZ044LTRWcmdDSHRIZlFWVWtKX1Q1MEJZckRkb1lnR0F3VUlVRWFfdlBOdTlKQngtakZxbzIxLWdmMHl1cHM5eG85b19xWl9ubmY5aWpGUWJyNF9fbGVyaWM2a180QTlCX1RSc1dWWHJFUWpydXVvZXhWUjU2dlRrYWVXbzJpcExDaG5yRDBnbnhBeXJKeXNISUE2VVhHbEFTWVlRdTVFV1VIRHBp0gGyAUFVX3lxTE92S2lKdDZZbDJnbmxKZENjdE1HRnZxUDRpeUNId1h3N2V4eFk3czVYazlSVHBtY1h4M1NpekxrT21iOHA3WEhwSVBRcmNoTllTT2EwQ2JGSU4xWkZMTm9iWkdsR3BUUVAyb0cyWmZRaHJZb0kwajdjTE1tdUEzQWFpdFRqMUdYTzhaNHE0UFlDMDB1QWFCMk41emt4MWNpQVNfakQxa0xnRXVNdWctR0tMbHc?oc=5" target="_blank">1st Generation Tesla Model 3 Still Keeping It Together At 100,000 Miles</a>&nbsp;&nbsp;<font color="#6f6f6f">CleanTechnica</font>',
 'summary_detail': {'type': 'text/html',
  'language': None,
  'base': '',
  'value': '<a href="https://news.google.com/rss/articles/CBMirAFBVV95cUxNZ044LTRWcmdDSHRIZlFWVWtKX1Q1MEJZckRkb1lnR0F3VUlVRWFfdlBOdTlKQngtakZxbzIxLWdmMHl1cHM5eG85b19xWl9ubmY5aWpGUWJyNF9fbGVyaWM2a180QTlCX1RSc1dWWHJFUWpydXVvZXhWUjU2dlRrYWVXbzJpcExDaG5yRDBnbnhBeXJKeXNISUE2VVhHbEFTWVlRdTVFV1VIRHBp0gGyAUFVX3lxTE92S2lKdDZZbDJnbmxKZENjdE1HRnZxUDRpeUNId1h3N2V4eFk3czVYazlSVHBtY1h4M1NpekxrT21iOHA3WEhwSVBRcmNoTllTT2EwQ2JGSU4xWkZMTm9iWkdsR3BUUVAyb0cyWmZRaHJZb0kwajdjTE1tdUEzQWFpdFRqMUdYTzhaNHE0UFlDMDB1QWFCMk41emt4MWNpQVNfakQxa0xnRXVNdWctR0tMbHc?oc=5" target="_blank">1st Generation Tesla Model 3 Still Keeping It Together At 100,000 Miles</a>&nbsp;&nbsp;<font color="#6f6f6f">CleanTechnica</font>'},
 'source': {'href': 'https://cleantechnica.com', 'title': 'CleanTechnica'},
 'sub_articles': []}