In [1]:
from src.gmail import Gmail
from src.newsletters.config import NEWSLETTER_AND_PARSER
from src.newsletters.parser.tldr import tldr_parser
from src.newsletters.parser.alpha_signal import alpha_signal_parser
from src.newsletters.parser.bloomberg_tech import bloomberg_tech_parser
from src.config import SCOPES

import re

In [2]:
import logging
# Configure the root logger once for the whole notebook: show INFO and above, and
# prefix every record with timestamp, logger name, level and emitting line number.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s -- l.%(lineno)d: %(message)s",
)

In [3]:
NEWSLETTER_AND_PARSER.keys()

dict_keys(['TLDR AI <dan@tldrnewsletter.com>', 'AlphaSignal <news@alphasignal.ai>', 'TLDR <dan@tldrnewsletter.com>', 'TLDR Product <dan@tldrnewsletter.com>', 'Bloomberg Technology <noreply@news.bloomberg.com>'])

In [4]:
# Build the Gmail client that the fetch cells in this notebook call.
# NOTE(review): both secret paths are absolute and machine-specific; consider reading
# them from configuration or the environment so the notebook can run elsewhere.
gmail = Gmail(
    path_to_token="/home/secrets_vault/token.json",
    path_to_credentials="/home/secrets_vault/credentials.json",
    scopes=SCOPES
)

2025-02-13 18:45:56,121 - googleapiclient.discovery_cache - INFO -- l.49: file_cache is only supported with oauth2client<4.0.0


# Bloomberg Technology

In [8]:
emails = gmail.fetch_emails(sender='Bloomberg Technology <noreply@news.bloomberg.com>', after='2025-02-12', before='2025-02-12')
len(emails)

1

In [9]:
news_sources = bloomberg_tech_parser(emails[0])

2025-02-13 18:46:26,081 - src.newsletters.parser.bloomberg_tech - INFO -- l.32: Parsing email 'Defending a cherished VC tax break'
2025-02-13 18:46:26,813 - src.genai_model.genai_model - INFO -- l.106: Found 12 models for model_type small
2025-02-13 18:46:26,814 - src.genai_model.genai_model - INFO -- l.109: List of models included: ['gemini/gemini-1.5-flash-8b-latest', 'gemini/gemini-1.5-flash-8b-001', 'gemini/gemini-1.5-flash-8b-exp-0924', 'gemini/gemini-1.5-flash-8b-exp-0827', 'openrouter/google/gemini-flash-1.5-8b-exp', 'groq/gemma2-9b-it', 'openrouter/google/gemma-2-9b-it:free', 'mistal/ministral-8b-2410', 'groq/llama-3.1-8b-instant', 'openrouter/meta-llama/llama-3.1-8b-instruct:free', 'groq/llama-3-8b-8192', 'openrouter/meta-llama/llama-3-8b-instruct:free']
[92m18:46:26 - LiteLLM:INFO[0m: utils.py:2909 - 
LiteLLM completion() model= gemini-1.5-flash-8b-latest; provider = gemini
2025-02-13 18:46:26,817 - LiteLLM - INFO -- l.2909: 
LiteLLM completion() model= gemini-1.5-flash-8b-

In [10]:
news_sources

[{'title': 'Military tech contract',
  'url': 'https://www.bloomberg.com/news/articles/2025-02-11/anduril-to-take-over-managing-microsoft-goggles-for-us-infantry?cmpid=tech-in-brief',
  'news_provider': 'www.bloomberg.com',
  'source_of_the_news': 'Bloomberg Technology <noreply@news.bloomberg.com>',
  'text': '',
  'date_source': 'Wed, 12 Feb 2025 12:07:11 +0000',
  'date_source_time_zone': 'utc'},
 {'title': 'No executive change',
  'url': 'https://www.bloomberg.com/news/articles/2025-02-11/deliveroo-weighs-replacing-founding-ceo-shu-sky-news-reports?cmpid=tech-in-brief',
  'news_provider': 'www.bloomberg.com',
  'source_of_the_news': 'Bloomberg Technology <noreply@news.bloomberg.com>',
  'text': '',
  'news_summary': 'Delivery app Deliveroo said founder and Chief Executive OfficerWill Shu  isn’t planning to step down  despite a report the board is considering a replacement.',
  'date_source': 'Wed, 12 Feb 2025 12:07:11 +0000',
  'date_source_time_zone': 'utc'},
 {'title': 'AI data ce

## Experimentation

In [31]:
messages = gmail._fetch_messages(sender='Bloomberg Technology <noreply@news.bloomberg.com>', after='2025-02-10', before='2025-02-10')

In [32]:
message = gmail._get_message(email_id=messages[0]['id'])

In [33]:
type(message)

dict

In [12]:
import base64
from bs4 import BeautifulSoup

In [13]:
# Scan the MIME parts of the message; for each HTML part keep the decoded markup in
# `text_html` and its tag-free text in `txt` (a later HTML part overwrites an earlier one).
for pp in message["payload"]["parts"]:
    if pp["mimeType"] != "text/html":
        continue
    # The part body is base64url-encoded.
    body_data = pp["body"]["data"]
    text_html = base64.urlsafe_b64decode(body_data).decode()
    txt = BeautifulSoup(text_html, "html.parser").get_text()

In [20]:
def html_to_text_with_links(html):
    """Convert HTML to plain text while keeping link targets visible.

    Every ``<a>`` element that has a non-empty ``href`` is replaced by the string
    ``"<link text> (<href>)"``; anchors without one are left as they are.

    Args:
        html: HTML markup to convert.

    Returns:
        The text content of the document after the anchors were rewritten.
    """
    document = BeautifulSoup(html, "html.parser")

    for anchor in document.find_all("a"):
        target = anchor.get("href")
        if not target:
            # Nothing to append for anchors without a destination.
            continue
        label = anchor.get_text()
        anchor.replace_with(f"{label} ({target})")

    return document.get_text()

In [30]:
import re
re.findall(r"\(https://[^\)]+\)", html_to_text_with_links(text_html).split("Tech Across the Globe")[1].split("Must read")[0])

['(https://links.message.bloomberg.com/s/c/h0vO07j4nVYAiNfAnUa99Gn4EGYMHLBzLyFZ6MRNWXj2h0P45rFGWSHUqujhOvD4_v1kdne5qRs1Gum8W5ITXUoKeblJpLW5JugcBS8IoGH6C2gIa5wgt2dH8GXV9NUUnwtYoSwnWNctN4WSZdJJ9U_LBisQ4JX9FKaKmu0TEbBtasP-X8Zqio917UmWi_v81yxsHmlOCMgrUPUi1113hx5_PNs1Sefa984CFjqQF8VFvHG4ejvweUtZSfPrqH7cuBpF4fMy3eus7Bol3ERfeE_kQP4ord4EF-kCQ5SltfH-m2zr8el7d2Z_2H7mu1LUQ1ekKjzfHrwZCbtGpKMsv4wHWtBSKi3361AYArBtoa2I6fZ0Ufp8Gtrz0w/TAcFi9Epdlg0pnhqeA__6wq8cw2NQwOM/5)',
 '(https://links.message.bloomberg.com/s/c/7-WVLVP8op58AN0wD2Rb_pijMADTnqMay6HAB65RKsRuef0gQDJuYHuq-jxAsG_DXmWTvouSRvOADrHnjB_l6rAkAMNHJohw66ME88gQZOIq3AsCtuqPlHwF3qwpMoTtNdyF7dbXmQseDHjihxgpboBKKbmGMCA7taS4P0OiwkWBAeS4JAjBjEje0bJ1Q6-E3lyOHEkPy5qcm3Yld_oONurpUpkUC5jQA3LMtXgP-wszrKQe-PVY-TN8OZxUu5toiwV8csLr3EQbyWJ0PE_cuzww4GHmvPXAdzS68Lg4N4h1RUizLX-lsFkjIh0SzlK8SE4rtSXv0zbSG6F1MEWgKWfx3yPzVqbPflj_mWKKeB_9MO28GhpgbUc7zw/LpEPyIZmuOrNNA-ugJhnWKjVvc98FMEM/5)',
 '(https://links.message.bloomberg.com/s/c/rqOmiJ-IO6vmrhgFVRH4kQgtWlXeZ5_Jqx5Ds

In [19]:
# Fetch the first listed message in "full" format through the raw API client and keep
# the decoded markup of its HTML part in `body`.
message = (
    gmail.service.users()
    .messages()
    .get(userId="me", id=messages[0]["id"], format="full")
    .execute()
)
for part in message["payload"]["parts"]:
    if part["mimeType"] != "text/html":
        continue
    body = base64.urlsafe_b64decode(part["body"]["data"]).decode("utf-8")

2025-02-10 23:39:57,157 - googleapiclient.discovery - DEBUG -- l.1258: URL being requested: GET https://gmail.googleapis.com/gmail/v1/users/me/messages/194efc5ce97c38fd?format=full&alt=json


In [20]:
body

'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml" style="width: 100%;"> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> <meta name="viewport" content="initial-scale=1"/> <title>Tech In Brief</title> <style>body{width:100%;-webkit-font-smoothing:antialiased;font-family:Georgia,serif;font-size:16px;line-height:24px;margin:0;padding:0}img{max-width:550px}@media only screen and (max-width:480px){.lihide{display:none!important}.lishow{display:block!important;width:auto!important;overflow:visible!important;float:none!important;max-height:inherit!important;line-height:inherit!important}.email-ssl-image{width:100%!important;height:auto!important}.body-image img{width:100%!important}.logo-image{width:100%!important}}@media screen and (max-width:525px){.editorial-tout__inner{font-size:15px!important;line-height:23px!important;pad

In [30]:
messages[0]['id']

'194efc5ce97c38fd'

In [41]:
# Keep only the text between the "Tech Across the Globe" and "Revalued" markers.
txt = (emails[0]["text"].split("Tech Across the Globe")[1].split("Revalued")[0].strip())
# Entries inside that section are separated by a blank line.
all_articles = txt.split("\n\n")
# The first entry has the form "<title>:<body>". Split on the first colon only: with an
# unbounded split, any further ":" in the body yields more than two pieces and the
# two-name unpacking raises ValueError.
title, text = all_articles[0].split(":", 1)

In [42]:
title

'AI investment for Europe'

In [45]:
text.strip().replace("\n", "").replace("<>", "")

'OpenAI Chief Executive Officer Sam Altman said he’d “love” to help build anAI project in Europe  on the scale of Stargate, the proposed $500 billion US development from OpenAI, SoftBank and Oracle for artificial intelligence services.'

In [46]:
txt = (emails[0]["text"].split("Tech Across the Globe")[1].split("Revalued")[1].split("Must read")[0].strip())

In [48]:
txt.split("\n\n")

['Rapyd Financial Network <> is in fundraising talks that may cut its valuation \n<> to $3.5 billion from around $9 billion in 2021. The company, which has \noffices in London, Tel Aviv and elsewhere, is looking to raise $300 million \nfrom investors, according to people familiar with the talks.']

# AlphaSignal

In [8]:
emails = gmail.fetch_emails(sender='AlphaSignal <news@alphasignal.ai>', after='2025-02-10', before='2025-02-10')
len(emails), type(emails)

(1, list)

In [9]:
news_sources = alpha_signal_parser(emails[0])

2025-02-11 01:31:57,400 - src.newsletters.parser.alpha_signal - INFO -- l.33: Parsing email '🚨 Sam Altman Reveals AI Costs Drop 10x Every Year'
2025-02-11 01:31:57,402 - src.genai_model.genai_model - INFO -- l.106: Found 12 models for model_type small
2025-02-11 01:31:57,404 - src.genai_model.genai_model - INFO -- l.109: List of models included: ['gemini/gemini-1.5-flash-8b-latest', 'gemini/gemini-1.5-flash-8b-001', 'gemini/gemini-1.5-flash-8b-exp-0924', 'gemini/gemini-1.5-flash-8b-exp-0827', 'openrouter/google/gemini-flash-1.5-8b-exp', 'groq/gemma2-9b-it', 'openrouter/google/gemma-2-9b-it:free', 'mistal/ministral-8b-2410', 'groq/llama-3.1-8b-instant', 'openrouter/meta-llama/llama-3.1-8b-instruct:free', 'groq/llama-3-8b-8192', 'openrouter/meta-llama/llama-3-8b-instruct:free']
[92m01:31:57 - LiteLLM:INFO[0m: utils.py:2909 - 
LiteLLM completion() model= gemini-1.5-flash-8b-latest; provider = gemini
2025-02-11 01:31:57,407 - LiteLLM - INFO -- l.2909: 
LiteLLM completion() model= gemini-

In [10]:
news_sources

[{'title': 'OpenAI’s Sam Altman outlines AI trends',
  'url': 'https://blog.samaltman.com/three-observations',
  'source_of_the_news': 'AlphaSignal <news@alphasignal.ai>',
  'text': 'OpenAI CEO Sam Altman outlined key trends in AI development, emphasizing the scaling economics of intelligence, the rapid cost reduction of AI usage, and the implications of AI agents.  KEY HIGHLIGHTS   \t* Intelligence scales logarithmically with resources, with no clear saturation point.   \t* AI costs decline at an unprecedented rate, dropping 10x every 12 months.   \t* AI agents are positioned as scalable knowledge workers, handling constrained tasks at scale.   \t* Scientific progress is expected to accelerate, driven by automation of complex reasoning tasks.   \t* AI will integrate across economic sectors, reshaping workflows without immediate large-scale disruptions.  ECONOMIC IMPLICATIONS   \t* AI spreads beyond tech companies, distributing its benefits across industries.   \t* The balance between 

In [23]:
news_sources[0]['url']

'https://mistral.ai/en/news/all-new-le-chat'

In [32]:
import pandas as pd
df = pd.read_parquet("/home/logs/database_news_stories.parquet")

In [33]:
df.dtypes

title                                           object
url                                             object
news_provider                                   object
source_of_the_news                              object
text                                            object
news_summary                                    object
date_source                 datetime64[ns, US/Eastern]
date_source_time_zone                           object
version                                         object
competitive_intelligence                        object
themes                                          object
market_intelligence                             object
personalities                                   object
score_category_count                             int64
unique_id                                        int64
dtype: object

## Experimentation

In [6]:
emails[0].keys()

dict_keys(['sender', 'subject', 'date_utc', 'id', 'text'])

In [7]:
emails[0]['sender']

'AlphaSignal <news@alphasignal.ai>'

In [21]:
emails[0]['subject']

'🔥Mistral AI updates Chatbot, Faster than ChatGPT & Claude'

In [25]:
(emails[0]['text'])

"On Andrew Ng's agentic object detection, GitHub Copilot's agent mode,\r\nGoogle's Imagen 3 on Gemini API,...\xa0\r\n\r\nSIGNUP [1]\xa0\xa0|\xa0\xa0WORK WITH US [2]\xa0\xa0|\xa0\xa0FOLLOW ON X\r\n[3]\xa0\xa0|\xa0\xa0READ ON WEB [4]\r\n\r\n . \r\n\r\nHEY ,\r\n\r\nWelcome to AlphaSignal – the most read newsletter by AI\r\ndevelopers.\xa0\r\n\r\nWe bring you the top 1% of news, papers, models, and repos, all\r\nsummarized to keep you updated on the latest in AI.\r\n\r\nIN TODAY'S SIGNAL\r\n\r\n_Read time: 5 min 41 sec_\r\n\r\n🎖️ TOP NEWS\r\n\r\n \t*\r\nMistral AI brings Le Chat to iOS, Android, and enterprise [5] with web\r\nsearch and automation tools.\r\n\r\n⚡️ TRENDING SIGNALS\r\n\r\n \t*\r\nAndrew Ng presents Agentic Object Detection [6]: human-like object\r\nrecognition via text-driven reasoning.\r\n\r\n \t*\r\nGitHub upgrades Copilot with agent mode [7], multi-file edits, and an\r\nupcoming autonomous coding agent.\r\n\r\n \t*\r\nGoogle releases Imagen 3 in Gemini API [8]: high qual

In [12]:
# Print every headline bullet found after the first "TOP NEWS" marker: a paragraph
# qualifies when it contains the bullet marker and a numbered reference such as "[5]".
headline_block = emails[0]['text'].split("TOP NEWS")[1]
for line in headline_block.split("\r\n\r\n"):
    if " \t*\r" not in line:
        continue
    if not re.search(r"\[\d+\]", line):
        continue
    # Drop the leading bullet characters, then any surrounding whitespace.
    print(line.lstrip(" \t*\r\n").strip())
    print(" ")

Mistral AI brings Le Chat to iOS, Android, and enterprise [5] with web
search and automation tools.
 
Andrew Ng presents Agentic Object Detection [6]: human-like object
recognition via text-driven reasoning.
 
GitHub upgrades Copilot with agent mode [7], multi-file edits, and an
upcoming autonomous coding agent.
 
Google releases Imagen 3 in Gemini API [8]: high quality, watermarked
image generation with improved prompt adherence.
 
OpenAI refines CoT reasoning in O3-Mini [9] models, expanding
visibility.
 
Pika Labs launches Pikadditions [10]: add any object or person to any
video with ease.
 
Google AI's AlphaGeometry2 surpasses gold medalists [11] in Olympiad
geometry, improving problem coverage and solving rates.
 
s1-32B improves test-time scaling [12] with budget forcing, boosting
math reasoning and competition scores.
 
Multi-agent system reduces AI hallucinations [13]using structured
reviews, NLP-based coordination, and new KPIs.
 


In [29]:
emails[0]['text'].split("TOP NEWS")[1].split("\r\n\r\n")

['',
 ' \t*\r\nMistral AI brings Le Chat to iOS, Android, and enterprise [5] with web\r\nsearch and automation tools.',
 '⚡️ TRENDING SIGNALS',
 ' \t*\r\nAndrew Ng presents Agentic Object Detection [6]: human-like object\r\nrecognition via text-driven reasoning.',
 ' \t*\r\nGitHub upgrades Copilot with agent mode [7], multi-file edits, and an\r\nupcoming autonomous coding agent.',
 ' \t*\r\nGoogle releases Imagen 3 in Gemini API [8]: high quality, watermarked\r\nimage generation with improved prompt adherence.',
 ' \t*\r\nOpenAI refines CoT reasoning in O3-Mini [9] models, expanding\r\nvisibility.',
 ' \t*\r\nPika Labs launches Pikadditions [10]: add any object or person to any\r\nvideo with ease.',
 '💻TOP PAPERS',
 " \t*\r\nGoogle AI's AlphaGeometry2 surpasses gold medalists [11] in Olympiad\r\ngeometry, improving problem coverage and solving rates.",
 ' \t*\r\ns1-32B improves test-time scaling [12] with budget forcing, boosting\r\nmath reasoning and competition scores.',
 ' \t*\r\nMu

In [13]:
# Headings used to cut the newsletter text; they are expected to occur in this order.
SECTIONS = [
    "TOP NEWS",
    "TRENDING SIGNALS",
    "TOP PAPERS",
    "PYTHON TIP",
]

# Walk the headings in order: the text in front of a heading is stored as one chunk and
# the scan continues with the text after it. sections[0] is therefore whatever precedes
# the first heading, and sections[i] is the text between SECTIONS[i - 1] and SECTIONS[i].
# After the loop `txt` holds everything that follows the last heading.
txt = emails[0]['text']
sections = []
for sec in SECTIONS:
    # partition() cuts at the first occurrence only. An unbounded split() also cuts at
    # every later repeat of the heading, so keeping just element [1] would silently
    # discard all text after the second occurrence.
    before, found, after = txt.partition(sec)
    if not found:
        # Fail with the offending heading instead of an anonymous IndexError.
        raise ValueError(f"Section heading {sec!r} not found in the remaining email text")
    sections.append(before)
    txt = after

In [15]:
for pp in sections[1:]:
    print(pp)



 	*
Mistral AI brings Le Chat to iOS, Android, and enterprise [5] with web
search and automation tools.

⚡️ 


 	*
Andrew Ng presents Agentic Object Detection [6]: human-like object
recognition via text-driven reasoning.

 	*
GitHub upgrades Copilot with agent mode [7], multi-file edits, and an
upcoming autonomous coding agent.

 	*
Google releases Imagen 3 in Gemini API [8]: high quality, watermarked
image generation with improved prompt adherence.

 	*
OpenAI refines CoT reasoning in O3-Mini [9] models, expanding
visibility.

 	*
Pika Labs launches Pikadditions [10]: add any object or person to any
video with ease.

💻


 	*
Google AI's AlphaGeometry2 surpasses gold medalists [11] in Olympiad
geometry, improving problem coverage and solving rates.

 	*
s1-32B improves test-time scaling [12] with budget forcing, boosting
math reasoning and competition scores.

 	*
Multi-agent system reduces AI hallucinations [13]using structured
reviews, NLP-based coordination, and new KPIs.

🧠 


In [19]:
# Narrow the email text step by step: what follows the second "TOP NEWS" marker, then
# what follows "WHAT'S NEW", cut off at "TRENDING SIGNALS" and again at "TRY NOW".
print(
    emails[0]['text']
    .split("TOP NEWS")[2]
    .split("WHAT'S NEW")[1]
    .split("TRENDING SIGNALS")[0]
    .split("TRY NOW")[0]
)



French AI lab, Mistral AI updates its chatbot LE CHAT and makes it
available on iOS, Android, and enterprise infrastructure. It
introduces FLASH ANSWERS, A BUILT-IN CODE INTERPRETER, and REAL-TIME
SEARCH, competing with ChatGPT, Claude, and DeepSeek.

CORE FUNCTIONALITIES

Le Chat provides tools for both general use and technical
applications, offering:

 	* OCR ENGINE: Processes PDFs, spreadsheets, and complex or
low-quality files.

 	* CODE INTERPRETER: Runs scripts, handles data analysis, and creates
visualizations.

 	* IMAGE GENERATION: Uses BLACK FOREST LABS’ FLUX ULTRA to generate
photorealistic visuals.

 	* REAL-TIME INFORMATION RETRIEVAL: Combines static pre-trained
knowledge with updated data from web searches and news outlets.

 	* Its MULTI-STEP AGENT FRAMEWORK lets you AUTOMATE WORKFLOWS by
integrating with messaging systems and databases.

PERFORMANCE

Le Chat runs on Mistral’s latest models and introduces FLASH
ANSWERS, a feature enabling RESPONSE SPEEDS OF UP TO 1,00

# TLDR

In [4]:
emails = gmail.fetch_emails(sender='TLDR AI <dan@tldrnewsletter.com>', after='2024-12-01', before='2024-12-07')
len(emails)

5

In [5]:
print(emails[0])

{'sender': 'TLDR AI <dan@tldrnewsletter.com>', 'subject': "OpenAI o1 System Card 📇, Perplexity Publisher Program 🔍, DeepMind's Genie 2 🧞", 'date_utc': 'Fri, 6 Dec 2024 14:16:24 +0000', 'id': '1939c5434a9f7fd8', 'text': "This report outlines the safety work carried out prior to releasing\r\nOpenAI o1 and o1-mini, including external red teaming and frontier\r\nrisk\r\nevaluation\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\u200c\xa0\r\n\r\n\r\n Sign Up [1] |Advertise [2]|View Online [3] \r\n\r\n\t\tTLDR \r\n\r\n\t\tTOGETHER

In [6]:
news_sources = tldr_parser(emails[0])

In [7]:
news_sources

[{'title': 'OPENAI O1 SYSTEM CARD',
  'url': 'https://openai.com/index/openai-o1-system-card/',
  'source_of_the_news': 'TLDR AI <dan@tldrnewsletter.com>',
  'news_summary': "This report outlines the safety work carried out prior to releasing OpenAI o1 and o1-mini, including external red teaming and frontier risk evaluations according to OpenAI's Preparedness Framework.",
  'date_source': 'Fri, 6 Dec 2024 14:16:24 +0000',
  'new_provider': 'openai.com'},
 {'title': "PERPLEXITY EXPANDING IT'S PUBLISHER'S PROGRAM",
  'url': 'https://www.perplexity.ai/hub/blog/perplexity-expands-publisher-program-with-15-new-media-partners',
  'source_of_the_news': 'TLDR AI <dan@tldrnewsletter.com>',
  'news_summary': "Perplexity has added over a dozen international news organizations to its Publishers' Program, offering tools, revenue sharing, and support to strengthen collaboration with global media.",
  'date_source': 'Fri, 6 Dec 2024 14:16:24 +0000',
  'new_provider': 'www.perplexity.ai'},
 {'title': 

In [8]:
# The last blank-line-separated paragraph of the email is split into individual lines
# (in this email: the numbered "Links:" footer).
last_paragraph = emails[0]['text'].split("\r\n\r\n")[-1]
links = last_paragraph.split('\r\n')
links

['Links:',
 '------',
 '[1] https://tldr.tech/ai?utm_source=tldrai',
 '[2] https://advertise.tldr.tech/?utm_source=tldrai&utm_medium=newsletter&utm_campaign=advertisetopnav',
 '[3] https://a.tldrnewsletter.com/web-version?ep=1&lc=57aff87c-ae02-11ef-9074-4b7e3c69f0c6&p=68d03084-b3c7-11ef-928d-09d27092d252&pt=campaign&t=1733494584&s=a94ebb372672f7844f8d11fcad3029c68791b13faf831d296ca3c8f639256320',
 '[4] https://writer.com/product/ai-studio/?utm_source=tldr&utm_medium=newsletter&utm_campaign=ai_studio',
 '[5] https://links.tldrnewsletter.com/xS4pwo',
 '[6] https://links.tldrnewsletter.com/UUkpwY',
 '[7] https://techcrunch.com/2024/12/04/deepminds-genie-2-can-generate-interactive-worlds-that-look-like-video-games/?utm_source=tldrai',
 '[8] https://arxiv.org/abs/2412.03555?utm_source=tldrai',
 '[9] https://arxiv.org/abs/2412.02044v1?utm_source=tldrai',
 '[10] https://arxiv.org/abs/2412.03248v1?utm_source=tldrai',
 '[11] https://page.camunda.com/wp-rethinking-change-management?utm_medium=pa

In [14]:
"tldr" in links[6]

True

In [9]:
links[10].split('] ')

['[9', 'https://arxiv.org/abs/2412.02044v1?utm_source=tldrai']

In [10]:
type(news_sources[0]['url']), news_sources[0]['url']

(str, 'https://openai.com/index/openai-o1-system-card/')