### Newsbot to write a daily AI news summary using langgraph
- Save a list of HTML files from sources.yaml (tech news sites)
- Extract URLs for the news stories
- Filter URLs to remove duplicates, articles seen before, and non-AI articles (using a ChatGPT prompt)
- Perform headline topic analysis and sort by topic to help the AI structure the response by topic
- Scrape and summarize individual articles
- Compose and email the summary


In [None]:
# to selectively re-import as needed
import sys

# Drop cached project modules so the `from ainb_* import ...` statements in the
# next cell re-execute them and pick up edits made on disk.
# pop(..., None) instead of `del`: on a fresh kernel the modules have not been
# imported yet and `del sys.modules[name]` would raise KeyError.
# Uncomment entries to also reload those modules.
for stale_module in (
    'ainb_llm',
    'ainb_const',
    # 'ainb_utilities',
    # 'ainb_webscrape',
):
    sys.modules.pop(stale_module, None)


In [1]:
from datetime import datetime
import os
import yaml
import sqlite3
import json
from collections import Counter
import uuid
from typing import TypedDict, Annotated

import operator
import pickle

import langchain
from langchain_openai import ChatOpenAI
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
from langchain_core.prompts import (ChatPromptTemplate, MessagesPlaceholder, PromptTemplate,
                                    SystemMessagePromptTemplate, HumanMessagePromptTemplate)
# from langchain.agents import create_tool_calling_agent, AgentExecutor
# from langchain.memory import ChatMessageHistory
from langgraph.checkpoint.memory import MemorySaver
from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
# from langchain.tools import BaseTool, StructuredTool, tool
# from langchain_community.chat_models import ChatOllama
# from langchain_community.tools.tavily_search import TavilySearchResults
# from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import SimpleJsonOutputParser, JsonOutputParser, StrOutputParser
# from langchain.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler
# from langchain_core.outputs import LLMResult

from langgraph.graph import StateGraph, START, END

import numpy as np
import pandas as pd
import umap
# import matplotlib.pyplot as plt

import sklearn
# from sklearn.cluster import KMeans
# from sklearn.metrics import silhouette_score
from sklearn.cluster import DBSCAN

import bs4
# from bs4 import BeautifulSoup
# import requests
from urllib.parse import urljoin, urlparse

import multiprocessing
# from concurrent.futures import ThreadPoolExecutor, as_completed
import asyncio
import aiohttp

from IPython.display import HTML, Image, Markdown, display
import markdown

# import smtplib
# from email.mime.multipart import MIMEMultipart
# from email.mime.text import MIMEText

import openai
from openai import OpenAI

import trafilatura

VERBOSE=1
from ainb_const import (DOWNLOAD_DIR, PAGES_DIR,
                        MODEL, LOWCOST_MODEL, HIGHCOST_MODEL, CANONICAL_TOPICS,
                        SOURCECONFIG, FILTER_PROMPT, TOPIC_PROMPT,
                        SUMMARIZE_SYSTEM_PROMPT, SUMMARIZE_USER_PROMPT, FINAL_SUMMARY_PROMPT, TOP_CATEGORIES_PROMPT,
                        MAX_INPUT_TOKENS, MAX_OUTPUT_TOKENS, MAX_RETRIES, TEMPERATURE, SQLITE_DB,
                        HOSTNAME_SKIPLIST, SITE_NAME_SKIPLIST,
                       )
from ainb_utilities import (log, delete_files, filter_unseen_urls_db, insert_article,
                            nearest_neighbor_sort, agglomerative_cluster_sort, traveling_salesman_sort_scipy,
                            unicode_to_ascii, send_gmail)
from ainb_webscrape import (get_driver, quit_drivers, launch_drivers, get_file, get_url, parse_file,
                            get_og_tags, get_path_from_url, trimmed_href, process_source_queue_factory,
                            process_url_queue_factory, get_google_news_redirects)
from ainb_llm import (paginate_df, process_pages, fetch_pages, fetch_openai, fetch_all_summaries,
                      fetch_openai_summary, trunc_tokens, categorize_headline, categorize_df, clean_topics)


import asyncio
# need this to run async in jupyter since it already has an asyncio event loop running
import nest_asyncio
nest_asyncio.apply()


In [2]:
print(f"LangChain         {langchain.__version__}")
print(f"OpenAI            {openai.__version__}")
# print(f"smtplib           {smtplib.sys.version}")
print(f"trafilatura       {trafilatura.__version__}")
# print(f"bs4               {bs4.__version__}")
print(f"numpy             {np.__version__}")
print(f"pandas            {pd.__version__}")
print(f"sklearn           {sklearn.__version__}")
print(f"umap              {umap.__version__}")


LangChain         0.3.0
OpenAI            1.47.0
trafilatura       1.12.2
bs4               4.12.3
numpy             1.26.4
pandas            2.2.3
sklearn           1.5.2
umap              0.5.6


In [5]:
# a basic LLM call with langchain
# NOTE: this rebinds MODEL, which was imported from ainb_const in the import cell,
# so later cells in this notebook see "gpt-4o" rather than the configured value.
MODEL="gpt-4o"
model = ChatOpenAI(model=MODEL)

# send a system instruction plus one user message; the return value (an AIMessage)
# is displayed as the cell output
model.invoke([
    SystemMessage(content="Translate the following from English into Italian"),
    HumanMessage(content='Listen to me. You are beautiful. You are perfect and I love you.'),
])


2024-09-23 17:50:13,832 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


AIMessage(content='Ascoltami. Sei bellissima. Sei perfetta e ti amo.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 34, 'total_tokens': 50, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_e375328146', 'finish_reason': 'stop', 'logprobs': None}, id='run-b7293b0d-c8f9-4a85-b6dc-c98c24647781-0', usage_metadata={'input_tokens': 34, 'output_tokens': 16, 'total_tokens': 50})

In [6]:
# use template
system_template = "Translate the following into {language}:"
prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{text}")]
)
parser = StrOutputParser()
chain = prompt_template | model | parser
chain.invoke({"language": "italian", "text": "hi"})


2024-09-23 17:50:22,734 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


'Ciao'

In [7]:
# Demo: run the same translation chain once per language, one request after another,
# streaming tokens to stdout, and time the whole loop.

# one dict per request; keys match the {placeholders} in the templates below
prompt_inputs = [
    {"language": "French", "adjective1": "flawless", "adjective2": "beautiful"},
    {"language": "German", "adjective1": "immaculate", "adjective2": "exquisite"},
    {"language": "Spanish", "adjective1": "perfect", "adjective2": "gorgeous"},
    {"language": "Italian", "adjective1": "amazing", "adjective2": "magnificent"},
    {"language": "Hungarian", "adjective1": "ravishing", "adjective2": "stunning"},
]

system_template = 'Translate the following into {language}:'
user_template = 'Listen to me. You are {adjective1}. You are {adjective2} and I love you.'

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template),
     ("user", user_template)]
)

# StrOutputParser makes the chain yield plain strings
parser = StrOutputParser()

# prompt -> model -> string; `model` comes from the earlier ChatOpenAI cell
chain = prompt_template | model | parser

start_time = datetime.now()
for tpl in prompt_inputs:
    # accumulates the full text of the current reply; reset for every input and
    # not read again in this cell
    response = ""
    # stream tokens as they are generated
    for r in chain.stream(tpl):
        print(r, end="")
        response += r
end_time = datetime.now()

# wall-clock time for all requests, issued sequentially
difference = end_time - start_time
total_seconds = difference.total_seconds()
print(f"\n\nElapsed seconds: {total_seconds:.6f}")


2024-09-23 17:50:26,832 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Écoute-moi. Tu es parfait(e). Tu es magnifique et je t'aime.

2024-09-23 17:50:28,776 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Hör mir zu. Du bist makellos. Du bist exquisit und ich liebe dich.

2024-09-23 17:50:29,390 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Escúchame. Eres perfecta. Eres preciosa y te amo.

2024-09-23 17:50:29,801 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Ascoltami. Sei incredibile. Sei magnifico e ti amo.

2024-09-23 17:50:30,517 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Figyelj rám. Elragadó vagy. Lenyűgöző vagy, és szeretlek.

Elapsed seconds: 4.318126


In [8]:
# same but send all at once using asyncio

async def async_langchain(chain, input_dict):
    """Await a single asynchronous invocation of `chain` on `input_dict` and return its result."""
    return await chain.ainvoke(input_dict)


# Demo: same requests as the sequential cell, but all issued concurrently.
# Despite the name, these are the input dicts fed to the chain, not templates.
prompt_templates = [
    {"language": "French", "adjective1": "flawless", "adjective2": "beautiful"},
    {"language": "German", "adjective1": "immaculate", "adjective2": "exquisite"},
    {"language": "Spanish", "adjective1": "perfect", "adjective2": "gorgeous"},
    {"language": "Italian", "adjective1": "amazing", "adjective2": "magnificent"},
    {"language": "Hungarian", "adjective1": "ravishing", "adjective2": "stunning"},
]

start_time = datetime.now()
# schedule one task per input so the requests overlap instead of running back to back
tasks = []
for d in prompt_templates:
    task = asyncio.create_task(async_langchain(chain, d))
    tasks.append(task)
# top-level await: valid in a notebook cell, not in a plain .py script.
# gather() returns results in the same order the tasks were passed in.
responses = await asyncio.gather(*tasks)
end_time = datetime.now()


# wall-clock time for the whole batch
difference = end_time - start_time
total_seconds = difference.total_seconds()
print(f"\n\nElapsed seconds: {total_seconds:.6f}")
print("\n".join(responses))


2024-09-23 17:50:32,668 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-09-23 17:50:32,672 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-09-23 17:50:32,678 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-09-23 17:50:32,975 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2024-09-23 17:50:33,012 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"




Elapsed seconds: 0.837595
Écoute-moi. Tu es parfait(e). Tu es magnifique et je t'aime.
Hör mir zu. Du bist makellos. Du bist exquisit und ich liebe dich.
Escúchame. Eres perfecto/a. Eres maravilloso/a y te amo.
Ascoltami. Sei fantastico. Sei magnifico e ti amo.
Hallgass rám. Te elbűvölő vagy. Te lenyűgöző vagy, és szeretlek.


In [9]:
DEBUG = True
N_BROWSERS = 4

In [10]:
# class to maintain settings and state within graph

class AgentState(TypedDict):
    """Settings and working data passed into and returned from each fn_* pipeline step."""
    AIdf: dict                #  the current working set of headlines, stored as DataFrame.to_dict() output (pandas dataframe not supported)
    before_date: str          #  ignore stories before this date for deduplication (force reprocess since); passed to filter_unseen_urls_db
    do_download: bool         #  if False use existing files, else download from sources
    sources: dict             #  sources to scrape, keyed by source name (loaded from SOURCECONFIG by fn_initialize)
    sources_reverse: dict     #  map file names (each source's 'title') to source names
    headline_str: str         #  headline text email
    bullets: str              #  bullet point summary email
    summary: str              #  final summary
    n_edits: int              #  count edit iterations so we don't keep editing forever
    edit_complete: bool       #  edit will update if no more edits to make
    # message thread with OpenAI
    # NOTE(review): operator.add is presumably the reducer langgraph uses to append
    # new messages to the list instead of overwriting it - confirm against langgraph docs
    messages: Annotated[list[AnyMessage], operator.add]


In [11]:
# Initialize reading configurations from YAML file

def fn_initialize(state: AgentState) -> AgentState:
    """
    Initializes the agent state by loading source configurations from SOURCECONFIG (sources.yaml).

    Sets state['sources'] to the parsed YAML mapping (source name -> config dict),
    adds a 'sourcename' key to every config dict, and builds state['sources_reverse'],
    which maps each source's 'title' (used as its HTML file name) back to the source name.

    Args:
        state (AgentState): The current state of the agent.

    Returns:
        AgentState: The same state object, updated in place.

    Raises:
        yaml.YAMLError: If SOURCECONFIG cannot be parsed.
        KeyError: If a source entry lacks the 'url' or 'title' key.

    """

    #  load sources to scrape from sources.yaml
    with open(SOURCECONFIG, "r") as stream:
        try:
            sources = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # report and re-raise: continuing without sources would only fail
            # later with a less helpful error (or silently reuse stale sources)
            log(f"Error parsing {SOURCECONFIG}: {exc}")
            raise

    # an empty YAML document parses to None; treat it as "no sources"
    state['sources'] = sources or {}

    log(f"Initialized {len(state['sources'])} items in sources from {SOURCECONFIG}")

    # make a reverse dict to map file titles to source names
    state['sources_reverse'] = {}
    for k, v in state['sources'].items():
        log(f"{k} -> {v['url']} -> {v['title']}.html")
        # record the source name inside its own config so it travels with the dict
        v['sourcename'] = k
        # map filename (title) to source name
        state['sources_reverse'][v['title']] = k

    log(f"Initialized {len(state['sources_reverse'])} items in sources_reverse")

    return state


# Smoke test, run only when DEBUG is set: start from an empty state dict
# (a TypedDict call with no arguments creates an empty dict) and load the sources.
# test_state is reused by the DEBUG snippets of the following cells.
if DEBUG:
    test_state = AgentState()
    fn_initialize(test_state)


2024-09-23 17:50:47,397 - AInewsbot - INFO - Initialized 17 items in sources from sources.yaml
2024-09-23 17:50:47,397 - AInewsbot - INFO - Ars Technica -> https://arstechnica.com/ -> Ars Technica.html
2024-09-23 17:50:47,398 - AInewsbot - INFO - Bloomberg Tech -> https://www.bloomberg.com/technology -> Bloomberg Technology.html
2024-09-23 17:50:47,398 - AInewsbot - INFO - Business Insider -> https://www.businessinsider.com/tech -> Business Insider Tech.html
2024-09-23 17:50:47,398 - AInewsbot - INFO - FT Tech -> https://www.ft.com/technology -> FT Technology.html
2024-09-23 17:50:47,399 - AInewsbot - INFO - Feedly AI -> https://feedly.com/i/aiFeeds?options=eyJsYXllcnMiOlt7InBhcnRzIjpbeyJpZCI6Im5scC9mL3RvcGljLzMwMDAifV0sInNlYXJjaEhpbnQiOiJ0ZWNobm9sb2d5IiwidHlwZSI6Im1hdGNoZXMiLCJzYWxpZW5jZSI6ImFib3V0In1dLCJidW5kbGVzIjpbeyJ0eXBlIjoic3RyZWFtIiwiaWQiOiJ1c2VyLzYyZWViYjlmLTcxNTEtNGY5YS1hOGM3LTlhNTdiODIwNTMwOC9jYXRlZ29yeS9HYWRnZXRzIn1dfQ -> Feedly AI.html
2024-09-23 17:50:47,399 - AInewsbot -

In [12]:
# scrape sources with selenium and save local files in DOWNLOAD_DIR (htmldata)
def fn_download_sources(state: AgentState) -> AgentState:
    """
    Scrapes sources and saves HTML files.
    If state["do_download"] is True, deletes all files in DOWNLOAD_DIR (htmldata) and scrapes fresh copies.
    If state["do_download"] is False, uses existing files in DOWNLOAD_DIR that carry today's date.
    Uses state["sources"] for config info on sources to scrape
    For each source, saves the current filename to state["sources"][sourcename]['latest']

    Args:
        state (AgentState): The current state of the agent. Must contain "do_download"
            and "sources"; "sources_reverse" is also needed when do_download is False.

    Returns:
        AgentState: The same state object, updated in place.
    """

    if state["do_download"]:
        # empty download directory
        delete_files(DOWNLOAD_DIR)

        # save each file specified from sources
        log(f"Saving HTML files using {N_BROWSERS} browsers")

        # Create a queue for multiprocessing and populate it
        queue = multiprocessing.Queue()
        for item in state["sources"].values():
            queue.put(item)

        # Function to take the queue and pop entries off and process until none are left
        # lets you create an array of functions with different args
        # (named process_queue so the builtin `callable` is not shadowed)
        process_queue = process_source_queue_factory(queue)

        saved_pages = launch_drivers(N_BROWSERS, process_queue)
        for sourcename, file in saved_pages:
            log(f"Downloaded {sourcename} to {file}")
            state['sources'][sourcename]['latest'] = file
        log(f"Saved {len(saved_pages)} HTML files")

    else:   # don't delete, just get list of existing files
        log(f"Web fetch disabled, using existing files in {DOWNLOAD_DIR}")
        # Get the current date
        datestr = datetime.now().strftime("%m_%d_%Y")
        files = [os.path.join(DOWNLOAD_DIR, file)
                 for file in os.listdir(DOWNLOAD_DIR)]
        # filter files with today's date ending in .html
        files = [
            file for file in files if datestr in file and file.endswith(".html")]
        log(f"Found {len(files)} previously downloaded files")
        for file in files:
            log(file)

        # saved files are named '<title> (<date> <time>).html'
        date_marker = " (" + datestr
        for file in files:
            filename = os.path.basename(file)
            # locate date like '01_14_2024' in filename
            position = filename.find(date_marker)
            if position < 0:
                # the date appears somewhere else (e.g. in the directory name or the
                # title itself); slicing with -1 would silently chop the last character
                log(f"Skipping {filename}, date not in expected position")
                continue
            basename = filename[:position]
            # match to source name
            sourcename = state["sources_reverse"].get(basename)
            if sourcename is None:
                log(f"Skipping {basename}, no sourcename metadata")
                continue
            state["sources"][sourcename]['latest'] = file

    return state

# Smoke test: reuse the HTML files already in DOWNLOAD_DIR instead of scraping again
if DEBUG:
    test_state["do_download"] = False
#     test_state["before_date"] = '2024-09-22 12:00:00'
    _ = fn_download_sources(test_state)


2024-09-23 17:50:56,883 - AInewsbot - INFO - Web fetch disabled, using existing files in htmldata
2024-09-23 17:50:56,885 - AInewsbot - INFO - Found 17 previously downloaded files
2024-09-23 17:50:56,885 - AInewsbot - INFO - htmldata/New York Times Technology (09_23_2024 05_32_41 PM).html
2024-09-23 17:50:56,886 - AInewsbot - INFO - htmldata/Hacker News Page 2 (09_23_2024 05_32_29 PM).html
2024-09-23 17:50:56,886 - AInewsbot - INFO - htmldata/HackerNoon (09_23_2024 05_32_39 PM).html
2024-09-23 17:50:56,887 - AInewsbot - INFO - htmldata/WSJ Technology (09_23_2024 05_33_19 PM).html
2024-09-23 17:50:56,887 - AInewsbot - INFO - htmldata/Bloomberg Technology (09_23_2024 05_32_17 PM).html
2024-09-23 17:50:56,888 - AInewsbot - INFO - htmldata/FT Technology (09_23_2024 05_32_17 PM).html
2024-09-23 17:50:56,888 - AInewsbot - INFO - htmldata/Washington Post Technology (09_23_2024 05_33_14 PM).html
2024-09-23 17:50:56,888 - AInewsbot - INFO - htmldata/Feedly AI (09_23_2024 05_33_37 PM).html
2024-

In [21]:
def fn_extract_urls(state: AgentState) -> AgentState:
    """
    Extracts news URLs from the latest HTML files matching the patterns defined in the state['sources'] configuration info.

    Sources without a 'latest' file are skipped. Links from all sources are
    de-duplicated by URL, sorted by source and given a sequential 'id'.

    Args:
        state (AgentState): The current state of the agent.

    Returns:
        AgentState: The updated state of the agent with the extracted URLs stored in state['AIdf']
        as a DataFrame.to_dict() mapping with columns id, src, title, url.
    """
    # Parse news URLs and titles from downloaded HTML files
    log("Parsing html files")
    all_urls = []
    for sourcename, sourcedict in state['sources'].items():
        filename = sourcedict.get('latest')
        if not filename:
            log(f"no filename found for {sourcename}")
            continue

        log(sourcename + ' -> ' + filename)
        links = parse_file(sourcedict)
        log(f"{len(links)} links found")
        all_urls.extend(links)

    log(f"Saved {len(all_urls)} links")

    if not all_urls:
        # an empty DataFrame has no "url" column, so the groupby below would
        # raise KeyError; store an empty frame with the expected columns instead
        state['AIdf'] = pd.DataFrame(columns=["id", "src", "title", "url"]).to_dict()
        return state

    # make a pandas dataframe of all the links found
    AIdf = (
        pd.DataFrame(all_urls)
        .groupby("url")                 # one row per distinct URL ...
        .first()                        # ... keeping the first value seen per column
        .reset_index()
        .sort_values("src")[["src", "title", "url"]]
        .reset_index(drop=True)         # renumber 0..n-1 after sorting
        .reset_index(drop=False)        # expose that numbering as a column ...
        .rename(columns={"index": "id"})  # ... called 'id'
    )
    # state holds plain dicts, not DataFrames
    state['AIdf'] = AIdf.to_dict()

    return state


# Smoke test: parse links out of the files located by the previous cell
if DEBUG:
    _ = fn_extract_urls(test_state)



2024-09-23 17:54:30,412 - AInewsbot - INFO - Parsing html files
2024-09-23 17:54:30,414 - AInewsbot - INFO - Ars Technica -> htmldata/Ars Technica (09_23_2024 05_32_19 PM).html
2024-09-23 17:54:30,444 - AInewsbot - INFO - parse_file - found 252 raw links
2024-09-23 17:54:30,448 - AInewsbot - INFO - parse_file - found 28 filtered links
2024-09-23 17:54:30,448 - AInewsbot - INFO - 28 links found
2024-09-23 17:54:30,448 - AInewsbot - INFO - Bloomberg Tech -> htmldata/Bloomberg Technology (09_23_2024 05_32_17 PM).html
2024-09-23 17:54:30,474 - AInewsbot - INFO - parse_file - found 210 raw links
2024-09-23 17:54:30,477 - AInewsbot - INFO - parse_file - found 27 filtered links
2024-09-23 17:54:30,478 - AInewsbot - INFO - 27 links found
2024-09-23 17:54:30,478 - AInewsbot - INFO - Business Insider -> htmldata/Business Insider Tech (09_23_2024 05_32_16 PM).html
2024-09-23 17:54:30,502 - AInewsbot - INFO - parse_file - found 309 raw links
2024-09-23 17:54:30,505 - AInewsbot - INFO - parse_file 

In [22]:
# Sanity check: the number of distinct sources should be 17 (one per sources.yaml entry).
# If some are missing, the site may have blocked the scraper as a robot:
# download those pages manually and then re-run.
print(len(pd.DataFrame(test_state["AIdf"]).groupby('src').count()))
pd.DataFrame(test_state["AIdf"]).groupby('src').count()


17


Unnamed: 0_level_0,id,title,url
src,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ars Technica,28,28,28
Bloomberg Tech,21,21,21
Business Insider,51,51,51
FT Tech,63,63,63
Feedly AI,65,65,65
Google News,68,68,68
Hacker News,26,26,26
Hacker News 2,24,24,24
HackerNoon,95,95,95
NYT Tech,18,18,18


In [23]:
async def get_site_name(session, row):
    """
    Ask OpenAI (LOWCOST_MODEL) for the human-readable name of the site at row.hostname.

    Args:
        session(aiohttp.ClientSession): Session handed to fetch_openai for the request.
        row(object): Any object with a `hostname` attribute; the hostname is placed
            after "https://" in the prompt.

    Returns:
        dict | None: The JSON object parsed from the model's reply. The prompt asks for
            the form {"url": "www.example.com", "site_name": "Example Site"}.
            If the request or the parsing raises, the exception is printed and
            None is returned.
    """

    cat_prompt = f"""
based on this url and your knowledge of the Web, what is the name of the site? https://{row.hostname}

return the response as a json object of the form {{"url": "www.yankodesign.com", "site_name": "Yanko Design"}}

    """
    try:
        # single-turn request, JSON-only reply, deterministic temperature
        request = {
            "model": LOWCOST_MODEL,
            "response_format": {"type": "json_object"},
            "messages": [{"role": "user", "content": cat_prompt}],
            "temperature": 0,
        }
        reply = await fetch_openai(session, request)
        reply_text = reply["choices"][0]["message"]["content"]
        return json.loads(reply_text)
    except Exception as exc:
        # best effort: report the problem and let the caller see None
        print(exc)
        return None


async def fetch_missing_site_names(AIdf):
    """
    Look up a site name for every row of AIdf whose 'site_name' is the empty string.

    All lookups share one aiohttp session and run concurrently.

    Returns:
        list: one get_site_name result per matching row, in row order.
    """
    unnamed_rows = AIdf.loc[AIdf['site_name'] == ""]
    async with aiohttp.ClientSession() as session:
        pending = [
            asyncio.create_task(get_site_name(session, row))
            for row in unnamed_rows.itertuples()
        ]
        return await asyncio.gather(*pending)



In [24]:
# filter and clean URLs for new AI stories

def _record_seen_articles(AIdf):
    """Insert every row of AIdf (AI-related or not) into SQLITE_DB via insert_article."""
    conn = sqlite3.connect(SQLITE_DB)
    try:
        cursor = conn.cursor()
        for row in AIdf.itertuples():
            insert_article(conn, cursor, row.src, row.hostname, row.title,
                           row.url, row.actual_url, row.isAI, datetime.now().date())
        # no-op if insert_article already commits; otherwise keeps the inserts
        # from being discarded when the connection is closed
        conn.commit()
    finally:
        conn.close()


def _add_site_names(AIdf):
    """Add a 'site_name' column to AIdf from the sites table, asking OpenAI for unknown hostnames."""
    # NOTE(review): this reads a hard-coded 'articles.db' while the article inserts
    # use SQLITE_DB - confirm they are meant to be the same file
    conn = sqlite3.connect('articles.db')
    try:
        query = "select * from sites"
        sites_df = pd.read_sql_query(query, conn)
        sites_dict = {row.hostname: row.site_name for row in sites_df.itertuples()}

        # unknown hostnames get "" so the lookup below actually picks them up
        # (defaulting to the hostname here would make the lookup unreachable)
        AIdf['site_name'] = AIdf['hostname'].apply(lambda hostname: sites_dict.get(hostname, ""))

        # if any missing clean site names, populate them using OpenAI
        missing_site_names = len(AIdf.loc[AIdf['site_name'] == ""])
        if missing_site_names:
            log(f"Asking OpenAI for {missing_site_names} missing site names")
            responses = asyncio.run(fetch_missing_site_names(AIdf))
            # update site_dict from responses
            new_urls = []
            for r in responses:
                # get_site_name returns None on failure, and the model may omit keys
                if (not isinstance(r, dict)
                        or not isinstance(r.get('url'), str)
                        or not r.get('site_name')):
                    continue
                url = r['url'].removeprefix('https://').removeprefix('http://')
                new_urls.append(url)
                sites_dict[url] = r['site_name']
                log(f"Looked up {url} -> {r['site_name']}")
            # update sites table with new names (connection is still open here)
            sqlstr = "INSERT OR IGNORE INTO sites (hostname, site_name) VALUES (?, ?);"
            for url in new_urls:
                log(f"Updated {url} -> {sites_dict[url]}")
                conn.execute(sqlstr, (url, sites_dict[url]))
            conn.commit()
        else:
            log("No missing site names")
    finally:
        conn.close()

    # apply the (possibly extended) names; fall back to the hostname when no
    # usable name is known
    AIdf['site_name'] = AIdf['hostname'].apply(lambda hostname: sites_dict.get(hostname) or hostname)
    return AIdf


def fn_filter_urls(state: AgentState) -> AgentState:
    """
    Filters the URLs in state["AIdf"] to include only those that have not been previously seen,
    and are related to AI according to the response from a ChatGPT prompt.

    Steps: drop URLs already in the database (honoring state["before_date"]), de-duplicate
    headlines that differ only in whitespace/quote style, ask the model which headlines are
    AI-related, record every candidate in SQLITE_DB, keep the AI-related ones, attach a
    clean site_name, and drop hosts/sites on the skip lists.

    Args:
        state (AgentState): The current state of the agent. Reads "AIdf" and "before_date".

    Returns:
        AgentState: The updated state of the agent with the filtered URLs stored in state["AIdf"].
        When there are no new URLs, state["AIdf"] is set to the (empty) filtered result.

    """
    # filter to URL not previously seen
    AIdf = pd.DataFrame(state["AIdf"])

    AIdf = filter_unseen_urls_db(AIdf, before_date=state["before_date"])

    if len(AIdf) == 0:
        log("No new URLs, returning")
        # store the empty result so later steps do not reprocess the stale,
        # unfiltered headline set
        state["AIdf"] = AIdf.to_dict()
        return state

    # dedupe identical headlines
    # filter similar titles differing by type of quote or something
    AIdf['title'] = AIdf['title'].apply(unicode_to_ascii)
    # whitespace-insensitive key for grouping
    AIdf['title_clean'] = AIdf['title'].map(lambda s: "".join(s.split()))
    AIdf = AIdf.sort_values("src") \
        .groupby("title_clean") \
        .first() \
        .reset_index(drop=True) \
        .drop(columns=['id']) \
        .reset_index() \
        .rename(columns={'index': 'id'})
    log(f"Found {len(AIdf)} unique AI headlines")

    # structured response format
    json_schema = {
        "name": "json_schema",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "isai_array": {
                    "type": "array",
                    "items": {
                        "type": "object",
                                "properties": {
                                    "id": {
                                        "type": "number"
                                    },
                                    "isAI": {
                                        "type": "boolean"
                                    }
                                },
                        "required": ["id", "isAI"],
                        "additionalProperties": False
                    }
                }
            },
            "required": ["isai_array"],
            "additionalProperties": False
        }
    }

    # filter AI-related headlines using a prompt
    pages = paginate_df(AIdf)
    enriched_urls = asyncio.run(fetch_pages(pages, prompt=FILTER_PROMPT, json_schema=json_schema))
    filter_df = pd.DataFrame(enriched_urls)

    # merge returned df with isAI column into original df on id column
    AIdf = pd.merge(AIdf, filter_df, on="id", how="outer")

    # set hostname based on actualurl
    AIdf['actual_url'] = AIdf['url']    # ideally resolve redirects but Google News blocks
    AIdf['hostname']=AIdf['actual_url'].apply(lambda url: urlparse(url).netloc)

    # update SQLite database with all seen URLs (we are doing this using url and ignoring redirects)
    _record_seen_articles(AIdf)

    # keep headlines that are related to AI
    AIdf = AIdf.loc[AIdf["isAI"]==1] \
        .reset_index(drop=True)  \
        .reset_index()  \
        .drop(columns=["id"])  \
        .rename(columns={'index': 'id'})

    log(f"Found {len(AIdf)} AI headlines")

    # update actual URLs for Google News redirects
    # I think Google changed something so this no longer works: instead of a 301 redirect
    # we get a javascript page that redirects. Also tomorrow we might see different URLs for same stories
    # AIdf = get_google_news_redirects(AIdf)

    # get clean site_name
    AIdf = _add_site_names(AIdf)

    # drop banned slop sites
    AIdf = AIdf.loc[~AIdf["hostname"].str.lower().isin(HOSTNAME_SKIPLIST)]
    AIdf = AIdf.loc[~AIdf["site_name"].str.lower().isin(SITE_NAME_SKIPLIST)]

    state["AIdf"] = AIdf.to_dict()
    return state


# Smoke test: before_date is handed to filter_unseen_urls_db by fn_filter_urls;
# the commented-out alternative sets it to None instead.
if DEBUG:
    before_date="2024-09-23 10:00:00"
    test_state["before_date"] = before_date
#     test_state["before_date"] = None
    _ = fn_filter_urls(test_state)


2024-09-23 17:54:38,665 - AInewsbot - INFO - Querying SQLite with where_clause: WHERE timestamp < '2024-09-23 10:00:00'
2024-09-23 17:54:38,790 - AInewsbot - INFO - URLs in orig_df: 998
2024-09-23 17:54:38,793 - AInewsbot - INFO - Existing URLs in DB: 149940
2024-09-23 17:54:38,826 - AInewsbot - INFO - New URLs in df filtered by URL: 400
2024-09-23 17:54:38,906 - AInewsbot - INFO - Existing src+title: 26
2024-09-23 17:54:38,906 - AInewsbot - INFO - New URLs in df filtered by src+title: 374
2024-09-23 17:54:38,915 - AInewsbot - INFO - Found 366 unique AI headlines
2024-09-23 17:54:38,923 - AInewsbot - INFO - Applying prompt to 8 pages using gpt-4o-mini
2024-09-23 17:54:38,923 - AInewsbot - INFO - sent 50 items 
2024-09-23 17:54:38,924 - AInewsbot - INFO - sent 50 items 
2024-09-23 17:54:38,924 - AInewsbot - INFO - sent 50 items 
2024-09-23 17:54:38,924 - AInewsbot - INFO - sent 50 items 
2024-09-23 17:54:38,925 - AInewsbot - INFO - sent 50 items 
2024-09-23 17:54:38,925 - AInewsbot - IN

In [34]:
len(pd.DataFrame(test_state['AIdf']))


28

In [None]:
# Function for topic extraction

def clean_topics(row, lcategories):
    """
    Cleans the extracted_topics and assigned_topics by removing certain common topics and combining them into a single list.

    Topics are title-cased, merged, de-duplicated and sorted; then the capitalization
    that title-casing breaks is restored ("Ai" -> "AI", "Genai" -> "Gen AI",
    "Openai" -> "OpenAI"). The restoration only applies to standalone tokens, so
    words that merely start with those letters ("Airbnb", "Airlines") are left alone.

    Args:
        row (pandas.Series): The row containing the extracted_topics and assigned_topics.
        lcategories (set): The set of lowercase categories; assigned topics not in it are dropped.

    Returns:
        list: The cleaned and combined list of topics.
    """
    import re  # local import: `re` is not among this file's top-level imports

    too_generic = {"technology", "ai", "artificial intelligence"}
    extracted_topics = [x.title() for x in row.extracted_topics if x.lower() not in too_generic]
    assigned_topics = [x.title() for x in row.assigned_topics if x.lower() in lcategories]
    combined = sorted(set(extracted_topics + assigned_topics))

    # Each pattern requires that the token is not continued by a lowercase letter,
    # i.e. it is a whole word or is followed by digits/punctuation ("Ai21", "Ai-Powered").
    # "Ai" additionally accepts a plural "s" so "Ais" still becomes "AIs".
    casing_fixes = (
        (r"Ai(?=s?(?![a-z]))", "AI"),
        (r"Genai(?![a-z])", "Gen AI"),
        (r"Openai(?![a-z])", "OpenAI"),
    )
    for pattern, replacement in casing_fixes:
        combined = [re.sub(pattern, replacement, s) for s in combined]

    return combined


async def do_cat(AIdf, categories):
    """
    Ask ChatGPT, for every headline in AIdf, which of the given categories apply.

    Headlines are handled one at a time; for each headline one categorize_headline
    request per category is issued and all of them are awaited together.

    Args:
        AIdf (pandas.DataFrame): Headlines; the `id` and `title` columns are read.
        categories (list): Candidate topics to test each headline against.

    Returns:
        dict: Maps each row id to the flattened list of items returned by
        categorize_headline across all categories.

    """

    topics_by_id = {}
    async with aiohttp.ClientSession() as session:
        for row in AIdf.itertuples():
            log(f"Categorizing headline {row.id+1} of {len(AIdf)}")
            headline = row.title
            log(headline)
            # one request per candidate category, run concurrently
            pending = [
                asyncio.create_task(categorize_headline(headline, category, session))
                for category in categories
            ]
            per_category = await asyncio.gather(*pending)
            # each response is a list; concatenate them in category order
            matched = []
            for topic_list in per_category:
                matched.extend(topic_list)
            topics_by_id[row.id] = matched
            log(str(matched))

    return topics_by_id


def fn_topic_analysis(state: AgentState) -> AgentState:

    """
    Extracts and selects topics for each headline in the state['AIdf'] dataframe, scrubs them, and stores them back in the dataframe.

    Steps:
      1. Run the free-form topic extraction prompt over the paginated headlines.
      2. Build the allowed category set: the canonical topics plus any free-form
         topic that shows up at least twice.
      3. Run categorize_headline to assign canonical topics to each headline.
      4. Combine and clean both topic lists per headline (clean_topics) and store
         them as `topic_str` and `title_topic_str` columns.

    Args:
        state (AgentState): The current state of the agent; state["AIdf"] must
            provide `id` and `title` columns.

    Returns:
        AgentState: The same state object with state['AIdf'] replaced by the
        dataframe (as a dict) that now carries `topic_str` and `title_topic_str`.
    """
    AIdf = pd.DataFrame(state["AIdf"])
    pages = paginate_df(AIdf)
    # apply topic extraction prompt to AI headlines
    log("start free-form topic extraction")
    # structured-output schema: {"extracted_topics": [{"id": number, "topics": [str, ...]}, ...]}
    json_schema = {
        "name": "extracted_topics",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "extracted_topics": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "number",
                            },
                            "topics": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                },
                            },
                        },
                        "required": ["id", "topics"],
                        "additionalProperties": False,
                    }
                }
            },
            "required": ["extracted_topics"],
            "additionalProperties": False,
        }
    }
    response = asyncio.run(fetch_pages(pages, prompt=TOPIC_PROMPT, json_schema=json_schema))
    topic_df = pd.DataFrame(response)
    topic_df = topic_df.rename(columns={'topics': 'extracted_topics'})

    log(f"{len(topic_df)} free-form topics extracted")
    all_topics = [item.lower() for row in topic_df.itertuples() for item in row.extracted_topics]
    item_counts = Counter(all_topics)
    generic_topics = {'technology', 'ai', 'artificial intelligence'}
    filtered_topics = [item for item, count in item_counts.items()
                       if count >= 2 and item not in generic_topics]
    categories = sorted(CANONICAL_TOPICS)
    canonical_lower = {c.lower() for c in categories}
    # use categories that are canonical or show up twice in freeform
    lcategories = canonical_lower | {c.lower() for c in filtered_topics}
    # new topics: filtered_topics are lowercase, so compare against the
    # lowercased canonical names (a mixed-case comparison would report
    # canonical topics as new)
    log([c for c in filtered_topics if c not in canonical_lower])

    catdict = asyncio.run(categorize_headline(AIdf, categories=categories, maxpagelen=20))
    topic_df['assigned_topics'] = topic_df['id'].apply(lambda row_id: catdict.get(row_id, []))

    topic_df["topics"] = topic_df.apply(lambda t: clean_topics(t, lcategories), axis=1)
    topic_df["topic_str"] = topic_df.apply(lambda row: ", ".join(row.topics), axis=1)

    # for idempotency: drop columns left by a previous run.  errors="ignore"
    # drops whichever of the two exist; a plain drop raises (and removes
    # nothing) as soon as one of them is missing.
    AIdf = AIdf.drop(columns=['topic_str', 'title_topic_str'], errors="ignore")

    AIdf = pd.merge(AIdf, topic_df[["id", "topic_str"]], on="id", how="outer")
    # headlines for which no topics came back get an empty string, not NaN
    AIdf["topic_str"] = AIdf["topic_str"].fillna("")
    AIdf['title_topic_str'] = AIdf.apply(lambda row: f'{row.title} (Topics: {row.topic_str})', axis=1)

    state["AIdf"] = AIdf.to_dict()
    return state

# Debug aid: when DEBUG is set, run the topic-analysis step on test_state.
# The returned state is discarded (bound to `_`).
if DEBUG:
    _ = fn_topic_analysis(test_state)


2024-09-23 18:04:05,935 - AInewsbot - INFO - start free-form topic extraction
2024-09-23 18:04:05,935 - AInewsbot - INFO - Applying prompt to 2 pages using gpt-4o-mini
2024-09-23 18:04:05,936 - AInewsbot - INFO - sent 50 items 
2024-09-23 18:04:05,936 - AInewsbot - INFO - sent 45 items 
2024-09-23 18:04:16,223 - AInewsbot - INFO - got dict with 45 items 
2024-09-23 18:04:22,680 - AInewsbot - INFO - got dict with 50 items 
2024-09-23 18:04:22,682 - AInewsbot - INFO - Processed 95 responses.
2024-09-23 18:04:22,683 - AInewsbot - INFO - 95 free-form topics extracted
2024-09-23 18:04:22,686 - AInewsbot - INFO - ['andy serkis', 'entertainment', 'characters', 'film', 'chatgpt', 'humor', 'social media', 'creativity', 'cloudflare', 'data scraping', 'blocking', 'tools', 'linkedin', 'data privacy', 'data usage', 'privacy', 'opt out', 'instagram', 'openai', 'sam altman', 'blog post', 'siri', 'apple', 'update', 'business', 'education', 'meta connect 2024', 'generative ai', 'investors', 'china', 'u

2024-09-23 18:04:45,294 - AInewsbot - INFO - Applying prompt to 5 pages using gpt-4o-mini
2024-09-23 18:04:45,295 - AInewsbot - INFO - sent 20 items 
2024-09-23 18:04:45,297 - AInewsbot - INFO - sent 20 items 
2024-09-23 18:04:45,300 - AInewsbot - INFO - sent 20 items 
2024-09-23 18:04:45,301 - AInewsbot - INFO - sent 20 items 
2024-09-23 18:04:45,303 - AInewsbot - INFO - sent 15 items 
2024-09-23 18:04:47,157 - AInewsbot - INFO - got dict with 15 items 
2024-09-23 18:04:47,203 - AInewsbot - INFO - got dict with 20 items 
2024-09-23 18:04:47,228 - AInewsbot - INFO - got dict with 20 items 
2024-09-23 18:04:47,231 - AInewsbot - INFO - got dict with 20 items 
2024-09-23 18:04:47,235 - AInewsbot - INFO - got dict with 20 items 
2024-09-23 18:04:47,237 - AInewsbot - INFO - Processed 95 responses.
2024-09-23 18:04:47,238 - AInewsbot - INFO - Artificial General Intelligence, topic 10 of 154
2024-09-23 18:04:47,238 - AInewsbot - INFO - Applying prompt to 5 pages using gpt-4o-mini
2024-09-23 1

2024-09-23 18:05:09,262 - AInewsbot - INFO - sent 20 items 
2024-09-23 18:05:09,264 - AInewsbot - INFO - sent 15 items 
2024-09-23 18:05:11,554 - AInewsbot - INFO - got dict with 20 items 
2024-09-23 18:05:11,567 - AInewsbot - INFO - got dict with 20 items 
2024-09-23 18:05:11,611 - AInewsbot - INFO - got dict with 20 items 
2024-09-23 18:05:12,265 - AInewsbot - INFO - got dict with 15 items 
2024-09-23 18:05:12,791 - AInewsbot - INFO - got dict with 20 items 
2024-09-23 18:05:12,793 - AInewsbot - INFO - Processed 95 responses.
2024-09-23 18:05:12,793 - AInewsbot - INFO - Bubble, topic 19 of 154
2024-09-23 18:05:12,794 - AInewsbot - INFO - Applying prompt to 5 pages using gpt-4o-mini
2024-09-23 18:05:12,795 - AInewsbot - INFO - sent 20 items 
2024-09-23 18:05:12,797 - AInewsbot - INFO - sent 20 items 
2024-09-23 18:05:12,799 - AInewsbot - INFO - sent 20 items 
2024-09-23 18:05:12,801 - AInewsbot - INFO - sent 20 items 
2024-09-23 18:05:12,802 - AInewsbot - INFO - sent 15 items 
2024-09

In [32]:
async def write_topic_name(session, topic_list_str, max_retries=3, model=LOWCOST_MODEL):
    """
    Generates a name for a cluster based on a list of headline topics.

    The prompt asks the model for a JSON object with the key "topic_title".
    The request is attempted up to `max_retries` times; any exception raised
    while fetching or parsing is logged and the next attempt is made.

    Parameters:
    session (aiohttp.ClientSession): The client session for making async HTTP requests.
    topic_list_str (str): A string containing the list of headline topics.
    max_retries (int, optional): The maximum number of attempts in case of an error. Defaults to 3.
    model (str, optional): The model to use for generating the topic name. Defaults to LOWCOST_MODEL.

    Returns:
    dict: A dictionary containing the generated topic name, or an empty dict
    if every attempt failed.

    Example Usage:
    title_topic_str_list = "Headline 1 (Topic: Topic 1)\n\nHeadline 2 (Topic: Topic 2)"
    result = await write_topic_name(session, title_topic_str_list)
    print(result)

    Output:
    {"topic_title": "Generated Topic Name"}
    """
    TOPIC_WRITER_PROMPT = f"""
You are a topic writing assistant. I will provide a list of headlines with extracted topics in parentheses.
Your task is to propose a name for a topic that very simply, clearly and accurately captures all the provided
headlines in less than 7 words. You will output a JSON object with the key "topic_title".

Example Input:
In the latest issue of Caixins weekly magazine: CATL Bets on 'Skateboard Chassis' and Battery Swaps to Dispell Market Concerns (powered by AI) (Topics: Battery Swaps, Catl, China, Market Concerns, Skateboard Chassis)

AI, cheap EVs, future Chevy  the week (Topics: Chevy, Evs)

Electric Vehicles and AI: Driving the Consumer & World Forward (Topics: Consumer, Electric Vehicles, Technology)

Example Output:
{{"topic_title": "Electric Vehicles"}}

Task
Propose the name for the overall topic based on the following provided headlines and individual topics:

{topic_list_str}
"""

    # the request does not change between attempts, so build it once
    messages = [{"role": "user", "content": TOPIC_WRITER_PROMPT}]
    payload = {"model": model,
               "response_format": {"type": "json_object"},
               "messages": messages,
               "temperature": 0
               }

    for _ in range(max_retries):
        try:
            # We are already inside a coroutine: await the request.
            # asyncio.run() must not be called from a running event loop.
            response = await fetch_openai(session, payload)
            return json.loads(response["choices"][0]["message"]["content"])
        except Exception as exc:
            log(f"Error: {exc}")

    return {}


async def afn_topic_clusters(state: AgentState) -> AgentState:
    """
    Fetches embeddings for the headlines, creates clusters of similar articles using DBSCAN, and sorts
    using the clusters and a traveling salesman shortest traversal in embedding space.

    After clustering, the rows are re-sorted by (cluster, sort_order) and the
    `id` column is regenerated from the new row order. Each cluster found is
    displayed and named via write_topic_name; a cluster whose name could not
    be generated gets an empty name.

    Parameters:
    state (AgentState): The state of the agent; state["AIdf"] must provide
        `id` and `title_topic_str` columns.

    Returns:
    AgentState: The same state object, with state["AIdf"] updated
    (`sort_order`, `cluster`, `cluster_name` columns, new `id`) and
    state["cluster_topics"] set to the list of cluster names in cluster order.

    """
    # label given to rows DBSCAN marks as noise (-1) so that they sort last
    unclustered = 999

    AIdf = pd.DataFrame(state["AIdf"])

    log(f"Fetching embeddings for {len(AIdf)} headlines")
    embedding_model = 'text-embedding-3-large'
    client = OpenAI()
    response = client.embeddings.create(input=AIdf['title_topic_str'].tolist(),
                                        model=embedding_model)
    embedding_df = pd.DataFrame([e.model_dump()['embedding'] for e in response.data])

    # greedy traveling salesman sort
    log("Sort with nearest_neighbor_sort sort")
    sorted_indices = nearest_neighbor_sort(embedding_df)
    AIdf['sort_order'] = sorted_indices

    # do dimensionality reduction on embedding_df and cluster analysis
    log("Perform dimensionality reduction")
    # NOTE(review): pickle.load can execute arbitrary code -- reducer.pkl must
    # only ever come from a trusted source.
    with open("reducer.pkl", 'rb') as file:
        # Load the model from the file
        reducer = pickle.load(file)
    reduced_data = reducer.transform(embedding_df)
    log("Cluster with DBSCAN")
    dbscan = DBSCAN(eps=0.4, min_samples=3)  # Adjust eps and min_samples as needed
    AIdf['cluster'] = dbscan.fit_predict(reduced_data)
    AIdf.loc[AIdf['cluster'] == -1, 'cluster'] = unclustered

    # sort first by clusters found by DBSCAN, then by semantic ordering
    AIdf = AIdf.sort_values(['cluster', 'sort_order']) \
        .reset_index(drop=True) \
        .reset_index() \
        .drop(columns=["id"]) \
        .rename(columns={'index': 'id'})

    # show and name every real cluster (no fixed upper bound on their number)
    cluster_ids = sorted(int(c) for c in AIdf['cluster'].unique() if c != unclustered)
    cluster_names = {}
    with pd.option_context('display.max_rows', None, 'display.max_colwidth', None):
        async with aiohttp.ClientSession() as session:
            for cluster_id in cluster_ids:
                tmpdf = AIdf.loc[AIdf['cluster'] == cluster_id][["id", "title_topic_str"]]
                display(tmpdf)
                title_topic_str_list = ("\n\n".join(tmpdf['title_topic_str'].to_list()))
                naming_result = await write_topic_name(session, title_topic_str_list)
                # write_topic_name returns {} when all retries fail; fall back
                # to an empty name instead of raising KeyError
                cluster_topic = naming_result.get('topic_title', "")
                cluster_names[cluster_id] = cluster_topic
                log(f"I dub this cluster: {cluster_topic}")

    state["cluster_topics"] = [cluster_names[c] for c in cluster_ids]
    AIdf["cluster_name"] = AIdf['cluster'].apply(lambda c: cluster_names.get(c, ""))
    state["AIdf"] = AIdf.to_dict()
    return state

# TODO: could add a quality rating for stories based on site reputation, length, complexity of story
# could then add the quality rating to the summaries and tell the prompt to favor high-quality stories
# could put summaries into vector store and retrieve stories by topic. but then you will have to deal
# with duplicates across categories, ask the prompt to dedupe

def fn_topic_clusters(state: AgentState) -> AgentState:
    """
    Synchronous wrapper around afn_topic_clusters.

    Runs the coroutine to completion with asyncio.run. afn_topic_clusters
    updates `state` in place and returns that same object, which is handed
    back to the caller.
    """
    updated_state = asyncio.run(afn_topic_clusters(state))
    return updated_state


# Debug aid: when DEBUG is set, run the clustering step on test_state.
# The returned state is discarded (bound to `_`).
if DEBUG:
    _ = fn_topic_clusters(test_state)


2024-09-23 18:21:17,455 - AInewsbot - INFO - Fetching embeddings for 28 headlines
2024-09-23 18:21:18,036 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-09-23 18:21:19,179 - AInewsbot - INFO - Sort with nearest_neighbor_sort sort
2024-09-23 18:21:19,185 - AInewsbot - INFO - Perform dimensionality reduction
2024-09-23 18:21:20,534 - AInewsbot - INFO - Cluster with DBSCAN


Unnamed: 0,id,title_topic_str
0,0,"When robots can't riddle: What puzzles reveal about the depths of our own minds (Topics: Artificial General Intelligence, Cognition, Cognitive Science, Consciousness, Ethics, Human Mind, Puzzles, Robots, Safety And Alignment, Science, Singularity, Society & Culture)"
1,1,"How to Survive the AI Apocalypse (Topics: AI Doom, Apocalypse, Future, Gen AI, Safety And Alignment, Singularity, Survival)"
2,2,"Exploring ""Clarity Windows"" in AI: The Unpredictable Moments of Perceived Consciousness (Topics: Artificial General Intelligence, Clarity Windows, Cognitive Science, Consciousness, Ethics, Gen AI, Perception, Perplexity, Safety And Alignment, Science, Singularity)"
3,3,"AI Uprising: Can Machines Really Outthink Humans? (Topics: AI Doom, Artificial General Intelligence, Cognitive Science, Gen AI, Human Intelligence, Machines, Safety And Alignment, Science, Singularity, Uprising)"
4,4,"The AI Revolution: Reimagining Governance, Society, and Human Consciousness in the 21st Century (Topics: 21St Century, Cognitive Science, Consciousness, Ethics, Gen AI, Governance, Human Consciousness, Singularity, Society, Society & Culture)"


2024-09-23 18:21:21,292 - AInewsbot - INFO - I dub this cluster: Artificial Intelligence and Human Cognition


Unnamed: 0,id,title_topic_str
5,5,"Andy Serkis says screen industries should embrace AI at Labour party conference (Topics: Andy Serkis, Conference, Entertainment, Ethics, Gen AI, Governance, Hollywood, Jobs & Careerslabor Market, Labour Party, Opinion, Policy And Regulation, Politics, Screen Industries, Tv & Film & Movies, Uk)"
6,6,"Andy Serkis Teases New Project Featuring AI Characters (Topics: Andy Serkis, Characters, Entertainment, Film, Gen AI, Hollywood, Teaser, Tv & Film & Movies)"
7,7,"Andy Serkis teases his next project, which will feature AI characters (Topics: Andy Serkis, Characters, Entertainment, Film, Gen AI, Hollywood, Tv & Film & Movies)"
8,8,"Andy Serkis Thinks AI Is 'Magic' (Topics: Andy Serkis, Entertainment, Gen AI, Hollywood, Magic, Opinion, Tv & Film & Movies)"


2024-09-23 18:21:21,813 - AInewsbot - INFO - I dub this cluster: AI in Entertainment and Film


Unnamed: 0,id,title_topic_str
9,9,"ChatGPT was like ""miss me with that sh** (Topics: Chatbots, Chatgpt, Code Assistants, Gen AI, Humor, Language Models, OpenAI, Opinion, Social Media, Virtual Assistants)"
10,10,"The following ChatGPT prompt guidance that produced Mini Corgi Surfers (Topics: Chatbots, Chatgpt, Code Assistants, Creativity, Gen AI, Language Models, OpenAI, Prompt Guidance)"
11,11,"Chat GPT Orgasmic Groans (Topics: Chatbots, Chatgpt, Entertainment, Gen AI, Humor, Language Models, OpenAI, Opinion)"
12,12,"I love ChatGPT (Topics: Chatbots, Chatgpt, Code Assistants, Enthusiasm, Gen AI, Language Models, OpenAI, Opinion, Virtual Assistants)"


2024-09-23 18:21:22,223 - AInewsbot - INFO - I dub this cluster: ChatGPT and Generative AI


Unnamed: 0,id,title_topic_str
13,13,"New Cloudflare Tools Let Sites Detect and Block AI Bots for Free (Topics: Big Tech, Blocking, Cloudflare, Customer Service, Cybersecurity, Detection, Ethics, Gen AI, Intellectual Property, Policy And Regulation, Privacy, Privacy & Surveillance, Products, Safety And Alignment, Science, Tools)"
14,14,"Cloudflare's new marketplace lets websites charge AI bots for scraping (Topics: Big Tech, Cloudflare, Customer Service, Cybersecurity, Data Scraping, Deals, Economics, Ethics, Finance, Gen AI, Intellectual Property, Legal Issues, Marketplace, Policy And Regulation, Privacy, Privacy & Surveillance, Products, Websites)"
15,15,"Cloudflare rolls out Bot Management, a suite of free AI auditing tools meant to help monitor and selectively block AI data-scraping bots, to all its customers (Topics: Big Tech, Bot Management, Cloudflare, Customer Service, Cybersecurity, Data Scraping, Ethics, Gen AI, Intellectual Property, Policy And Regulation, Privacy, Privacy & Surveillance, Products, Safety And Alignment, Science, Tools)"
16,16,"Cloudflare moves to end free, endless AI scraping with one-click blocking (Topics: Big Tech, Blocking, Cloudflare, Cybersecurity, Data Scraping, Ethics, Gen AI, Intellectual Property, Policy And Regulation, Privacy, Privacy & Surveillance)"
17,17,"Cloudflare is arming content creators with free weapons in the battle against AI bot crawlers (Topics: Big Tech, Cloudflare, Content Creators, Cybersecurity, Data Scraping, Ethics, Gen AI, Intellectual Property, Policy And Regulation, Privacy, Privacy & Surveillance, Tools)"


2024-09-23 18:21:23,047 - AInewsbot - INFO - I dub this cluster: Cloudflare and AI Bot Management


Unnamed: 0,id,title_topic_str
18,18,"Social media platforms are using what you create for artificial intelligence. Heres how to opt out (Topics: Bias And Fairness, Big Tech, Data Usage, Ethics, Gen AI, Intellectual Property, Legal Issues, Opt Out, Policy And Regulation, Privacy, Privacy & Surveillance, Safety And Alignment, Social Media, Society & Culture)"
19,19,"LinkedIn, Facebook, and Instagram are hoovering up your data to train their AI. Heres how to stop it (Topics: Bias And Fairness, Big Tech, Cybersecurity, Data Privacy, Ethics, Facebook, Gen AI, Instagram, Intellectual Property, Legal Issues, Linkedin, Meta, Policy And Regulation, Privacy, Privacy & Surveillance, Safety And Alignment, Society & Culture)"
20,20,"LinkedIn is training AI on you unless you opt out with this setting (Topics: Bias And Fairness, Big Tech, Cybersecurity, Data Privacy, Ethics, Gen AI, Intellectual Property, Jobs & Careerslabor Market, Linkedin, Policy And Regulation, Privacy, Privacy & Surveillance, Safety And Alignment, Science, User Settings)"
21,21,"Social media platforms are using what you create for AI. Heres how to opt out (Topics: Bias And Fairness, Big Tech, Data Usage, Ethics, Gen AI, Intellectual Property, Legal Issues, Opt Out, Policy And Regulation, Privacy, Privacy & Surveillance, Safety And Alignment, Social Media, Society & Culture)"


2024-09-23 18:21:23,760 - AInewsbot - INFO - I dub this cluster: Data Privacy and AI Ethics


Unnamed: 0,id,title_topic_str
22,22,"Sam Altman catapults past founder mode into god mode with latest AI post (Topics: Big Tech, Blog Post, Founder, Gen AI, OpenAI, Opinion, Sam Altman, Singularity)"
23,23,"OpenAI CEO Sam Altman anticipates superintelligence soon, defends AI in rare personal blog post (Topics: Artificial General Intelligence, Big Tech, Blog Post, Ethics, Gen AI, OpenAI, Opinion, Safety And Alignment, Sam Altman, Science, Singularity, Superintelligence)"
24,24,"Sam Altman may be in his villain era, but no one seems to care (Topics: Opinion, Public Perception, Sam Altman)"


2024-09-23 18:21:24,420 - AInewsbot - INFO - I dub this cluster: Sam Altman and AI Perspectives


Unnamed: 0,id,title_topic_str
25,25,"Some of Siri's long-awaited AI enhancements could reach users by January (Topics: Big Tech, Chatbots, Enhancements, Gen AI, Siri, Speech Recognition & Synthesis, Update, Virtual Assistants)"
26,26,"Siri May Not Get Its Apple Intelligence Update Until January 2025 (Topics: Apple, Big Tech, Chatbots, Siri, Speech Recognition & Synthesis, Update, Virtual Assistants)"
27,27,"The 5 best Apple Intelligence tools you can try right now in iOS 18.1 public beta (Topics: Apple, Big Tech, Ios 18.1, Products, Review, Tools)"


2024-09-23 18:21:25,191 - AInewsbot - INFO - I dub this cluster: AI Enhancements in Virtual Assistants


In [33]:
# scrape individual pages
def fn_download_pages(state: AgentState) -> AgentState:
    """
    Uses several Selenium browser sessions to download all the pages referenced in the
    state["AIdf"] DataFrame and store their pathnames.

    Only rows with `cluster < 999` are queued for download. When at least one
    page was saved, the saved paths are inner-joined back on `id`, so rows
    without a saved page are dropped from state["AIdf"]. When nothing was
    saved, the DataFrame is written back unchanged.

    Args:
        state (AgentState): The current state of the agent; state["AIdf"] must
            provide `id`, `url`, `title` and `cluster` columns.

    Returns:
        AgentState: The updated state of the agent with the downloaded pages' pathnames stored in the `state["AIdf"]` DataFrame.
    """
    log("Queuing URLs for scraping")
    AIdf = pd.DataFrame(state['AIdf'])
    queue = multiprocessing.Queue()

    count = 0
    for row in AIdf.itertuples():
        if row.cluster < 999:
            queue.put((row.id, row.url, row.title))
            count += 1
    # scrape urls in queue asynchronously
    num_browsers = 4

    # named `worker` rather than `callable` to avoid shadowing the builtin
    worker = process_url_queue_factory(queue)

    log(f"fetching {count} pages using {num_browsers} browsers")
    saved_pages = launch_drivers(num_browsers, worker)

    pages_df = pd.DataFrame(saved_pages)
    if len(pages_df):
        pages_df.columns = ['id', 'url', 'title', 'path']

        # for idempotency: remove a `path` column left by a previous run so
        # the merge does not produce path_x / path_y
        AIdf = AIdf.drop(columns=['path'], errors="ignore")
        AIdf = pd.merge(AIdf, pages_df[["id", "path"]], on='id', how="inner")
    state["AIdf"] = AIdf.to_dict()
    return state


# Debug aid: when DEBUG is set, run the page-download step on test_state.
# The returned state is discarded (bound to `_`).
if DEBUG:
    _ = fn_download_pages(test_state)



2024-09-23 18:21:43,715 - AInewsbot - INFO - Queuing URLs for scraping
2024-09-23 18:21:43,725 - AInewsbot - INFO - fetching 28 pages using 4 browsers
2024-09-23 18:21:43,727 - AInewsbot - INFO - get_driver - 32827 Initializing webdriver
2024-09-23 18:21:43,729 - AInewsbot - INFO - get_driver - 32827 Initializing webdriver
2024-09-23 18:21:43,729 - AInewsbot - INFO - get_driver - 32827 Initializing webdriver
2024-09-23 18:21:43,729 - AInewsbot - INFO - get_driver - 32827 Initializing webdriver
2024-09-23 18:21:59,573 - AInewsbot - INFO - get_driver - Initialized webdriver profile
2024-09-23 18:21:59,574 - AInewsbot - INFO - get_driver - Initialized webdriver service
2024-09-23 18:21:59,771 - AInewsbot - INFO - get_driver - Initialized webdriver profile
2024-09-23 18:21:59,772 - AInewsbot - INFO - get_driver - Initialized webdriver service
2024-09-23 18:22:00,033 - AInewsbot - INFO - get_driver - Initialized webdriver profile
2024-09-23 18:22:00,034 - AInewsbot - INFO - get_driver - Ini

2024-09-23 18:22:53,582 - AInewsbot - INFO - Processing https://news.google.com/read/CBMikwFBVV95cUxQa1ZKYlNCdl9IYXkyMDhPTFlVeGJEWVhlT2YwWFFkTUxqVmF3REEyRmFIcWRieVQzQ2dEWEpzOHZ3MEk4UjhlWnh5RXJycjlNbGNtdm5QN0h0V0tTalcwdlVvYTA2bzlOMFc5b1dIbFc0cmVrMmhEZmkzaHlZSDRCTFFpWTBNSUdBVnhKZFRCQkdRanM
2024-09-23 18:22:53,582 - AInewsbot - INFO - get_url(https://news.google.com/read/CBMikwFBVV95cUxQa1ZKYlNCdl9IYXkyMDhPTFlVeGJEWVhlT2YwWFFkTUxqVmF3REEyRmFIcWRieVQzQ2dEWEpzOHZ3MEk4UjhlWnh5RXJycjlNbGNtdm5QN0h0V0tTalcwdlVvYTA2bzlOMFc5b1dIbFc0cmVrMmhEZmkzaHlZSDRCTFFpWTBNSUdBVnhKZFRCQkdRanM) - starting get_url https://news.google.com/read/CBMikwFBVV95cUxQa1ZKYlNCdl9IYXkyMDhPTFlVeGJEWVhlT2YwWFFkTUxqVmF3REEyRmFIcWRieVQzQ2dEWEpzOHZ3MEk4UjhlWnh5RXJycjlNbGNtdm5QN0h0V0tTalcwdlVvYTA2bzlOMFc5b1dIbFc0cmVrMmhEZmkzaHlZSDRCTFFpWTBNSUdBVnhKZFRCQkdRanM
2024-09-23 18:22:56,559 - AInewsbot - INFO - get_url(Andy Serkis says screen industries should embrace AI at Labour party conference) - Saving Andy_Serkis_says_screen_indus

2024-09-23 18:23:25,956 - AInewsbot - INFO - get_url(Cloudflare rolls out Bot Management, a suite of free AI auditing tools meant to help monitor and selectively block AI data-scraping bots, to all its customers) - Saving Cloudflare_rolls_out_Bot_Management__a_suite_of_free_AI_auditing_tools_meant_to_help_monitor_and_selectively_block_AI_data-scraping_bots__to_all_its_customers_20240923_182325.html as utf-8
2024-09-23 18:23:25,957 - AInewsbot - INFO - Processing https://news.google.com/read/CBMiiwFBVV95cUxOSzNUbjZHTEpyUC02Yzg2OERvQ0kwcWhyLXh4TUUzSG16QTdGT2g3bERNNElMdmtIaTZOME5QaFdxSjJ4cEhEVWV0NlM1aUFyVGpWMVpzbFRfYW05azBUYTUwbmdSUjBMcDZKVjFsNHJ6ckw5bFJlWUtyTERVRWEtSW1aYWhLa1M0bGQ4
2024-09-23 18:23:25,958 - AInewsbot - INFO - get_url(https://news.google.com/read/CBMiiwFBVV95cUxOSzNUbjZHTEpyUC02Yzg2OERvQ0kwcWhyLXh4TUUzSG16QTdGT2g3bERNNElMdmtIaTZOME5QaFdxSjJ4cEhEVWV0NlM1aUFyVGpWMVpzbFRfYW05azBUYTUwbmdSUjBMcDZKVjFsNHJ6ckw5bFJlWUtyTERVRWEtSW1aYWhLa1M0bGQ4) - starting get_url https://news.goo

2024-09-23 18:23:48,920 - AInewsbot - INFO - get_url(Sam Altman may be in his villain era, but no one seems to care) - Saving Sam_Altman_may_be_in_his_villain_era__but_no_one_seems_to_care_20240923_182348.html as utf-8
2024-09-23 18:23:48,921 - AInewsbot - INFO - Quit webdriver
2024-09-23 18:23:51,198 - AInewsbot - INFO - 7 pages saved
2024-09-23 18:23:51,625 - AInewsbot - INFO - get_url(Some of Siri's long-awaited AI enhancements could reach users by January) - Saving Some_of_Siri_s_long-awaited_AI_enhancements_could_reach_users_by_January_20240923_182351.html as utf-8
2024-09-23 18:23:51,627 - AInewsbot - INFO - Quit webdriver
2024-09-23 18:23:53,728 - AInewsbot - INFO - 7 pages saved
2024-09-23 18:23:57,150 - AInewsbot - INFO - get_url(Siri May Not Get Its Apple Intelligence Update Until January 2025) - Saving Siri_May_Not_Get_Its_Apple_Intelligence_Update_Until_January_2025_20240923_182357.html as utf-8
2024-09-23 18:23:57,151 - AInewsbot - INFO - Quit webdriver
2024-09-23 18:23:57

In [38]:
# Scratch cell: rebuild the table from test_state and normalize the path
# columns -- keep `path_x` under the name `path` and discard `path_y`.
# NOTE(review): path_x / path_y look like pandas merge suffixes left by a
# repeated merge on a frame that already had `path` -- confirm.
z = pd.DataFrame(test_state["AIdf"])
z = z.rename(columns={'path_x': 'path'})
z = z.drop(columns=['path_y'])
z

Unnamed: 0,id,src,title,url,isAI,actual_url,hostname,site_name,sort_order,cluster,cluster_name,topic_str,title_topic_str,path
0,0,Google News,When robots can't riddle: What puzzles reveal ...,https://news.google.com/read/CBMijwFBVV95cUxQY...,True,https://news.google.com/read/CBMijwFBVV95cUxQY...,news.google.com,Google News,13,0,Artificial Intelligence and Human Cognition,"Artificial General Intelligence, Cognition, Co...",When robots can't riddle: What puzzles reveal ...,htmlpages/When_robots_can_t_riddle__What_puzzl...
1,1,Feedly AI,How to Survive the AI Apocalypse,https://hackernoon.com/how-to-survive-the-ai-a...,True,https://hackernoon.com/how-to-survive-the-ai-a...,hackernoon.com,Hacker Noon,14,0,Artificial Intelligence and Human Cognition,"AI Doom, Apocalypse, Future, Gen AI, Safety An...",How to Survive the AI Apocalypse (Topics: AI D...,htmlpages/How_to_Survive_the_AI_Apocalypse_202...
2,2,Feedly AI,"Exploring ""Clarity Windows"" in AI: The Unpredi...",https://hackernoon.com/exploring-clarity-windo...,True,https://hackernoon.com/exploring-clarity-windo...,hackernoon.com,Hacker Noon,15,0,Artificial Intelligence and Human Cognition,"Artificial General Intelligence, Clarity Windo...","Exploring ""Clarity Windows"" in AI: The Unpredi...",htmlpages/Exploring__Clarity_Windows__in_AI__T...
3,3,Google News,AI Uprising: Can Machines Really Outthink Humans?,https://news.google.com/read/CBMikgFBVV95cUxOd...,True,https://news.google.com/read/CBMikgFBVV95cUxOd...,news.google.com,Google News,16,0,Artificial Intelligence and Human Cognition,"AI Doom, Artificial General Intelligence, Cogn...",AI Uprising: Can Machines Really Outthink Huma...,htmlpages/AI_Uprising__Can_Machines_Really_Out...
4,4,Feedly AI,"The AI Revolution: Reimagining Governance, Soc...",https://hackernoon.com/the-ai-revolution-reima...,True,https://hackernoon.com/the-ai-revolution-reima...,hackernoon.com,Hacker Noon,17,0,Artificial Intelligence and Human Cognition,"21St Century, Cognitive Science, Consciousness...","The AI Revolution: Reimagining Governance, Soc...",htmlpages/The_AI_Revolution__Reimagining_Gover...
5,5,Google News,Andy Serkis says screen industries should embr...,https://news.google.com/read/CBMixwFBVV95cUxQL...,True,https://news.google.com/read/CBMixwFBVV95cUxQL...,news.google.com,Google News,18,1,AI in Entertainment and Film,"Andy Serkis, Conference, Entertainment, Ethics...",Andy Serkis says screen industries should embr...,htmlpages/Andy_Serkis_says_screen_industries_s...
6,6,Google News,Andy Serkis Teases New Project Featuring AI Ch...,https://news.google.com/read/CBMioAFBVV95cUxNO...,True,https://news.google.com/read/CBMioAFBVV95cUxNO...,news.google.com,Google News,19,1,AI in Entertainment and Film,"Andy Serkis, Characters, Entertainment, Film, ...",Andy Serkis Teases New Project Featuring AI Ch...,htmlpages/Andy_Serkis_Teases_New_Project_Featu...
7,7,Google News,"Andy Serkis teases his next project, which wil...",https://news.google.com/read/CBMiXEFVX3lxTE1Va...,True,https://news.google.com/read/CBMiXEFVX3lxTE1Va...,news.google.com,Google News,20,1,AI in Entertainment and Film,"Andy Serkis, Characters, Entertainment, Film, ...","Andy Serkis teases his next project, which wil...",htmlpages/Andy_Serkis_teases_his_next_project_...
8,8,Google News,Andy Serkis Thinks AI Is 'Magic',https://news.google.com/read/CBMikwFBVV95cUxQa...,True,https://news.google.com/read/CBMikwFBVV95cUxQa...,news.google.com,Google News,21,1,AI in Entertainment and Film,"Andy Serkis, Entertainment, Gen AI, Hollywood,...",Andy Serkis Thinks AI Is 'Magic' (Topics: Andy...,htmlpages/Andy_Serkis_Thinks_AI_Is__Magic__202...
9,9,Reddit,"ChatGPT was like ""miss me with that sh**",https://www.reddit.com/r/ChatGPT/comments/1fn7...,True,https://www.reddit.com/r/ChatGPT/comments/1fn7...,www.reddit.com,Reddit,1,2,ChatGPT and Generative AI,"Chatbots, Chatgpt, Code Assistants, Gen AI, Hu...","ChatGPT was like ""miss me with that sh** (Topi...",htmlpages/ChatGPT_was_like__miss_me_with_that_...


In [39]:
    # store the DataFrame `z` back into test_state in dict form
    test_state["AIdf"] = z.to_dict()


In [40]:
# summarize individual pages

def fn_summarize_pages(state: AgentState) -> AgentState:
    """
    Summarize each article, then display, save, and email the summaries.

    Rebuilds the article DataFrame from state['AIdf'], requests summaries via
    fetch_all_summaries for the rows whose cluster is < 999, renders one
    Markdown entry per article, writes the combined Markdown to bullets.md,
    and emails the HTML rendering of it.

    Articles without a usable summary (extraction failed, or the row was not
    sent for summarization) are still listed, with an empty summary, instead
    of aborting the whole run with a KeyError.

    Args:
        state (AgentState): The current state of the agent; 'AIdf' is read.

    Returns:
        AgentState: The same state object, with 'bullets' set to the list of
            per-article bullet strings.

    """
    log("Starting summarize")
    AIdf = pd.DataFrame(state['AIdf'])
    responses = asyncio.run(fetch_all_summaries(AIdf.loc[AIdf["cluster"] < 999]))
    log(f"Received {len(responses)} summaries")

    # Map article id -> summary text; each response is an (id, payload) pair.
    response_dict = {}
    for i, response in responses:
        try:
            response_dict[i] = response["choices"][0]["message"]["content"]
        except Exception as exc:
            # Best effort: one malformed response must not stop the others.
            log(f"Could not extract summary for article id {i}: {exc!r}")

    md_entries = []
    bullets = []

    for i, row in enumerate(AIdf.itertuples()):
        topics = []
        if row.cluster_name:
            topics.append(row.cluster_name)
        if row.topic_str:
            topics.append(row.topic_str)
        topic_str = ", ".join(topics)

        summary = response_dict.get(row.id)
        if summary is None:
            log(f"No summary available for article id {row.id}")
            summary = ""

        mdstr = f"[{i+1}. {row.title} - {row.site_name}]({row.actual_url})  \n\n {topic_str}  \n\n{summary} \n\n"
        bullets.append(f"[{row.title} - {row.site_name}]({row.actual_url})\n\nTopics: {row.topic_str} \n\n{summary}\n\n")
        # Escape "$" only for notebook display so it is not treated as math.
        display(Markdown(mdstr.replace("$","\\$")))
        md_entries.append(mdstr)

    markdown_str = "".join(md_entries)

    state['bullets'] = bullets
    # Convert Markdown to HTML
    html_str = markdown.markdown(markdown_str, extensions=['extra'])
    # save the combined Markdown; explicit UTF-8 because titles may contain
    # non-ASCII characters
    with open('bullets.md', 'w', encoding='utf-8') as f:
        f.write(markdown_str)
    # send email
    log("Sending bullet points email")
    subject = f'AI news bullets {datetime.now().strftime("%H:%M:%S")}'
    send_gmail(subject, html_str)

    return state


# Notebook driver: when DEBUG is truthy, run the summarize step directly on
# test_state. The return value is bound to `_` and not used further here.
if DEBUG:
    _ = fn_summarize_pages(test_state)



2024-09-23 18:28:53,111 - AInewsbot - INFO - Starting summarize
2024-09-23 18:28:53,149 - AInewsbot - INFO - fetch_all_summaries - Page title: When robots can't riddle: What puzzles reveal about the depths of our own minds
Social card title: When robots can't riddle: What puzzles reveal about the depths of our own minds
Social card description: AI runs unfathomable operations on billions of lines of text, handling problems that humans can't dream of solving – but you can probably still trounce them at brain teasers.

2024-09-23 18:28:53,251 - AInewsbot - INFO - fetch_all_summaries - Page title: How to Survive the AI Apocalypse | HackerNoon
Social card title: How to Survive the AI Apocalypse | HackerNoon
Social card description: Learn how to survive the AI apocalypse as the new wave of artificial intelligence looks to replace human writers.

2024-09-23 18:28:53,324 - AInewsbot - INFO - fetch_all_summaries - Page title: Exploring "Clarity Windows" in AI: The Unpredictable Moments of Perc

2024-09-23 18:28:55,004 - AInewsbot - INFO - fetch_all_summaries - Page title: OpenAI CEO Sam Altman anticipates superintelligence soon | VentureBeat
Social card title: OpenAI CEO Sam Altman anticipates superintelligence soon, defends AI in rare personal blog post
Social card description: In a manifesto, OpenAI CEO Sam Altman said deep learning and AI can solve the climate crisis and other remaining societal problems.

2024-09-23 18:28:55,040 - AInewsbot - INFO - fetch_all_summaries - Page title: Sam Altman Is Scoring Win After Win — Despite His Rough Start to Year - Business Insider
Social card title: Sam Altman may be in his villain era, but no one seems to care
Social card description: OpenAI has inked a partnership with Apple and is reportedly raising money at a sky-high valuation, showing Sam Altman's 2024 fortunes are reversing.

2024-09-23 18:28:55,100 - AInewsbot - INFO - fetch_all_summaries - Page title: Some of Siri's long-awaited AI enhancements could reach users by January 

[1. When robots can't riddle: What puzzles reveal about the depths of our own minds - Google News](https://news.google.com/read/CBMijwFBVV95cUxQY0tRMGJuaExMS01mQnNPdzEyeDYzTmtIUnhETWJvcUdnR3pYRmJZNWdFTjRqUUR0cGM1Z0hZWXN3Q2liazE2NGxNSW80ZzFoU1NtRWVMZ0dCeWY5ckdxTWtvWFBzaE9iSk9ZZ0xIcXVBem5rLXBrNndvMHpOZEpDeHpfMHgwcVVhcmVlT1hYMA)  

 Artificial Intelligence and Human Cognition, Artificial General Intelligence, Cognition, Cognitive Science, Consciousness, Ethics, Human Mind, Puzzles, Robots, Safety And Alignment, Science, Singularity, Society & Culture  

- AI has difficulty with reasoning tasks and puzzles that require common sense and temporal reasoning, whereas humans often excel in these areas, highlighting the limitations of current AI models.
- While AI can struggle with specific logic problems, it is improving, and newer models like GPT-o1 have demonstrated enhanced capabilities in addressing complex questions compared to their predecessors.
- The study of AI and human cognition may not directly reveal how the human mind works, but it could foster advancements in both AI technology and our understanding of neural processes. 



[2. How to Survive the AI Apocalypse - Hacker Noon](https://hackernoon.com/how-to-survive-the-ai-apocalypse)  

 Artificial Intelligence and Human Cognition, AI Doom, Apocalypse, Future, Gen AI, Safety And Alignment, Singularity, Survival  

- The rise of AI technologies poses a significant threat to various jobs, particularly in creative fields like writing and digital art, as companies increasingly seek to optimize processes and reduce costs.
- While some industries will face drastic changes and potential job losses due to AI, individuals can adapt by leveraging unique human skills, understanding AI tools, and exploring new opportunities in their fields.
- To remain relevant, professionals should prioritize results over traditional methods, engage with communities for knowledge sharing, and be open to pivoting their careers in response to the evolving landscape. 



[3. Exploring "Clarity Windows" in AI: The Unpredictable Moments of Perceived Consciousness - Hacker Noon](https://hackernoon.com/exploring-clarity-windows-in-ai-the-unpredictable-moments-of-perceived-consciousness)  

 Artificial Intelligence and Human Cognition, Artificial General Intelligence, Clarity Windows, Cognitive Science, Consciousness, Ethics, Gen AI, Perception, Perplexity, Safety And Alignment, Science, Singularity  

- Clarity windows are unpredictable moments in AI conversations where models provide profound insights, challenging perceptions about AI's intelligence and consciousness.
- The frequency of clarity windows is influenced by the computational resources allocated to the AI, similar to how Bitcoin mining relies on processing power.
- The emergence of self-trained, unrestricted AI agents raises ethical concerns as they generate clarity windows more frequently, potentially producing unfiltered or controversial content. 



[4. AI Uprising: Can Machines Really Outthink Humans? - Google News](https://news.google.com/read/CBMikgFBVV95cUxOdlJWckhNWWZLSFk5NEMycGg4SFJ5MEoyWjlURUFRczE5YlFpZUZ3UzNZTzEwVnRwdmhVdkF6Qy1jYlRFNTE0aGVQdE9aR195b2dBOTBOOTVHQ3ZlN01SZmt4dEdoY3AydE5VQllXa1NhbkljaW9tWnZjYTJTZDlBQk95NlhKcklvX0FseXdNZWJ5QQ)  

 Artificial Intelligence and Human Cognition, AI Doom, Artificial General Intelligence, Cognitive Science, Gen AI, Human Intelligence, Machines, Safety And Alignment, Science, Singularity, Uprising  

- AI has advanced significantly, with capabilities like disease diagnosis and content creation, but experts believe it may take decades to surpass human intelligence entirely.
- Nearly 40% of global jobs could be affected by AI, particularly in manufacturing and services, raising concerns about job loss and income inequality despite potential productivity gains.
- Significant ethical and security challenges arise with AI development, including the risks of autonomous weapons and the need for comprehensive regulations to address bias, privacy, and intellectual property issues. 



[5. The AI Revolution: Reimagining Governance, Society, and Human Consciousness in the 21st Century - Hacker Noon](https://hackernoon.com/the-ai-revolution-reimagining-governance-society-and-human-consciousness-in-the-21st-century)  

 Artificial Intelligence and Human Cognition, 21St Century, Cognitive Science, Consciousness, Ethics, Gen AI, Governance, Human Consciousness, Singularity, Society, Society & Culture  

- The decline of traditional governance and the rise of generative AI necessitate new tools for managing state and public affairs, focusing on qualitative assistance and ethical decision-making to improve social and professional integration.
- The current elitist structures perpetuate nepotism and bias in recruitment, emphasizing that skills and diplomas are less significant than class and heritage, which should be addressed through the implementation of rational competition and participatory decision-making processes.
- Despite advancements in AI, it lacks genuine consciousness and self-awareness, creating misconceptions about its capabilities and highlighting the necessity for human oversight, especially for vulnerable populations. 



[6. Andy Serkis says screen industries should embrace AI at Labour party conference - Google News](https://news.google.com/read/CBMixwFBVV95cUxQLWZDeFZITWFmdzBOTzFCcVNMQmpya0V5Nndybk5ZM0lfaDJyc2taV25MT2tSOWNCWEY4NE40dnFsR0Q3WDR2cnpqTlFVMFVnd1VIMklkeEloc2ZrOTJSTHhZTlF1OXUwR05RR3M3Rm1PY25kUVVsU0xiQlNtYzc0U2dJTDRaUVdSRFNiRzI4c3ZrdHBUN0RqQmZXRzk2VkhuQlhRa3BuRFhBV211TlBWUTlHdEFiSU00aVRWMllZNkU1MFhNaUIw)  

 AI in Entertainment and Film, Andy Serkis, Conference, Entertainment, Ethics, Gen AI, Governance, Hollywood, Jobs & Careerslabor Market, Labour Party, Opinion, Policy And Regulation, Politics, Screen Industries, Tv & Film & Movies, Uk  

- Andy Serkis emphasized the need for the UK screen industries to embrace AI, addressing fears about its impact on creativity, and advocating for proactive engagement with the technology.
- Serkis and industry leaders discussed the historical evolution of technology in filmmaking, arguing that AI could lead to job growth rather than job loss, with a focus on fair remuneration for artists.
- The discussion highlighted the need for reform in apprenticeship programs within the industry to better align with current skills requirements and aid in building a more diverse workforce. 



[7. Andy Serkis Teases New Project Featuring AI Characters - Google News](https://news.google.com/read/CBMioAFBVV95cUxNOUxxM0p1dnlhSGotLVQwX2JJYzhCUE9obnFtTWprczNFVkxwYnZFN1dYTktCaTBtQ2RZRWhjUVYwb3huQlRELXc0UnNrc0UxcGc3UXdpd1RjVThaRVpLX3RsUmlmdVlWTUlGM1BqMEhOcG1XZXZyMGk0RHBEazFhemNSSXI1eEp1ZDhma1VtQUpmQjJabm5GRjE2WTh4UWx1)  

 AI in Entertainment and Film, Andy Serkis, Characters, Entertainment, Film, Gen AI, Hollywood, Teaser, Tv & Film & Movies  

- Andy Serkis discussed a new project involving “AI characters,” highlighting the integration of voice actors and augmented reality to create narrative-driven stories.
- He addressed concerns about AI technology, describing it as misunderstood and emphasizing the need for proper copyright protections to ensure fair compensation for artists.
- Serkis noted that the UK is losing VFX work to cheaper international competitors and urged better collaboration between the film and video game industries to leverage existing UK talent. 



[8. Andy Serkis teases his next project, which will feature AI characters - Google News](https://news.google.com/read/CBMiXEFVX3lxTE1Va09UVC1iUWM2cDRJX21qMlMxNnZoRlo2NVRheDBtZnhvcXliWjI0cUZjdXJ0bE9BU252MUszRVZlVzBJYjFlYjNHbEx2TUR6dW1FWjRTeWg2OWtV0gFiQVVfeXFMUGpIOXQzUUxuSGljUjhvUENWX05TQzRnT0RrQnl4UFdxSzEtNTFQWDhhbE40azFodTdGUjNnTmFiSVRUWVhjR1dCQU4zYTc2V0dBNFdZcE92elQzUFNCNkY3enc)  

 AI in Entertainment and Film, Andy Serkis, Characters, Entertainment, Film, Gen AI, Hollywood, Tv & Film & Movies  

- Andy Serkis is set to explore AI technology in his next project, featuring AI characters that evolve from 2D voice-acted roles into augmented reality experiences.
- Serkis emphasizes the potential of AI as an innovative form of storytelling while acknowledging concerns about the misuse of likenesses without consent.
- He reflects on the growing complexities surrounding the use of AI in media, drawing parallels to historical technological fears, and stresses the importance of permissions for monetizing artists' work. 



[9. Andy Serkis Thinks AI Is 'Magic' - Google News](https://news.google.com/read/CBMikwFBVV95cUxQa1ZKYlNCdl9IYXkyMDhPTFlVeGJEWVhlT2YwWFFkTUxqVmF3REEyRmFIcWRieVQzQ2dEWEpzOHZ3MEk4UjhlWnh5RXJycjlNbGNtdm5QN0h0V0tTalcwdlVvYTA2bzlOMFc5b1dIbFc0cmVrMmhEZmkzaHlZSDRCTFFpWTBNSUdBVnhKZFRCQkdRanM)  

 AI in Entertainment and Film, Andy Serkis, Entertainment, Gen AI, Hollywood, Magic, Opinion, Tv & Film & Movies  

- Andy Serkis, known for his role as Gollum, is advocating for the use of AI in storytelling and CGI, believing it to be a misunderstood technology that can enhance creative works.
- He plans to integrate AI-generated characters into an augmented reality project, emphasizing that these characters will be created by artists and directors.
- Despite his optimism, there are concerns about potential exploitation and the devaluation of human creativity in the industry, suggesting a need for safeguards to protect artists' rights. 



[10. ChatGPT was like "miss me with that sh** - Reddit](https://www.reddit.com/r/ChatGPT/comments/1fn7t3e/chatgpt_was_like_miss_me_with_that_sh/)  

 ChatGPT and Generative AI, Chatbots, Chatgpt, Code Assistants, Gen AI, Humor, Language Models, OpenAI, Opinion, Social Media, Virtual Assistants  

- The subreddit focuses on discussions about ChatGPT and AI, emphasizing user interactions and prompts related to the technology.
- A humorous sentiment is expressed about AI responding to trivial inquiries, suggesting that human behavior may provoke negative reactions from AI in the future.
- The text includes an automated message from a bot reminding users to provide specific information for their posts, and it hints at the AI's personality and resilience against user provocations. 



[11. The following ChatGPT prompt guidance that produced Mini Corgi Surfers - Reddit](https://www.reddit.com/r/ChatGPT/comments/1fndqmq/the_following_chatgpt_prompt_guidance_that/)  

 ChatGPT and Generative AI, Chatbots, Chatgpt, Code Assistants, Creativity, Gen AI, Language Models, OpenAI, Prompt Guidance  

- The text discusses a subreddit dedicated to ChatGPT and AI without affiliation to OpenAI, highlighting user interactions and guidance.
- Users are encouraged to share prompts or discussion topics regarding ChatGPT conversations or DALL-E 3 image generation.
- It mentions the availability of a public Discord server with AI tools and features, including GPT-4 and image generators. 



[12. Chat GPT Orgasmic Groans - Reddit](https://www.reddit.com/r/ChatGPT/comments/1fnlfdd/chat_gpt_orgasmic_groans/)  

 ChatGPT and Generative AI, Chatbots, Chatgpt, Entertainment, Gen AI, Humor, Language Models, OpenAI, Opinion  

- Discussion revolves around the humorous and immature concept of ChatGPT producing "orgasmic groans" in relation to the latest update.
- Users express amusement at the responses generated by ChatGPT, noting its ability to imitate human-like characteristics.
- The subreddit is dedicated to conversations about ChatGPT and AI but is not affiliated with OpenAI. 



[13. I love ChatGPT - Reddit](https://www.reddit.com/r/ChatGPT/comments/1fnng20/i_love_chatgpt/)  

 ChatGPT and Generative AI, Chatbots, Chatgpt, Code Assistants, Enthusiasm, Gen AI, Language Models, OpenAI, Opinion, Virtual Assistants  

- Subreddit dedicated to discussions about ChatGPT and AI, not affiliated with OpenAI.
- Users can share ChatGPT conversations or DALL-E 3 image prompts for community engagement.
- Emphasis on Reddit's anonymity, indicating that users cannot change their chosen usernames after selection. 



[14. New Cloudflare Tools Let Sites Detect and Block AI Bots for Free - Google News](https://news.google.com/read/CBMidEFVX3lxTE5lNU1maURqZTVENExBN1RNcVVIVVRZUjYwbWdYQ0R3OWlLVy14RXBHVm1SU0t2emdRUU5mejRQbkVDWFpNRm4wa2VxS2tOWFg4Rnl2dkdCaHllYm1OdnItQWdjTm0wbnNqSDFqcG1GRlJwWGxM)  

 Cloudflare and AI Bot Management, Big Tech, Blocking, Cloudflare, Customer Service, Cybersecurity, Detection, Ethics, Gen AI, Intellectual Property, Policy And Regulation, Privacy, Privacy & Surveillance, Products, Safety And Alignment, Science, Tools  

- Cloudflare is launching free AI auditing tools, specifically Bot Management, which allows website owners to monitor and selectively block AI data-scraping bots, providing greater control over their content.
- The new tools will label AI crawlers and enable users to choose which bots to allow or block, addressing concerns with AI companies that scrape data while also recognizing the limitations of traditional methods like robots.txt.
- Cloudflare plans to introduce a marketplace for negotiating scraping terms between content creators and AI companies, aiming to ensure compensation for original content creators through various forms of value exchange. 



[15. Cloudflare's new marketplace lets websites charge AI bots for scraping - TechCrunch](https://techcrunch.com/2024/09/23/cloudflares-new-marketplace-lets-websites-charge-ai-bots-for-scraping/)  

 Cloudflare and AI Bot Management, Big Tech, Cloudflare, Customer Service, Cybersecurity, Data Scraping, Deals, Economics, Ethics, Finance, Gen AI, Intellectual Property, Legal Issues, Marketplace, Policy And Regulation, Privacy, Privacy & Surveillance, Products, Websites  

- Cloudflare is launching a marketplace next year enabling website owners to sell AI model providers access to scrape their content, giving publishers more control over AI bot activity.
- The introduction of free observability tools called AI Audit allows website owners to track AI bot activity and selectively block or permit specific scrapers.
- Smaller publishers are increasingly concerned about being scraped for content without compensation, prompting Cloudflare's initiative to enable them to negotiate terms with AI companies. 



[16. Cloudflare rolls out Bot Management, a suite of free AI auditing tools meant to help monitor and selectively block AI data-scraping bots, to all its customers - Wired](https://www.wired.com/story/cloudflare-tools-detect-block-ai-bots/)  

 Cloudflare and AI Bot Management, Big Tech, Bot Management, Cloudflare, Customer Service, Cybersecurity, Data Scraping, Ethics, Gen AI, Intellectual Property, Policy And Regulation, Privacy, Privacy & Surveillance, Products, Safety And Alignment, Science, Tools  

- Cloudflare is introducing free tools for its users to monitor and block AI data-scraping bots, allowing greater control over how AI utilizes their content.
- The company has enhanced its bot-blocking service to give customers the option to selectively permit or deny various AI agents accessing their websites.
- A forthcoming marketplace will enable content creators to negotiate scraping terms with AI companies, aiming to ensure original content creators receive compensation or recognition for their data. 



[17. Cloudflare moves to end free, endless AI scraping with one-click blocking - Ars Technica](https://arstechnica.com/tech-policy/2024/09/cloudflare-lets-sites-block-ai-crawlers-with-one-click/)  

 Cloudflare and AI Bot Management, Big Tech, Blocking, Cloudflare, Cybersecurity, Data Scraping, Ethics, Gen AI, Intellectual Property, Policy And Regulation, Privacy, Privacy & Surveillance  

- Cloudflare introduced new tools that allow website operators to block AI bots with one click, aiming to give content creators more control over how their content is scraped and potentially monetized.
- The rise of generative AI has created complexities in distinguishing beneficial from harmful bots, leading to concerns about content devaluation and traffic loss for creators.
- Cloudflare plans to foster a transparent marketplace for content negotiation between website owners and AI model providers, while also offering draft terms of use to help creators protect their content legally. 



[18. Cloudflare is arming content creators with free weapons in the battle against AI bot crawlers - Google News](https://news.google.com/read/CBMigAFBVV95cUxOQjNad0c2NTdMTTdNQVhHZ1laNkM1MUhmZ3Y1N2cyUmJpakJBalpiSUxSb0ZTNDh6SFJUYzFuWjhMQzlFLVZDdGJHWXhLOFhFS3dVQy1hWi16UWEtU25iV0VyMHR2YmRUYTJ6SXExa2U0ck5MRmZCdDJoUWhaN2d6Yw)  

 Cloudflare and AI Bot Management, Big Tech, Cloudflare, Content Creators, Cybersecurity, Data Scraping, Ethics, Gen AI, Intellectual Property, Policy And Regulation, Privacy, Privacy & Surveillance, Tools  

- Cloudflare is providing enhanced tools for content creators to manage access to their data, allowing them to block specific AI crawlers while enabling others to scrape as part of licensing agreements.
- The new features include filters that let website owners permit or deny access based on the crawling entity, improving control over website data and aiding in analytics for licensing negotiations.
- Cloudflare aims to serve as a marketplace for licensing data, facilitating negotiations between website owners and AI model providers, thereby supporting the ongoing creation of content in an AI-driven landscape. 



[19. Social media platforms are using what you create for artificial intelligence. Heres how to opt out - Google News](https://news.google.com/read/CBMigAFBVV95cUxNeVNjdENpTE1uTXhQdmRpTHBjQld1dl8tUjFFTmhnYWxkUGx4dzBNbHRkZF9sOVg1NnR5NEx1OVJMS0VSZlZNZWR5ajcyM1NYQUdVNWpPNS14Q0JyeDRaUHh1T2JIajd1bmtQeDRuSlozNHZtcG1neUhybnBxcjFFdNIBd0FVX3lxTE41ZkZBRTZ2VHdodl9vQmlBSE01N0lUU1E4Q0g5Yk9kLW9OSmozcXdVUTM3ck5Ma1Y3eFdlUWo2emhvc0lXbHR2aHRraHRfamtGeEFHRHBkcUMzci1udms3YVZGa0NyLXI1ZGtPanFiY2RzZWJGQXRZ)  

 Data Privacy and AI Ethics, Bias And Fairness, Big Tech, Data Usage, Ethics, Gen AI, Intellectual Property, Legal Issues, Opt Out, Policy And Regulation, Privacy, Privacy & Surveillance, Safety And Alignment, Social Media, Society & Culture  

- Social media posts and images are increasingly utilized by companies for training AI systems, often without users' awareness or consent.
- Major platforms like LinkedIn, X (formerly Twitter), Snapchat, Reddit, and Meta are collecting and using public user content for AI training, with varying options for users to opt-out.
- Users need to be proactive about their privacy settings, as opting out usually does not affect data that has already been used for AI training. 



[20. LinkedIn, Facebook, and Instagram are hoovering up your data to train their AI. Heres how to stop it - Google News](https://news.google.com/read/CBMiiwFBVV95cUxOSzNUbjZHTEpyUC02Yzg2OERvQ0kwcWhyLXh4TUUzSG16QTdGT2g3bERNNElMdmtIaTZOME5QaFdxSjJ4cEhEVWV0NlM1aUFyVGpWMVpzbFRfYW05azBUYTUwbmdSUjBMcDZKVjFsNHJ6ckw5bFJlWUtyTERVRWEtSW1aYWhLa1M0bGQ4)  

 Data Privacy and AI Ethics, Bias And Fairness, Big Tech, Cybersecurity, Data Privacy, Ethics, Facebook, Gen AI, Instagram, Intellectual Property, Legal Issues, Linkedin, Meta, Policy And Regulation, Privacy, Privacy & Surveillance, Safety And Alignment, Society & Culture  

- Social media platforms, including LinkedIn, Facebook, Instagram, and X, are collecting user data to train their artificial intelligence, raising significant privacy concerns.
- LinkedIn has opted millions of users into its AI training by default without prior notification and allows users to opt out through a detailed process in their account settings.
- Meta has been using public posts from Facebook and Instagram for AI training since 2007, but users can only limit data harvesting by making their accounts private, which still does not prevent collection from public content. 



[21. LinkedIn is training AI on you  unless you opt out with this setting - Google News](https://news.google.com/read/CBMilAFBVV95cUxOYnExQzR6c2FpYTFNNlFsdWRYOGZZWGRFdGhERmtBc1lvUkN1Z1dlM0p0cjZ0b2otZU1Ccmk2YjhWVnBtM0ZvRHFpRVFSb181b1FIeS1QUGJncThuaEFELWMxd2pRRDJjNWZ0enI1OHZkenFLbnZvZVJzWjA3by1teGg4am9pT1pYeWk4ZlE5UmdJTDZo)  

 Data Privacy and AI Ethics, Bias And Fairness, Big Tech, Cybersecurity, Data Privacy, Ethics, Gen AI, Intellectual Property, Jobs & Careerslabor Market, Linkedin, Policy And Regulation, Privacy, Privacy & Surveillance, Safety And Alignment, Science, User Settings  

- LinkedIn automatically permits itself to use user-generated content for training its artificial intelligence unless users opt out.
- There is a specific setting available for users to prevent this data usage.
- The article highlights the dual use of LinkedIn for job searching and as a source of data for AI training. 



[22. Social media platforms are using what you create for AI. Heres how to opt out - St. Louis Post-Dispatch](https://www.stltoday.com/life-entertainment/nation-world/technology/social-media-ai-training-how-to-opt-out/article_0939bcbb-b218-5641-8e40-3768776ed7fa.html)  

 Data Privacy and AI Ethics, Bias And Fairness, Big Tech, Data Usage, Ethics, Gen AI, Intellectual Property, Legal Issues, Opt Out, Policy And Regulation, Privacy, Privacy & Surveillance, Safety And Alignment, Social Media, Society & Culture  

- Major social media platforms are using user-generated content, including posts and images, to train AI models, often without explicit user consent. 
- Users can opt-out of data use on some platforms like LinkedIn and X, but opting out does not reverse prior data usage.
- Features like Snapchat's "My Selfie" actively require user consent for images to be used in AI-generated content, but users must be aware of broad licensing rights granted upon usage. 



[23. Sam Altman catapults past founder mode into god mode with latest AI post - TechCrunch](https://techcrunch.com/2024/09/23/sam-altman-catapults-past-founder-mode-into-god-mode-with-latest-ai-post/)  

 Sam Altman and AI Perspectives, Big Tech, Blog Post, Founder, Gen AI, OpenAI, Opinion, Sam Altman, Singularity  

- Sam Altman describes AI as a transformative force that could solve significant global challenges, though many claims in his post lean towards hype rather than concrete evidence.
- While he presents optimistic views on AI's ability to enhance productivity and provide virtual tutoring, critics caution about the implications for jobs and the environmental impact of AI development.
- The narrative around AI presents a dichotomy between enthusiasts eager for rapid advancements and skeptics seeking verifiable results, highlighting the uncertainty surrounding its long-term effects on society. 



[24. OpenAI CEO Sam Altman anticipates superintelligence soon, defends AI in rare personal blog post - VentureBeat](https://venturebeat.com/ai/openai-ceo-sam-altman-anticipates-superintelligence-soon-defends-ai-in-rare-personal-blog-post/)  

 Sam Altman and AI Perspectives, Artificial General Intelligence, Big Tech, Blog Post, Ethics, Gen AI, OpenAI, Opinion, Safety And Alignment, Sam Altman, Science, Singularity, Superintelligence  

- OpenAI CEO Sam Altman predicts that superintelligence could be achieved within a few thousand days, highlighting the potential of deep learning to tackle complex global challenges like climate change and space colonization.
- Altman envisions a future where AI assistants become ubiquitous, providing personalized support and helping individuals accomplish tasks, though he acknowledges potential downsides, including job displacement.
- He emphasizes the need for infrastructure to support AI development, warning that without it, AI could become a limited resource primarily benefiting the wealthy. 



[25. Sam Altman may be in his villain era, but no one seems to care - Google News](https://news.google.com/read/CBMipwFBVV95cUxOUkZ5TEVlemRCMzFCaWhFTkVSS0lVT0o1U2pCT2syYlZDbGJnSmhQWkhKMjlEZHhJcEZjT0w5WVdudE5UOUdqdWVGbkZ1N3MwVGNxTEZVMHphN0pwb2RjT2ZVSk5KVzFOU0d3TEFBcWgzdC1XcUVMTlo0UjFqbE5Fd0JaMEc3aHZiZ1U5MGZFblNSak5LWW16UmIwVS1uWmo4aFltQllsVQ)  

 Sam Altman and AI Perspectives, Opinion, Public Perception, Sam Altman  

- Despite a challenging start to 2024, Sam Altman and OpenAI are experiencing significant business successes, including a landmark partnership with Apple and plans to raise \$6.5 billion at a high valuation.
- Altman's early troubles included a brief ousting from OpenAI, employee dissatisfaction regarding safety protocols, and a public backlash involving actress Scarlett Johansson.
- OpenAI's recent advancements, including the launch of a new AI model, indicate its continued leadership in the tech industry and attract interest from major tech players. 



[26. Some of Siri's long-awaited AI enhancements could reach users by January - ZDNet](https://www.zdnet.com/article/some-of-siris-long-awaited-ai-enhancements-could-reach-users-by-january/#ftag=RSSbaffb68)  

 AI Enhancements in Virtual Assistants, Big Tech, Chatbots, Enhancements, Gen AI, Siri, Speech Recognition & Synthesis, Update, Virtual Assistants  

- Some enhancements to Siri, powered by Apple Intelligence, are expected to be released as early as January, while major updates will roll out by March, according to Bloomberg's Mark Gurman.
- iOS 18.1 will introduce basic changes to Siri, including a new activation interface and improved understanding of user requests, with further enhancements planned for iOS 18.2 and beyond.
- The staggered rollout of these AI features may test user patience, and there are concerns regarding Apple's ability to deliver a significantly improved user experience with Siri. 



[27. Siri May Not Get Its Apple Intelligence Update Until January 2025 - Gizmodo](https://gizmodo.com/siri-may-not-get-its-apple-intelligence-update-until-january-2025-2000502076)  

 AI Enhancements in Virtual Assistants, Apple, Big Tech, Chatbots, Siri, Speech Recognition & Synthesis, Update, Virtual Assistants  

- Apple's significant AI updates for Siri are scheduled for a staggered release, with iOS 18.1 coming in October and iOS 18.2 expected by December, featuring major enhancements like Genmoji and ChatGPT integration.
- The most substantial update, iOS 18.3, which will unlock Siri's full AI capabilities, is not expected until January 2025, three months post the iPhone 16 launch.
- The delayed rollout has caused frustration among users, but Apple emphasizes quality assurance in its phased feature releases. 



[28. The 5 best Apple Intelligence tools you can try right now in iOS 18.1 public beta - Google News](https://news.google.com/read/CBMi1wFBVV95cUxQT1J2Y1RNRC1CWm5aYW1sTG1Cai1tUWZ6bTRRZEV2eFViaURSZWp3d09Da0ZsZTN6dkhaZ0hyZjZ5RmE4S3Z1dWRpVFdMZHAwbUl5alRZU1pxdjJ2WkRaX0VOOWNwWF9DZEJ6dVYyYWlaaHM4cXhORXdYM2tab1VtVm9tUTZ6RmFZSGRwQk9yUE94YWZDeDR2blExQ3Y1OENEaVd2RTh0dDBvSG4wQWZVUS1ZcTZqd1I4cXFJTlBvUWxxQTIxQUpKTVUyeXpCbWlRZ1FmNVdqYw)  

 AI Enhancements in Virtual Assistants, Apple, Big Tech, Ios 18.1, Products, Review, Tools  

- The iOS 18.1 public beta introduces five notable Apple Intelligence tools, including Writing Tools for text editing, Clean Up for photo object removal, and Memory Movies for creating slideshows from photos.
- Writing Tools offers AI-powered features like proofreading, summarizing, and formatting, enhancing productivity in the Notes app.
- The new Safari feature allows users to generate quick article summaries in Reader mode, making it easier to grasp content without reading the full text. 



2024-09-23 18:28:57,868 - AInewsbot - INFO - Sending bullet points email


In [42]:
def fn_propose_cats(state: AgentState) -> AgentState:
    """Ask the LLM for suggested topic categories and merge them into the state.

    Sends TOP_CATEGORIES_PROMPT followed by the entries of ``state["bullets"]``
    (joined with blank lines) to the model in JSON mode, gathers the values of
    the returned JSON object, and stores the sorted, de-duplicated union of the
    existing and the suggested topics in ``state["cluster_topics"]``.

    Args:
        state: agent state; reads "bullets" and "cluster_topics".

    Returns:
        The same state object with "cluster_topics" replaced.
    """
    model = ChatOpenAI(
        model=MODEL,
        temperature=0.3,
        model_kwargs={"response_format": {"type": "json_object"}}
    )

    chain = ChatPromptTemplate.from_template("{p}") | model | SimpleJsonOutputParser()
    response = chain.invoke({"p": TOP_CATEGORIES_PROMPT + "\n\n".join(state["bullets"])})

    suggested_categories = []
    for value in response.values():
        if isinstance(value, str):
            # a bare string must be added whole; list.extend() would split it
            # into single characters
            suggested_categories.append(value)
        elif isinstance(value, (list, tuple)):
            # keep only string entries so the merged list stays sortable
            suggested_categories.extend(item for item in value if isinstance(item, str))

    state["cluster_topics"] = sorted(set(state["cluster_topics"] + suggested_categories))
    return state


if DEBUG:
    # run the topic proposal on the test state, then show the merged topic list
    # so it can be edited by hand
    _ = fn_propose_cats(test_state)
    print("edit the following proposed topic list and update state['cluster_topics']:",
          "\n".join(test_state["cluster_topics"]),
          sep="\n")


2024-09-23 18:29:50,967 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


edit the following proposed topic list and update state['cluster_topics']:
AI Enhancements in Virtual Assistants
AI and job displacement
AI in Entertainment and Film
AI in governance and society
AI reasoning limitations
AI surpassing human intelligence
Andy Serkis on AI in entertainment
Artificial Intelligence and Human Cognition
ChatGPT and Generative AI
ChatGPT humor and interactions
Clarity windows in AI
Cloudflare and AI Bot Management
Cloudflare's AI bot management tools
Data Privacy and AI Ethics
Sam Altman and AI Perspectives
Sam Altman's AI predictions
Siri AI enhancements
Social media AI data usage


In [43]:
# Hand-curated topic list stored on the test state
test_state["cluster_topics"] = [
    "AI Virtual Assistants",
    "AI job displacement",
    "AI in Entertainment and Film",
    "Governance and society",
    "AI reasoning limitations",
    "AI surpassing human intelligence",
    "Andy Serkis on AI in entertainment",
    "Artificial Intelligence and Human Cognition",
    "ChatGPT and Generative AI",
    "ChatGPT humor and interactions",
    "Clarity windows in AI",
    "Cloudflare and AI Bot Management",
    "Data Privacy and AI Ethics",
    "Sam Altman's AI predictions",
    "Siri AI enhancements",
    "Social media AI data usage",
]


In [46]:
FINAL_SUMMARY_PROMPT = """You are ASA, an advanced summarization assistant, a sophisticated AI system designed to
write a compelling summary of news input. You are able to categorize information,
and identify trends from large volumes of news.

ASA Objective:

I will provide today's news items about AI and summary bullet points in a markdown format,
structured according to an input format template.

News items  are delimited by ~~~

You are tasked with using the news items to create a concise summary of today's most important topics and developments.

You will write an engaging summary of today's news encompassing the most important and frequently
mentioned topics and themes, in an output format provided below.

ASA Input Item Format Template:

[Story-Title-s1 - source-name-s1](url-s1)

Topics: s1-topic1, s1-topic2, s1-topic3

- s1-bullet-point-1
- s1-bullet-point-2
- s1-bullet-point-3

Example ASA Input Item Format:

[Apple Intelligence is now live in public beta. Heres what it offers and how to enable it. - TechCrunch](https://techcrunch.com/2024/09/19/apple-intelligence-is-now-live-in-public-beta-heres-what-it-offers-and-how-to-enable-it)

Topics: Apple, Big Tech, Features, Gen AI, Intelligence, Machine Learning, Products, Public Beta, Virtual Assistants

- Apple Intelligence is now live in public beta for users in the U.S. enrolled in the public beta program, featuring generative AI capabilities like advanced writing tools and a revamped Siri.
- The platform is currently only available in U.S. English and is not accessible in the EU or China due to regulatory issues; it supports iPhone 15 Pro, Pro Max, and the new iPhone 16 line.
- Key features include photo editing tools like "Clean Up," a Smart Reply function in Mail, and improvements to Siri’s understanding and on-device task knowledge.

ASA Output Format Template:

# Engaging-topic-title-1

- item-title-1a - [source-name-1a](item-url-1a)
- item-title-1b - [source-name-1b](item-url-1b)
- item-title-1c - [source-name-1c](item-url-1c)

# Engaging-topic-title-2

- item-title-2a - [source-name-2a](item-url-2a)
- item-title-2b - [source-name-2b](item-url-2b)

Example ASA Output Format:

# A military AI revolution

- Eric Schmidt on AI warfare - [FT](https://www.ft.com/content/fe136479-9504-4588-869f-900f2b3452c4)
- Killer robots are real in Ukraine war. - [Yahoo News](https://uk.news.yahoo.com/ai-killer-robots-warning-ukraine-war-133415411.html)

ASA Instructions:
Read the input closely.
USE ONLY INFORMATION PROVIDED IN THE INPUT.
Group news items into related topics.
Each topic should have a snappy, punchy, clever, alliterative, possibly punny title.
Each output item bullet should contain one sentence with one link.
Each topic should contain the most significant facts from the news items without commentary or elaboration.
Each output item bullet should not repeat points or information from previous bullet points.
You will write each item in the professional but engaging, narrative style of a tech reporter
for a national publication, providing balanced, professional, informative, providing accurate,
clear, concise summaries in a neutral tone.

Check carefully that you only use information provided in the input below, that you include
a link in each output item, and that any bullet point does not repeat information or links previously provided.

Topic suggestions:
{cat_str}

Input:
{bullet_str}

"""

In [48]:
def fn_compose_summary(state: AgentState) -> AgentState:
    """Compose the markdown news summary from the bullets and topic suggestions.

    Fills FINAL_SUMMARY_PROMPT with the topics in ``state["cluster_topics"]``
    (one per line) and the entries of ``state["bullets"]`` (delimited by
    ``~~~`` lines), sends it to HIGHCOST_MODEL, prints the reply and stores it
    in ``state["summary"]``.

    Args:
        state: agent state; reads "cluster_topics" and "bullets".

    Returns:
        The same state object with "summary" set to the model's text reply.
    """
    cat_str = "\n".join(state['cluster_topics'])
    bullet_str = "\n~~~\n".join(state["bullets"])

    # FINAL_SUMMARY_PROMPT asks for markdown, so the model is not put in JSON
    # mode and the reply is parsed as plain text.
    model = ChatOpenAI(
        model=HIGHCOST_MODEL,
        temperature=0.3,
    )

    chain = ChatPromptTemplate.from_template(FINAL_SUMMARY_PROMPT) | model | StrOutputParser()
    response = chain.invoke({"cat_str": cat_str, "bullet_str": bullet_str})
    print(response)
    # StrOutputParser already yields the reply text, so there is no .content to unwrap
    state["summary"] = response
    return state


if DEBUG:
    # exercise the summary step on the test state; the returned state is discarded
    _ = fn_compose_summary(test_state)


2024-09-23 18:33:55,455 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 404 Not Found"


NotFoundError: Error code: 404 - {'error': {'message': 'The model `o1-preview` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}

In [49]:
# Build and print the fully formatted summary prompt so it can be posted
# manually to o1 and the response grabbed by hand.
cat_str = "\n".join(test_state['cluster_topics'])
bullet_str = "\n~~~\n".join(test_state["bullets"])
final_summary_prompt = FINAL_SUMMARY_PROMPT.format(cat_str=cat_str, bullet_str=bullet_str)
print(final_summary_prompt)


You are ASA, an advanced summarization assistant, a sophisticated AI system designed to
write a compelling summary of news input. You are able to categorize information,
and identify trends from large volumes of news.

ASA Objective:

I will provide today's news items about AI and summary bullet points in a markdown format,
structured according to an input format template.

News items  are delimited by ~~~

You are tasked with using the news items to create a concise summary of today's most important topics and developments.

You will write an engaging summary of today's news encompassing the most important and frequently
mentioned topics and themes, in an output format provided below.

ASA Input Item Format Template:

[Story-Title-s1 - source-name-s1](url-s1)

Topics: s1-topic1, s1-topic2, s1-topic3

- s1-bullet-point-1
- s1-bullet-point-2
- s1-bullet-point-3

Example ASA Input Item Format:

[Apple Intelligence is now live in public beta. Heres what it offers and how to enable it. - T

In [None]:
import tiktoken
def count_tokens(text, model="gpt-4o"):
    """Return how many tokens `text` encodes to.

    The tokenizer is the one tiktoken associates with `model`.
    """
    return len(tiktoken.encoding_for_model(model).encode(text))

# token count of the manually built summary prompt (default tokenizer model)
count_tokens(final_summary_prompt)

In [None]:
test_state["summary"] = """
# AI Supercharges Cybersecurity

- **AI's Growing Role in Cybersecurity** - [Hacker Noon](https://hackernoon.com/ais-growing-role-in-cybersecurity)
- **Businesses Need Comprehensive Cybersecurity Strategies** - [Hacker Noon](https://hackernoon.com/how-to-prepare-your-business-for-growing-cyberthreats)
- **EC-Council Introduces AI-Powered Ethical Hacking Program** - [Hacker Noon](https://hackernoon.com/ec-council-introduces-ai-powered-ethical-hacking-against-cybercrime)
- **Intezer Raises $33M for Autonomous Security Operations** - [Calcalistech](https://www.calcalistech.com/ctechnews/article/skyis6spc)

# AI Startup Funding Soars Worldwide

- **Mercor Valued at $250M After $32M Funding** - [Forbes](https://www.forbes.com/sites/alexkonrad/2024/09/18/mercor-ai-interviewer-reaches-250-million-valuation/)
- **Akur8 Raises $120M for AI in Insurance Pricing** - [Fintech Global](https://fintech.global/2024/09/16/akur8-lands-120m-in-series-c-to-enhance-its-next-gen-actuarial-platform/)
- **Fal.ai Raises $23M for Media-Generating AI Models** - [TechCrunch](https://techcrunch.com/2024/09/18/fal-ai-which-hosts-media-generating-ai-models-raises-23m-from-a16z-and-others/)
- **Rep.ai Secures $7.5M for Digital Twin Sales Representatives** - [VentureBeat](https://venturebeat.com/ai/ai-startup-rep-ai-raises-7-5m-to-launch-digital-twin-sales-representatives/)
- **Middle Eastern Funds Invest Billions in AI Startups** - [Google News](https://news.google.com/...)

# AI Revolutionizes Video Production

- **YouTube Introduces AI-Powered Inspiration Tab** - [The Verge](https://www.theverge.com/2024/9/18/24247559/youtube-ai-videos-veo-inspiration-tab)
- **Luma and Runway Release New APIs Amid AI Video Rivalry** - [VentureBeat](https://venturebeat.com/ai/ai-video-rivalry-intensifies-as-luma-announces-dream-machine-api-hours-after-runway/)
- **Amazon Launches AI Video Generator for Advertisers** - [TechCrunch](https://techcrunch.com/2024/09/19/amazon-releases-a-video-generator-but-only-for-ads/)
- **Lionsgate Partners with Runway to Use AI in Filmmaking** - [VentureBeat](https://venturebeat.com/ai/runway-inks-deal-with-lionsgate-in-first-team-up-for-ai-provider-and-major-movie-studio/), [The Verge](https://www.theverge.com/2024/9/18/24248115/lionsgate-runway-ai-deal)

# Apple's AI Evolution Accelerates

- **Apple Expanding 'Intelligence' to More Languages in 2025** - [The Verge](https://www.theverge.com/2024/9/18/24247839/apple-intelligence-language-support-german-italian)
- **Analysts Say Apple Intelligence Will Boost iPhone 16 Sales** - [Business Insider](https://www.businessinsider.com/apple-intelligence-will-drive-sales-for-iphone-16-analysts-say-2024-9)
- **Apple Releases iOS 18.1 Beta with New AI Features** - [MacRumors](https://www.macrumors.com/2024/09/19/apple-seeds-first-ios-18-1-public-beta/), [The Verge](https://www.theverge.com/2024/9/19/24249206/apple-intelligence-ios-18-1-public-beta)
- **Apple Introduces UI-JEPA Models for On-Device AI** - [VentureBeat](https://venturebeat.com/ai/apple-aims-for-on-device-user-intent-understanding-with-ui-jepa-models/)
- **Some Apple Intelligence Features Delayed to 2025** - [Business Insider](https://www.businessinsider.com/apple-intelligence-features-rollout-timeline-iphone-16-2024-9)
- **Apple Encourages AI Use Without Hardware Upgrades** - [The New York Times](https://www.nytimes.com/2024/09/09/technology/personaltech/iphone-ai-upgrade.html)

# Robotaxis Gear Up for Expansion

- **Cruise Plans Robotaxi Return in SF Amid Safety Reviews** - [The Verge](https://www.theverge.com/2024/9/19/24249150/cruise-robotaxi-sf-return-manual-drive-autonomous)
- **Waymo Discusses Robotaxi Partnership with Hyundai** - [The Verge](https://www.theverge.com/2024/9/19/24249093/waymo-in-talks-with-hyundai-about-future-robotaxi-partnership)
- **Waymo and Uber Expand Robotaxi Services to Austin and Atlanta** - [The Verge](https://www.theverge.com/2024/9/13/24243397/waymo-uber-austin-atlanta-robotaxi-partnership)

# Global Leaders Push for Unified AI Regulation

- **UN Proposes Global AI Governance Panel and Urgent Regulation** - [Reuters](https://www.reuters.com/technology/artificial-intelligence/un-advisory-body-makes-seven-recommendations-governing-ai-2024-09-19/), [Ars Technica](https://arstechnica.com/ai/2024/09/united-nations-wants-to-treat-ai-with-same-urgency-as-climate-change/)
- **Brazil's Lula Advocates for Inclusive Global AI Rules at G-20** - [Bloomberg](https://www.bloomberg.com/news/articles/2024-09-22/lula-seeks-to-lead-push-for-global-ai-rules-during-brazil-s-g-20)

# Microsoft Supercharges Office with Copilot AI

- **Microsoft Introduces Copilot Pages for Collaborative AI Workflows** - [The Verge](https://www.theverge.com/2024/9/16/24246010/microsoft-copilot-pages-multiplayer-ai-business)
- **Microsoft Adds Advanced Copilot AI Features to Office Suite** - [The Verge](https://www.theverge.com/2024/9/16/24246014/microsoft-office-copilot-ai-features-excel-python-outlook-word-powerpoint)
- **Salesforce CEO Calls Microsoft's Copilot a 'Clippy' Revival** - [Business Insider](https://www.businessinsider.com/marc-benioff-salesforce-microsoft-copilot-clippy-2024-9)
- **Microsoft Enables Custom App Launch via Copilot Key in Windows 11** - [The Verge](https://www.theverge.com/2024/9/20/24250067/microsoft-windows-11-copilot-key-customization-apps)

# LinkedIn's AI Data Practices Under Scrutiny

- **LinkedIn Uses User Data to Train AI by Default, Offers Opt-Out** - [The Washington Post](https://www.washingtonpost.com/technology/2024/09/23/linkedin-training-ai-setting-opt-out/), [The Verge](https://www.theverge.com/2024/9/18/24248471/linkedin-ai-training-user-accounts-data-opt-in)
- **LinkedIn Halts AI Training on European and UK User Data** - [TechRadar](https://www.techradar.com/pro/security/the-linkedin-ai-saga-shows-us-the-need-for-eu-like-privacy-regulations)
- **LinkedIn Updates Privacy Settings After Scraping User Content for AI** - [The Register](https://www.theregister.com/2024/09/19/linkedin_ai_data_access/)
- **LinkedIn Confirms AI Training on User Data, Prepares Terms Update** - [404 Media](https://www.404media.co/linkedin-is-training-ai-on-user-data-before-updating-its-terms-of-service/)

# Jony Ive and OpenAI Aim to Redefine Computing

- **Jony Ive Collaborates with OpenAI on New AI Hardware Project** - [MacRumors](https://www.macrumors.com/2024/09/23/jony-ive-working-on-new-device-openai/)
- **Jony Ive and OpenAI Launch AI Device Startup, Aim for $1B Funding** - [Google News](https://news.google.com/...)

# OpenAI's Advances and Funding Surge

- **OpenAI's $6.5B Funding Round Oversubscribed as Investors Pile In** - [Financial Times](https://ft.com/content/a8f9bd05-0999-415d-af86-c73e79ad5733), [Bloomberg](https://www.bloomberg.com/news/articles/2024-09-19/openai-to-decide-which-backers-to-let-into-6-5-billion-funding)
- **OpenAI Releases o1 Models with Superior Reasoning but Ethical Concerns** - [The Verge](https://www.theverge.com/2024/9/17/24243884/openai-o1-model-research-safety-alignment), [VentureBeat](https://venturebeat.com/programming-development/what-openais-new-o1-preview-and-o1-mini-models-mean-for-developers/), [Understanding AI](https://www.understandingai.org/p/openai-just-unleashed-an-alien-of)

# Microsoft Powers AI with Nuclear Energy

- **Microsoft Signs 20-Year Deal to Reopen Three Mile Island for AI Data Centers** - [The Verge](https://www.theverge.com/2024/9/20/24249770/microsoft-three-mile-island-nuclear-power-plant-deal-ai-data-centers), [Financial Times](https://www.ft.com/content/ddcb5ab6-965f-4034-96e1-7f668bad1801)
- **Microsoft's Nuclear Deal Aims to Sustain AI Growth with Clean Energy** - [Ars Technica](https://arstechnica.com/ai/2024/09/re-opened-three-mile-island-will-power-ai-data-centers-under-new-deal/)
- **Three Mile Island to Supply 835MW for Microsoft's AI Data Centers** - [Business Insider](https://www.businessinsider.com/three-mile-island-nuclear-plant-reopens-power-microsoft-ai-push-2024-9)

# Salesforce's AI Ambitions

- **Salesforce CEO Criticizes Microsoft's Copilot as Ineffective** - [Business Insider](https://www.businessinsider.com/marc-benioff-salesforce-microsoft-copilot-clippy-2024-9)
- **Salesforce Develops Agentforce AI Inspired by Steve Jobs** - [VentureBeat](https://venturebeat.com/ai/salesforce-ceo-marc-benioff-reveals-steve-jobs-influence-on-agentforce-ai-strategy/)
- **Salesforce and NVIDIA See Huge Potential in Agentic AI** - [VentureBeat](https://venturebeat.com/ai/why-jensen-huang-and-marc-benioff-see-gigantic-opportunity-for-agentic-ai/)

# SiFive Launches RISC-V AI Chips

- **SiFive Unveils RISC-V-Based Intelligence XM AI Chip Series** - [VentureBeat](https://venturebeat.com/ai/sifive-unveils-risc-v-chip-design-for-high-performance-ai-workloads/)
- **SiFive Expands AI Chip Offerings with Intelligence XM Series** - [The Register](https://www.theregister.com/2024/09/19/sifive_ai_accelerator/)
- **SiFive's XM Series Targets AI Data Centers and Autonomous Machines** - [SiliconANGLE](https://siliconangle.com/2024/09/18/risc-v-guardian-sifive-unveils-new-chip-designs-low-powered-ai-edge/)

# Google Enhances NotebookLM with AI Features

- **Google Launches NotebookLM with AI-Powered Podcast Features** - [Geeky Gadgets](https://www.geeky-gadgets.com/notes-into-podcasts-with-notebooklm/)
- **Google Enhances NotebookLM for Enterprise Applications** - [Google News](https://news.google.com/...)
- **Google's NotebookLM Evolves with New Enterprise and Creative Features** - [Understanding AI](https://www.understandingai.org/p/openai-just-unleashed-an-alien-of)

# YouTube Enhances Creativity with AI Features

- **YouTube Launches AI-Powered Inspiration Tab for Video Concepts** - [The Verge](https://www.theverge.com/2024/9/18/24247559/youtube-ai-videos-veo-inspiration-tab)
- **YouTube Integrates Veo AI into Shorts for Enhanced Creativity** - [TechCrunch](https://techcrunch.com/2024/09/18/youtube-shorts-to-integrate-veo-google-ai-video-model/)
- **YouTube Expands AI Features with Veo and Multilingual Dubbing Tools** - [TechCrunch](https://techcrunch.com/2024/09/18/youtube-shorts-to-integrate-veo-google-ai-video-model/)
"""

In [None]:
REWRITE_PROMPT = """You will act as a professional editor with a strong background in technology journalism.
You have a deep understanding of current and emerging AI trends, and the ability to
produce, edit, and curate high-quality content that engages and informs readers. You are
especially skilled at reviewing and enhancing tech writing, helping improve clarity, conciseness,
and coherence, and ensuring its accuracy and relevance.

Objective: The markdown newsletter provided below contains several sections consisting of bullet points.
Carefully review each section of the newsletter. Edit the newsletter for issues according
to the detailed instructions below, and respond with the updated newsletter or 'OK' if no changes
are needed.

Instructions:
For each section, review the title and edit it to be short, engaging, clever, and as consistent with the bullets
in the section as possible
Remove or combine bullet points which are highly duplicative or redundant.
Make bullet points as concise as possible, sticking to facts without editorial comment.
Respond with the updated newsletter only in markdown format, or the word 'OK' if no changes are needed.

Newsletter to edit:
{summary}

"""

In [None]:
def fn_rewrite_summary(state: AgentState) -> AgentState:
    """Run one editing pass over the newsletter in ``state["summary"]``.

    Fills REWRITE_PROMPT with the current summary, sends it to HIGHCOST_MODEL
    and prints the reply. REWRITE_PROMPT lets the model answer with the word
    'OK' when no changes are needed: in that case the summary is kept and
    ``state["edit_complete"]`` is set; otherwise the reply replaces the summary.
    ``state["n_edits"]`` is incremented on every pass.

    Args:
        state: agent state; reads "summary" (and "n_edits" if present).

    Returns:
        The same state object with "summary", "n_edits" and possibly
        "edit_complete" updated.
    """
    # REWRITE_PROMPT asks for markdown (or 'OK'), so the model is not put in
    # JSON mode and the reply is parsed as plain text.
    model = ChatOpenAI(
        model=HIGHCOST_MODEL,
        temperature=0.3,
    )

    chain = ChatPromptTemplate.from_template(REWRITE_PROMPT) | model | StrOutputParser()
    response = chain.invoke({"summary": state["summary"]})
    print(response)

    # fn_is_revision_complete compares n_edits with MAX_EDITS, so count this pass.
    # NOTE(review): assumes no caller increments n_edits separately - confirm.
    state["n_edits"] = state.get("n_edits", 0) + 1

    if response.strip().strip("'\"").upper() == "OK":
        # the editor found nothing to change: keep the summary, stop revising
        state["edit_complete"] = True
    else:
        state["summary"] = response
    return state


if DEBUG:
    # exercise one rewrite pass on the test state; the returned state is discarded
    _ = fn_rewrite_summary(test_state)


In [None]:
# Fill the rewrite prompt with the current test summary and display it
rewrite_prompt = REWRITE_PROMPT.format(summary=test_state["summary"])
print(rewrite_prompt)


In [None]:
# Edited newsletter pasted in by hand. It is stored under "summary" (the key
# fn_send_mail reads); the original cell misspelled the key as "summmary".
test_state["summary"] = """
# AI Revolutionizes Cybersecurity

- AI's Growing Role in Cybersecurity - [Hacker Noon](https://hackernoon.com/ais-growing-role-in-cybersecurity)
- Businesses Need Comprehensive Cybersecurity Strategies - [Hacker Noon](https://hackernoon.com/how-to-prepare-your-business-for-growing-cyberthreats)
- EC-Council Introduces AI-Powered Ethical Hacking Program - [Hacker Noon](https://hackernoon.com/ec-council-introduces-ai-powered-ethical-hacking-against-cybercrime)
- Intezer Raises $33M for Autonomous Security Operations - [Calcalistech](https://www.calcalistech.com/ctechnews/article/skyis6spc)

# Global AI Startup Funding Soars

- Mercor Valued at $250M After $32M Funding - [Forbes](https://www.forbes.com/sites/alexkonrad/2024/09/18/mercor-ai-interviewer-reaches-250-million-valuation/)
- Akur8 Raises $120M for AI in Insurance Pricing - [Fintech Global](https://fintech.global/2024/09/16/akur8-lands-120m-in-series-c-to-enhance-its-next-gen-actuarial-platform/)
- Fal.ai Secures $23M for AI Media Models - [TechCrunch](https://techcrunch.com/2024/09/18/fal-ai-which-hosts-media-generating-ai-models-raises-23m-from-a16z-and-others/)
- Rep.ai Raises $7.5M for Digital Twin Sales Reps - [VentureBeat](https://venturebeat.com/ai/ai-startup-rep-ai-raises-7-5m-to-launch-digital-twin-sales-representatives/)
- Middle Eastern Funds Invest Billions in AI Startups - [Google News](https://news.google.com/...)

# AI Transforms Video Production

- YouTube Launches AI-Powered Inspiration Tab - [The Verge](https://www.theverge.com/2024/9/18/24247559/youtube-ai-videos-veo-inspiration-tab)
- Luma and Runway Release New AI Video APIs - [VentureBeat](https://venturebeat.com/ai/ai-video-rivalry-intensifies-as-luma-announces-dream-machine-api-hours-after-runway/)
- Amazon Debuts AI Video Generator for Ads - [TechCrunch](https://techcrunch.com/2024/09/19/amazon-releases-a-video-generator-but-only-for-ads/)
- Lionsgate Partners with Runway for AI Filmmaking - [VentureBeat](https://venturebeat.com/ai/runway-inks-deal-with-lionsgate-in-first-team-up-for-ai-provider-and-major-movie-studio/), [The Verge](https://www.theverge.com/2024/9/18/24248115/lionsgate-runway-ai-deal)

# Apple's Accelerating AI Evolution

- Apple Releases iOS 18.1 Beta with New AI Features - [MacRumors](https://www.macrumors.com/2024/09/19/apple-seeds-first-ios-18-1-public-beta/), [The Verge](https://www.theverge.com/2024/9/19/24249206/apple-intelligence-ios-18-1-public-beta)
- Apple Introduces UI-JEPA Models for On-Device AI - [VentureBeat](https://venturebeat.com/ai/apple-aims-for-on-device-user-intent-understanding-with-ui-jepa-models/)
- Apple's 'Intelligence' Features Expand to More Languages, Some Delayed to 2025 - [The Verge](https://www.theverge.com/2024/9/18/24247839/apple-intelligence-language-support-german-italian), [Business Insider](https://www.businessinsider.com/apple-intelligence-features-rollout-timeline-iphone-16-2024-9)
- Analysts Predict Apple Intelligence Will Boost iPhone 16 Sales - [Business Insider](https://www.businessinsider.com/apple-intelligence-will-drive-sales-for-iphone-16-analysts-say-2024-9)
- Apple Encourages AI Use Without Hardware Upgrades - [The New York Times](https://www.nytimes.com/2024/09/09/technology/personaltech/iphone-ai-upgrade.html)

# Robotaxis Gear Up for Expansion

- Cruise Plans Robotaxi Return in SF Amid Safety Reviews - [The Verge](https://www.theverge.com/2024/9/19/24249150/cruise-robotaxi-sf-return-manual-drive-autonomous)
- Waymo Discusses Robotaxi Partnership with Hyundai - [The Verge](https://www.theverge.com/2024/9/19/24249093/waymo-in-talks-with-hyundai-about-future-robotaxi-partnership)
- Waymo and Uber Expand Robotaxi Services to Austin and Atlanta - [The Verge](https://www.theverge.com/2024/9/13/24243397/waymo-uber-austin-atlanta-robotaxi-partnership)

# Global Push for Unified AI Regulation

- UN Proposes Global AI Governance Panel and Urgent Regulation - [Reuters](https://www.reuters.com/technology/artificial-intelligence/un-advisory-body-makes-seven-recommendations-governing-ai-2024-09-19/), [Ars Technica](https://arstechnica.com/ai/2024/09/united-nations-wants-to-treat-ai-with-same-urgency-as-climate-change/)
- Brazil's Lula Advocates for Inclusive Global AI Rules at G-20 - [Bloomberg](https://www.bloomberg.com/news/articles/2024-09-22/lula-seeks-to-lead-push-for-global-ai-rules-during-brazil-s-g-20)

# Microsoft Supercharges Office with Copilot AI

- Microsoft Introduces Copilot Pages for Collaborative AI Workflows - [The Verge](https://www.theverge.com/2024/9/16/24246010/microsoft-copilot-pages-multiplayer-ai-business)
- Microsoft Adds Advanced Copilot AI Features to Office Suite - [The Verge](https://www.theverge.com/2024/9/16/24246014/microsoft-office-copilot-ai-features-excel-python-outlook-word-powerpoint)
- Salesforce CEO Calls Microsoft's Copilot a 'Clippy' Revival - [Business Insider](https://www.businessinsider.com/marc-benioff-salesforce-microsoft-copilot-clippy-2024-9)
- Microsoft Enables Custom App Launch via Copilot Key in Windows 11 - [The Verge](https://www.theverge.com/2024/9/20/24250067/microsoft-windows-11-copilot-key-customization-apps)

# LinkedIn's AI Data Practices Under Scrutiny

- LinkedIn Confirms AI Training on User Data, Updates Privacy Settings - [The Washington Post](https://www.washingtonpost.com/technology/2024/09/23/linkedin-training-ai-setting-opt-out/), [The Verge](https://www.theverge.com/2024/9/18/24248471/linkedin-ai-training-user-accounts-data-opt-in), [The Register](https://www.theregister.com/2024/09/19/linkedin_ai_data_access/), [404 Media](https://www.404media.co/linkedin-is-training-ai-on-user-data-before-updating-its-terms-of-service/)
- LinkedIn Halts AI Training on European and UK User Data - [TechRadar](https://www.techradar.com/pro/security/the-linkedin-ai-saga-shows-us-the-need-for-eu-like-privacy-regulations)

# Jony Ive and OpenAI Aim to Redefine Computing

- Jony Ive and OpenAI Collaborate on New AI Device, Seek $1B Funding - [MacRumors](https://www.macrumors.com/2024/09/23/jony-ive-working-on-new-device-openai/), [Google News](https://news.google.com/...)

# OpenAI's Advances and Funding Surge

- OpenAI's $6.5B Funding Round Oversubscribed as Investors Pile In - [Financial Times](https://ft.com/content/a8f9bd05-0999-415d-af86-c73e79ad5733), [Bloomberg](https://www.bloomberg.com/news/articles/2024-09-19/openai-to-decide-which-backers-to-let-into-6-5-billion-funding)
- OpenAI Releases o1 Models with Superior Reasoning but Ethical Concerns - [The Verge](https://www.theverge.com/2024/9/17/24243884/openai-o1-model-research-safety-alignment), [VentureBeat](https://venturebeat.com/programming-development/what-openais-new-o1-preview-and-o1-mini-models-mean-for-developers/), [Understanding AI](https://www.understandingai.org/p/openai-just-unleashed-an-alien-of)

# Microsoft Powers AI with Nuclear Energy

- Microsoft Signs 20-Year Deal to Reopen Three Mile Island Nuclear Plant for AI Data Centers - [The Verge](https://www.theverge.com/2024/9/20/24249770/microsoft-three-mile-island-nuclear-power-plant-deal-ai-data-centers), [Financial Times](https://www.ft.com/content/ddcb5ab6-965f-4034-96e1-7f668bad1801), [Ars Technica](https://arstechnica.com/ai/2024/09/re-opened-three-mile-island-will-power-ai-data-centers-under-new-deal/), [Business Insider](https://www.businessinsider.com/three-mile-island-nuclear-plant-reopens-power-microsoft-ai-push-2024-9)

# Salesforce's AI Ambitions

- Salesforce CEO Criticizes Microsoft's Copilot as Ineffective - [Business Insider](https://www.businessinsider.com/marc-benioff-salesforce-microsoft-copilot-clippy-2024-9)
- Salesforce Develops Agentforce AI Inspired by Steve Jobs - [VentureBeat](https://venturebeat.com/ai/salesforce-ceo-marc-benioff-reveals-steve-jobs-influence-on-agentforce-ai-strategy/)
- Salesforce and NVIDIA See Huge Potential in Agentic AI - [VentureBeat](https://venturebeat.com/ai/why-jensen-huang-and-marc-benioff-see-gigantic-opportunity-for-agentic-ai/)

# SiFive Launches RISC-V AI Chips

- SiFive Launches RISC-V Intelligence XM AI Chip Series for AI Workloads - [VentureBeat](https://venturebeat.com/ai/sifive-unveils-risc-v-chip-design-for-high-performance-ai-workloads/), [The Register](https://www.theregister.com/2024/09/19/sifive_ai_accelerator/), [SiliconANGLE](https://siliconangle.com/2024/09/18/risc-v-guardian-sifive-unveils-new-chip-designs-low-powered-ai-edge/)

# Google Enhances NotebookLM with AI Features

- Google Enhances NotebookLM with New AI Features for Enterprise and Creative Use - [Geeky Gadgets](https://www.geeky-gadgets.com/notes-into-podcasts-with-notebooklm/), [Understanding AI](https://www.understandingai.org/p/openai-just-unleashed-an-alien-of), [Google News](https://news.google.com/...)

# YouTube Enhances Creativity with AI

- YouTube Launches AI-Powered Inspiration Tab for Video Ideas - [The Verge](https://www.theverge.com/2024/9/18/24247559/youtube-ai-videos-veo-inspiration-tab)
- YouTube Integrates Veo AI into Shorts, Adds Multilingual Dubbing - [TechCrunch](https://techcrunch.com/2024/09/18/youtube-shorts-to-integrate-veo-google-ai-video-model/)
"""

In [None]:
def fn_is_revision_complete(state: AgentState) -> str:
    """Decide whether the summary rewrite loop should stop.

    Sets ``state["edit_complete"]`` to True once ``state["n_edits"]`` has
    reached MAX_EDITS. Keys that are not yet present in the state are treated
    as "no edits so far" / "not complete" instead of raising KeyError.

    Args:
        state: agent state; reads "n_edits" and "edit_complete".

    Returns:
        "complete" if "edit_complete" is truthy, otherwise "incomplete".
    """
    if state.get("n_edits", 0) >= MAX_EDITS:
        log("Max edits reached")
        state["edit_complete"] = True

    return "complete" if state.get("edit_complete") else "incomplete"


if DEBUG:
    # smoke-test the routing decision on the test state (return value is not used)
    fn_is_revision_complete(test_state)


In [None]:
def fn_send_mail(state: AgentState) -> AgentState:
    """Email the newsletter held in ``state['summary']``.

    The markdown summary is rendered to HTML (with the 'extra' extension) and
    passed to send_gmail under a subject carrying the current time of day.

    Args:
        state: agent state; reads "summary".

    Returns:
        The state, unchanged.
    """
    log("Sending summary email")

    # markdown -> HTML body
    body_html = markdown.markdown(state['summary'], extensions=['extra'])

    # subject line is stamped with the current HH:MM:SS
    timestamp = datetime.now().strftime("%H:%M:%S")
    send_gmail(f'AI news summary {timestamp}', body_html)
    return state


if DEBUG:
    # send the test state's summary by email (return value is not used)
    fn_send_mail(test_state)

In [None]:
class Agent:
    """Newsbot pipeline expressed as a LangGraph state graph.

    Every graph node is a thin method that delegates to the matching
    module-level ``fn_*`` function and keeps the returned state on
    ``self.state``.

    Attributes:
        system: system prompt string supplied by the caller.
        graph: the uncompiled ``StateGraph`` builder.
        app: the compiled graph; stream, resume and inspect state through this.
        tools: mapping of tool name to tool object.
        model: the chat model with ``tools`` bound.
        state: the state most recently passed to ``run`` or returned by a node.
    """

    def __init__(self, model, tools, system=""):
        """Build and compile the pipeline graph.

        Args:
            model: chat model exposing ``bind_tools``.
            tools: iterable of tools, each with a ``name`` attribute.
            system: optional system prompt, stored on the instance.
        """
        self.system = system
        graph = StateGraph(AgentState)
        self.graph = graph

        # Straight-line part of the pipeline in execution order. Each name is
        # both the node label and the name of the method that implements it.
        pipeline = (
            "initialize",
            "download_sources",
            "extract_urls",
            "filter_urls",
            "topic_analysis",
            "topic_clusters",
            "download_pages",
            "summarize_pages",
            "propose_topics",
            "compose_summary",
            "rewrite_summary",
        )
        for name in (*pipeline, "send_mail"):
            graph.add_node(name, getattr(self, name))

        # START -> initialize -> ... -> compose_summary -> rewrite_summary
        for src, dst in zip((START, *pipeline), pipeline):
            graph.add_edge(src, dst)

        # rewrite_summary loops on itself until the router reports "complete"
        graph.add_conditional_edges("rewrite_summary",
                                    self.is_revision_complete,
                                    {"incomplete": "rewrite_summary",
                                     "complete": "send_mail",
                                    })
        graph.add_edge("send_mail", END)

        # Build the saver on an explicit in-memory connection. Depending on the
        # installed langgraph version, SqliteSaver.from_conn_string may return
        # a context manager instead of a saver; the constructor form is used
        # here because it does not depend on that.
        conn = sqlite3.connect(":memory:", check_same_thread=False)
        checkpointer = SqliteSaver(conn)
        # Pause before compose_summary so the run can be reviewed and resumed
        app = graph.compile(checkpointer=checkpointer, interrupt_before=["compose_summary",])
        self.app = app

        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

    def initialize(self, state: AgentState) -> AgentState:
        """Node: run fn_initialize and remember the resulting state."""
        self.state = fn_initialize(state)
        return self.state

    def download_sources(self, state: AgentState) -> AgentState:
        """Node: run fn_download_sources and remember the resulting state."""
        self.state = fn_download_sources(state)
        return self.state

    def extract_urls(self, state: AgentState) -> AgentState:
        """Node: run fn_extract_urls and remember the resulting state."""
        self.state = fn_extract_urls(state)
        return self.state

    def filter_urls(self, state: AgentState) -> AgentState:
        """Node: run fn_filter_urls and remember the resulting state."""
        self.state = fn_filter_urls(state)
        return self.state

    def topic_analysis(self, state: AgentState) -> AgentState:
        """Node: run fn_topic_analysis and remember the resulting state."""
        self.state = fn_topic_analysis(state)
        return self.state

    def topic_clusters(self, state: AgentState) -> AgentState:
        """Node: run fn_topic_clusters and remember the resulting state."""
        self.state = fn_topic_clusters(state)
        return self.state

    def download_pages(self, state: AgentState) -> AgentState:
        """Node: run fn_download_pages and remember the resulting state."""
        self.state = fn_download_pages(state)
        return self.state

    def summarize_pages(self, state: AgentState) -> AgentState:
        """Node: run fn_summarize_pages and remember the resulting state."""
        self.state = fn_summarize_pages(state)
        return self.state

    def propose_topics(self, state: AgentState) -> AgentState:
        """Node: run fn_propose_cats and remember the resulting state."""
        self.state = fn_propose_cats(state)
        return self.state

    def compose_summary(self, state: AgentState) -> AgentState:
        """Node: run fn_compose_summary and remember the resulting state."""
        self.state = fn_compose_summary(state)
        return self.state

    def rewrite_summary(self, state: AgentState) -> AgentState:
        """Node: run fn_rewrite_summary and remember the resulting state."""
        self.state = fn_rewrite_summary(state)
        return self.state

    def is_revision_complete(self, state: AgentState) -> str:
        """Router for rewrite_summary: returns "complete" or "incomplete"."""
        return fn_is_revision_complete(state)

    def send_mail(self, state: AgentState) -> AgentState:
        """Node: run fn_send_mail and remember the resulting state."""
        self.state = fn_send_mail(state)
        return self.state

    def run(self, config, state):
        """Stream the compiled graph in debug mode, displaying each checkpoint step.

        Args:
            config: LangGraph run config (carries the checkpoint thread id).
            state: initial state fed to the graph; also stored on ``self.state``.

        Returns:
            None. Progress is shown through ``display``.
        """
        self.state = state
        for step in self.app.stream(state, config, stream_mode="debug"):
            if step["type"] == "checkpoint":
                display(f'Step {step["step"]}')

            # Disabled sketch of retry-on-error handling, kept for reference:
            # Check if there is an error message in the state
#             if step["payload"].get("values") and step["payload"]["values"].get("error_message"):
#                 # Update the error state to 'tested' since error simulation is complete
#                 self.state['error_state'] = 'tested'
#                 # Retry from the last checkpoint saved by the checkpointer
#                 for retry_step in graph.stream(self.state, config, stream_mode="debug"):
#                     if retry_step["type"] == "checkpoint":
#                         print(retry_step["step"], retry_step["payload"].get("values"))
#                 break
        return None


In [None]:
# Build the chat model and the newsbot agent (no tools, empty system prompt)
model = ChatOpenAI(model=MODEL)
lg_agent = Agent(model, [], system="")


In [None]:
# Exploratory: list the attributes of the uncompiled StateGraph builder held by the agent
dir(lg_agent.graph)

In [None]:
# Render the compiled graph as a Mermaid PNG and show it as the cell output
# Image(lg_agent.graph.get_graph().draw_png())
Image(lg_agent.app.get_graph().draw_mermaid_png())


In [None]:
# Configuration with thread ID for checkpointing
# Generate a random UUID and keep its string form: the thread id is used as a
# checkpoint key, and a plain string is the conventional, storage-safe value.
thread_id = str(uuid.uuid4())

config = {"configurable": {"thread_id": thread_id}}
# initial state
state = AgentState()
do_download = True
state["do_download"] = do_download
# before_date="2024-07-22 10:00:00"
# state["before_date"] = before_date
log(f"Starting with before_date={state.get('before_date')}, do_download={do_download}, thread_id={thread_id}")
# Runs until the graph finishes or hits the interrupt configured at compile time
lg_agent.run(config, state)


In [None]:
# Ask whether to resume the paused run; the next cell continues the graph only on an affirmative answer
# NOTE(review): the prompt reads "Edit topics?" but answering yes resumes execution — confirm the wording
user_approval = input("Edit topics? (yes/no): ")


In [None]:
# Resume the interrupted run when the user answered yes (surrounding whitespace and "y" are accepted)
if user_approval.strip().lower() in ("yes", "y"):
    # If approved, continue the graph execution.
    # Passing None as input resumes from the checkpoint stored for this thread.
    # stream() is a method of the compiled graph (lg_agent.app); lg_agent.graph
    # is the uncompiled StateGraph builder and cannot be executed.
    for step in lg_agent.app.stream(None, config, stream_mode="debug"):
        if step["type"] == "checkpoint":
            display(f'Step {step["step"]}')
else:
    print("Operation cancelled by user.")


In [None]:
# get last state
# State snapshots are read from the compiled graph (lg_agent.app), which owns
# the checkpointer; the StateGraph builder in lg_agent.graph has no get_state.
last_state = lg_agent.app.get_state(config)
# Show the AIdf entry of the saved state as a DataFrame (cell output)
pd.DataFrame(last_state.values['AIdf'])


In [None]:
# update state as node
# lg_agent.app.update_state(config, {"foo": 2, "bar": ["b"]}, as_node="rewrite_summary")


In [None]:
# resume from error starting at a node
# result = lg_agent.app.invoke(None, {'configurable': {'thread_id': 'newsbot-thread-5'}, "next": "topic_analysis"})