# **Article Summarizer**

In [1]:
#@title Install Libraries
!pip install python-dotenv --quiet
!pip install beautifulsoup4 --quiet
!pip install langchain --quiet
!pip install openai --quiet


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.1/49.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.6/73.6 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h

## Set up Google Colab and Environment Variables


In [2]:
#@title Setup Code

import os
from dotenv import load_dotenv, dotenv_values

# Mount Google Drive at /content/gdrive so the project folder below is reachable.
from google.colab import drive
drive.mount('/content/gdrive')

# Folder on the mounted drive that holds the .env file (note the trailing slash,
# which the string concatenation below relies on).
project_path = '/content/gdrive/MyDrive/Colab Notebooks/'

# Full path of the dotenv file inside the project folder.
dotenv_file = project_path + '.env'

# load_dotenv() is called inside the f-string, so this one line both loads the
# file and prints the value load_dotenv returned.
print(f"Checks that API keys are available\nEnvironment Vars loaded: {load_dotenv(dotenv_file)}\n")

# Read each API key from the environment and collect them in `keys` for the
# check further down. os.getenv returns None when the variable is not set.
# NOTE(review): the Python name is OPEN_API_KEY while the environment variable
# is OPENAI_API_KEY.
OPEN_API_KEY = os.getenv('OPENAI_API_KEY')
keys = [OPEN_API_KEY]

def keys_available(keys):
  """Report the Python type of every entry in ``keys``, in order.

  A ``str`` entry means a value is present; ``NoneType`` marks an entry
  that is ``None``.
  """
  return list(map(type, keys))

# Every entry should be <class 'str'>. A <class 'NoneType'> entry means the
# corresponding key is None, i.e. os.getenv did not find that variable.
print(f"All API keys should be strings: {keys_available(keys)}")

Mounted at /content/gdrive
Checks that API keys are available
Environment Vars loaded: True

All API keys should be strings: [<class 'str'>]


## Scrape Articles from Source

In [3]:
#@title Scraper Code

import requests
import time as t
from bs4 import BeautifulSoup
from enum import Enum
from datetime import datetime, timedelta

# Listing page that scrape() downloads to discover articles.
url = "https://venturebeat.com/category/ai/"

class Date(Enum):
  """Period selectors understood by the scraper; each value equals its member name."""

  today = "today"
  yesterday = "yesterday"
  more = "more"

  def __str__(self):
    # Show the bare value (e.g. 'today') instead of Enum's default 'Date.today'.
    return str(self.value)

def get_text(detail_url, timeout=30):
  """Download an article page and return its body text.

  Args:
    detail_url: URL of the article page to fetch.
    timeout: Seconds to wait for the server, so a stalled connection
      cannot hang the notebook indefinitely.

  Returns:
    The stripped text of every <p> that is a direct child of
    ``div.article-content``, joined with newlines. An empty string when
    the page has no such container.

  Raises:
    requests.HTTPError: If the server answers with an error status.
  """
  response = requests.get(detail_url, timeout=timeout)
  response.raise_for_status()
  page = BeautifulSoup(response.text, 'lxml')

  body = page.select_one('div.article-content')
  if body is None:
    # select_one found no matching container; report "no text" instead of
    # failing with AttributeError on None.
    return ''

  # recursive=False keeps only the container's direct <p> children and
  # ignores paragraphs nested deeper inside it.
  return '\n'.join(p.text.strip() for p in body.find_all('p', recursive=False))


def _parse_listing_date(stamp):
  """Return the calendar date in a listing timestamp, or None if it cannot be parsed."""
  # '%I' is the 12-hour directive that pairs with '%p'. The earlier '%H'
  # pattern is kept as a fallback so every string it accepted still parses.
  for pattern in ('%B %d, %Y %I:%M %p', '%B %d, %Y %H:%M %p'):
    try:
      return datetime.strptime(stamp, pattern).date()
    except ValueError:
      continue
  return None


def scrape(date=Date.today, delay=5):
  """Collect articles from the listing page at ``url`` for the chosen period.

  Args:
    date: ``Date.today`` or ``Date.yesterday`` keep only articles whose
      listing timestamp falls on that day; ``Date.more`` keeps every
      listed article. Any other value matches nothing.
    delay: Seconds to pause before each article download.

  Returns:
    A list of dicts with the keys 'title', 'url', 'time' (the timestamp
    text as shown on the listing) and 'text' (result of ``get_text``).
    Listings without a <time>, <h2> or linked <a> are skipped, as are
    listings whose timestamp cannot be parsed when a day filter is active.

  Raises:
    requests.HTTPError: If the listing page answers with an error status.
  """
  response = requests.get(url, timeout=30)
  response.raise_for_status()
  soup = BeautifulSoup(response.text, "lxml")
  articles = soup.find_all('article', class_='ArticleListing')

  print('#### input ###')
  print('date: ', date)
  print('#### === ### \n')

  # Resolve the wanted day once, instead of on every loop iteration.
  today = datetime.today().date()
  if date == Date.today:
    wanted_day = today
  elif date == Date.yesterday:
    wanted_day = today - timedelta(days=1)
  elif date == Date.more:
    wanted_day = None  # no day filter: keep every listing
  else:
    # Unrecognised selector: no branch can match, so the result is empty.
    return []

  data = []
  for article in articles:
    if article.time is None or article.h2 is None or article.a is None:
      continue
    link = article.a.get('href')
    if not link:
      continue

    stamp = article.time.text.strip()
    if wanted_day is not None and _parse_listing_date(stamp) != wanted_day:
      continue

    # Pause only ahead of a real download; listings that were filtered out
    # above cause no request and therefore need no wait.
    t.sleep(delay)
    data.append({
        'title': article.h2.text.strip(),
        'url': link,
        'time': stamp,
        'text': get_text(link),
    })

  return data

In [4]:
#@title Select days to pull news {run: "auto"}

# Chosen through the Colab form; get_date() translates this string into a Date member.
period = 'yesterday' #@param ['today', 'yesterday', 'more']

def get_date(period):
  """Translate the form's period string into the matching ``Date`` member.

  'today' and 'yesterday' map to their namesakes; every other value
  yields ``Date.more``.
  """
  if period == 'today':
    return Date.today
  return Date.yesterday if period == 'yesterday' else Date.more

In [5]:
#@title Scraper Results
# Convert the form selection into a Date member, then run the scraper with it.
date = get_date(period)
data = scrape(date)
# len(data) is the number of articles scrape() returned for the chosen period.
print('total articles:', len(data))

#### input ###
date:  yesterday
#### === ### 

total articles: 4


## Summarize Articles

In [6]:
#@title Functions to summarize articles
from IPython.display import Markdown
from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    HumanMessage
)

def get_template(title, text):
  """Build the chat messages that ask the model to summarize one article.

  Returns a one-element list holding a ``HumanMessage`` whose content is the
  summarization prompt with ``title`` and ``text`` filled in.
  """
  # The two-space indent inside the literal is part of the prompt text.
  prompt_layout = """You are an advanced ai assistant that summarizes online articles.

  Here's the article you need to summarize:
  ==========================
  Title: {article_title}

  Text: {article_text}
  ==========================

  Write a summary of the previous article in 100 words or less.
  """

  filled_prompt = prompt_layout.format(article_text=text, article_title=title)
  return [HumanMessage(content=filled_prompt)]

def get_summary(messages):
  """Send ``messages`` to a ChatOpenAI model and return the text of its reply."""
  # A fresh client with temperature=0 is created for each call; the reply's
  # ``content`` attribute carries the summary text.
  return ChatOpenAI(temperature=0)(messages).content

def get_output(article):
  """Summarize one scraped article and format it as a Markdown snippet.

  Args:
    article: Mapping with at least the keys 'title', 'url' and 'text'.

  Returns:
    A string made of the bold title, the generated summary and a
    "View Full" link, terminated by two <br> tags.
  """
  title = article['title']
  # Named `link` so the notebook-level `url` is not shadowed in here.
  link = article['url']

  # The 'time' entry is not part of the rendered output, so it is not read.
  summary = get_summary(get_template(title, article['text']))
  return f"**{title}**<br>{summary} [View Full]({link})<br><br>"

def summarize_articles(data, delay=30):
  """Summarize every article and concatenate the Markdown snippets.

  Args:
    data: Iterable of article dicts as accepted by ``get_output``.
    delay: Seconds to wait before each summarization request. The default
      keeps the original 30-second pause.

  Returns:
    All snippets joined into one string, in input order; an empty string
    when ``data`` is empty.
  """
  markdown_list = []
  for article in data:
    t.sleep(delay) # can only process 3 requests per minute on free account
    markdown_list.append(get_output(article))

  return ''.join(markdown_list)


In [7]:
#@title Article Summaries
# Runs the full summarization pass; summarize_articles pauses before each article.
summaries = summarize_articles(data)
# As the last expression of the cell, the Markdown object is shown as rendered output.
Markdown(summaries)

**Perception Point launches AI model to combat generative AI-based BEC attacks**<br>Perception Point, an internet security platform, has launched a new AI-powered detection model to combat business email compromise (BEC) attacks facilitated by generative AI technologies. The model utilizes large language models (LLMs) and deep learning architecture to identify patterns in LLM-generated text, enabling the detection and prevention of sophisticated and personalized email threats. Perception Point's solution aims to address the limitations of conventional security vendors and offers a proactive approach by quarantining malicious emails before they reach the user's inbox. The model also incorporates a managed incident response service and exhibits exceptional speed in processing incoming emails. [View Full](https://venturebeat.com/ai/perception-point-launches-ai-model-to-combat-generative-ai-based-bec-attacks/)<br><br>**Capital One’s new chief scientist says ‘responsible, thoughtful’ generative AI is key**<br>Prem Natarajan, Capital One's new chief scientist and head of enterprise AI, believes that responsible and thoughtful implementation of generative AI is key for organizations. He sees a substantial opportunity for enterprises in the field of generative AI, particularly for those that have already committed to a technology transformation. Natarajan emphasizes the importance of operating generative AI in a responsible and inclusive manner, with diverse perspectives and considerations for different outcomes. Capital One is currently in a learning and experimenting phase with generative AI and large language models, with customer service being an early application contender. Natarajan's top priority is to build a world-class AI organization at Capital One. 
[View Full](https://venturebeat.com/ai/capital-ones-new-chief-scientist-says-responsible-thoughtful-generative-ai-is-key/)<br><br>**AI Foundation launches AI.XYZ to give people their own AI assistants**<br>AI Foundation has launched AI.XYZ, a platform that allows users to create their own AI assistants. The platform aims to promote a healthier work-life balance by offloading daily tasks to AI assistants. Unlike generic AI assistants from companies like Amazon and Google, AI.XYZ allows users to design unique AI assistants that know their values and goals, providing personalized help. The platform is available in public beta and offers both free and premium subscription options. AI Foundation believes that personal AI assistants can improve efficiency and productivity while protecting user data and privacy. [View Full](https://venturebeat.com/ai/ai-foundation-launches-ai-xyz-to-give-people-their-own-ai-assistants/)<br><br>**Inside the race to build an ‘operating system’ for generative AI**<br>Generative AI, which can auto-generate text, images, and code, is transforming the business world and could add $4.4 trillion to the global economy. However, enterprises face challenges in adopting this technology and need to develop an infrastructure to support the complex interactions between generative AI applications and other assets. This infrastructure can be likened to an operating system for generative AI, providing coordination, management, and monitoring capabilities. Intuit has developed its own platform, GenOS, encompassing data, development, runtime, and user experience layers. Other companies are also leveraging foundational large language models and open frameworks to enhance generative AI applications. [View Full](https://venturebeat.com/ai/inside-the-race-to-build-an-operating-system-for-generative-ai/)<br><br>