In [1]:
from langchain.callbacks import get_openai_callback
from langchain import LLMChain, PromptTemplate
from langchain.llms import OpenAI
import os 
import json 
from dotenv import load_dotenv
load_dotenv()
import requests
from newspaper import Article

In [3]:
# Browser-like User-Agent sent with the HTTP request for the article page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'
}

article_url = "https://www.artificialintelligence-news.com/2022/01/25/meta-claims-new-ai-supercomputer-will-set-records/"

try:
    # The context manager closes the session once the response has been read.
    with requests.Session() as session:
        response = session.get(article_url, headers=headers, timeout=10)

    if response.status_code == 200:
        article = Article(article_url)
        # Hand newspaper the HTML we already fetched: a bare download() would
        # request the page a second time, without our headers or timeout.
        article.download(input_html=response.text)
        article.parse()

        print(f"Title: {article.title}")
        print(f"Text: {article.text}")
    else:
        print(f"Failed to fetch article at {article_url}")
except Exception as e:
    # Best-effort fetch: report the problem instead of aborting the notebook.
    print(f"Error occurred while fetching article at {article_url}: {e}")

None
Title: Meta claims its new AI supercomputer will set records
Text: Ryan Daws is a senior editor at TechForge Media, with a seasoned background spanning over a decade in tech journalism. His expertise lies in identifying the latest technological trends, dissecting complex topics, and weaving compelling narratives around the most cutting-edge developments. His articles and interviews with leading industry figures have gained him recognition as a key influencer by organisations such as Onalytica. Publications under his stewardship have since gained recognition from leading analyst houses like Forrester for their performance. Find him on X (@gadget_ry) or Mastodon (@gadgetry@techhub.social)

Meta (formerly Facebook) has unveiled an AI supercomputer that it claims will be the world’s fastest.

The supercomputer is called the AI Research SuperCluster (RSC) and is yet to be fully complete. However, Meta’s researchers have already begun using it for training large natural language process

In [4]:
from langchain.schema import (
    HumanMessage
)

# Pull the scraped fields out once; everything below uses these two names.
article_title = article.title
article_text = article.text

# Few-shot prompt: two worked examples followed by the article to summarize.
# The example titles are wrapped in matching single quotes so the model sees
# well-formed examples.
template = """
As an advanced AI, you've been tasked to summarize online articles into bulleted points. Here are a few examples of how you've done this in the past:

Example 1:
Original Article: 'The Effects of Climate Change'
Summary:
- Climate change is causing a rise in global temperatures.
- This leads to melting ice caps and rising sea levels.
- Resulting in more frequent and severe weather conditions.

Example 2:
Original Article: 'The Evolution of Artificial Intelligence'
Summary:
- Artificial Intelligence (AI) has developed significantly over the past decade.
- AI is now used in multiple fields such as healthcare, finance, and transportation.
- The future of AI is promising but requires careful regulation.

Now, here's the article you need to summarize:

==================
Title: {article_title}

{article_text}
==================

Please provide a summarized version of the article in a bulleted list format.
"""

# Fill the placeholders with the variables extracted above.
prompt = template.format(article_title=article_title, article_text=article_text)

# The chat model takes a list of messages; the whole prompt is one human turn.
messages = [HumanMessage(content=prompt)]

In [5]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import get_openai_callback
# Chat model used for the free-form bulleted summary.
chat = ChatOpenAI(temperature=0.0, model_name="gpt-4")

# Only the model call needs to run inside the callback context; the handler
# object stays available afterwards for reporting.
with get_openai_callback() as cb:
    summary = chat(messages)

# Show the generated summary, then the callback handler's report.
print(summary.content)
print(cb)

- Meta (formerly Facebook) has announced an AI supercomputer, the AI Research SuperCluster (RSC), which it claims will be the world's fastest.
- The supercomputer is not yet fully built, but is already being used by Meta's researchers for training large natural language processing and computer vision models.
- The RSC is expected to be fully operational by mid-2022 and will be capable of training models with trillions of parameters.
- The supercomputer is intended to help build new AI systems for real-time voice translations and other applications, and is seen as a key technology for the development of the metaverse.
- Meta expects the RSC to be 20 times faster than its current V100-based clusters, 9 times faster at running the NVIDIA Collective Communication Library, and 3 times faster at training large-scale NLP workflows.
- The RSC is designed with security and privacy controls to allow Meta to use real-world examples from its production systems in training.
- This will enable Meta 

In [6]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import validator
from pydantic import BaseModel, Field
from typing import List


# Structured shape expected from the model: the article title plus a list of
# bullet-point strings, with a minimum of three bullets enforced on validation.
class ArticleSummary(BaseModel):
    title: str = Field(description="Title of the article")
    summary: List[str] = Field(description="Bulleted list summary of the article")

    @validator('summary', allow_reuse=True)
    def has_three_or_more_lines(cls, bullet_points):
        """Return the bullets unchanged, or raise ValueError if there are fewer than three."""
        if len(bullet_points) >= 3:
            return bullet_points
        raise ValueError("Generated summary has less than three bullet points!")

# set up output parser
# The parser is bound to ArticleSummary: it supplies the format instructions
# embedded in the prompt and later parses the model's reply into that model.
parser = PydanticOutputParser(pydantic_object=ArticleSummary)

In [7]:
from langchain.prompts import PromptTemplate


# Prompt text for the structured summary. The two article placeholders are
# filled at call time; {format_instructions} is bound up front (see below).
template = """
You are a very good assistant that summarizes online articles.

Here's the article you want to summarize.

==================
Title: {article_title}

{article_text}
==================

{format_instructions}
"""

# "partial_variables" pre-fills the parser's format instructions, so callers
# only have to provide the article title and text.
prompt = PromptTemplate(
    input_variables=["article_title", "article_text"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=template,
)

# Render the prompt with the title and text obtained from scraping.
formatted_prompt = prompt.format_prompt(
    article_text=article_text,
    article_title=article_title,
)

In [11]:
from langchain.llms import OpenAI

# Completion model used for the structured (JSON) summary.
model = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0.0)

# Ask the model for a summary that follows the parser's format instructions.
output = model(formatted_prompt.to_string())
print(f"Output:\n{output}")

# The reply may wrap the JSON in prose or code fences, and the JSON itself may
# be compact or pretty-printed. Decode the first JSON object starting at the
# first "{" and pass exactly that substring to the parser, rather than
# splitting on a literal delimiter that only matches compact JSON.
json_start = output.find("{")
if json_start == -1:
    raise ValueError("Model output does not contain a JSON object")
# raw_decode returns the decoded value and the index in `output` where it ends.
_, json_end = json.JSONDecoder().raw_decode(output, json_start)

# Parse the extracted JSON into the Pydantic model
parsed_output = parser.parse(output[json_start:json_end])
print(f"Parsed Output:\n{parsed_output}")

Output:
Here is the output:
```
{
  "title": "Meta claims its new AI supercomputer will set records",
  "summary": [
    "Meta (formerly Facebook) has unveiled an AI supercomputer called the AI Research SuperCluster (RSC) that is set to be the world's fastest once completed in mid-2022.",
    "The RSC is already being used by Meta's researchers for training large NLP and computer vision models.",
    "Meta hopes that the RSC will pave the way for building technologies for the metaverse, where AI-driven applications and products will play a crucial role.",
    "The RSC is estimated to be 20x faster than Meta's current V100-based clusters and 9x faster at running the NVIDIA Collective Communication Library (NCCL).",
    "Meta's previous AI research infrastructure only used open source and publicly-available datasets, but the RSC was designed with security and privacy controls in mind to allow for the use of real-world data from Meta's production systems.",
    "This will enable Meta to a