In [17]:
# Bootstrap: make sure python-dotenv is importable, installing it on demand.
try:
    import dotenv
except ImportError:
    import subprocess
    import sys

    # Use the interpreter running this kernel so pip targets the same
    # environment; check_call raises if the install command fails.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "python-dotenv"]
    )
    import dotenv


In [18]:
import os 
from dotenv import load_dotenv
from scraper import fetch_website_contents
from IPython.display import Markdown, display
from groq import Groq

In [19]:
# Load variables from a .env file into the process environment, then read the
# Groq key from it.
load_dotenv()
api_key = os.getenv("GROQ_API_KEY")

# Sanity-check the key and report the first problem found. The whitespace
# check runs BEFORE the prefix check: a key such as " gsk_..." would otherwise
# be reported as having the wrong prefix, which hides the real problem
# (stray leading/trailing whitespace around an otherwise well-formed key).
if not api_key:
    print("No API key found in .env file")
elif api_key != api_key.strip():
    print("API key contains whitespace")
elif not api_key.startswith("gsk_"):
    print("Invalid API key format. It should start with 'gsk_'")
else:
    print("API key is found and looks valid")


API key is found and looks valid


In [20]:
# First smoke-test prompt, wrapped in the chat "messages" list format
# (a list of {"role", "content"} dicts).
message = "hello, Groq! This is my first ever messgae to you! Hi!"
messages = [
    {"role": "user", "content": message},
]
# Bare last expression so the notebook echoes the list.
messages

[{'role': 'user',
  'content': 'hello, Groq! This is my first ever messgae to you! Hi!'}]

In [22]:
# Build the Groq client with the key read earlier; later cells reuse `groq`.
groq = Groq(api_key=api_key)

# Send the single-message conversation defined above to the chat endpoint.
response = groq.chat.completions.create(
    messages=messages,
    model="llama-3.1-8b-instant",
)

# Bare last expression so the notebook echoes the reply text of the first choice.
response.choices[0].message.content

"Hello! Welcome to our world of chat and conversation. I'm excited to be your first message recipient. I'm Groq, but you can call me Groq for short. I'm here to listen, help, and chat with you about any topic that's on your mind. How's your day going so far?"

In [None]:
# Smoke-test the scraper helper on a live news site. The result is kept in
# `ed` because a later cell passes it to messages_for().
ed = fetch_website_contents("https://www.indiatoday.in/")

print(ed)

Latest News, Bihar Election Results 2025, Breaking News Today, Entertainment, Politics, Cricket - India Today

India Today
Aaj Tak
GNTTV
Lallantop
Business Today
Bangla
Malayalam
Northeast
BT Bazaar
Harper's Bazaar
Sports Tak
Crime Tak
Astro Tak
Gaming
Brides Today
Cosmopolitan
Kisan Tak
Ishq FM
India Today Hindi
Reader’s Digest
India Today
Aaj Tak
GNTTV
Lallantop
Business Today
Bangla
Malayalam
Northeast
BT Bazaar
Harper's Bazaar
Sports Tak
Magazine
Live TV
Search
SEARCH
SIGN IN
Edition
IN
IN
US
Home
TV
Live TV
Primetime
Magazine
Latest Edition
Insight
Best Colleges
Election Hub
Elections
Bihar Assembly
Bihar Constituencies
Life+Style
India
South
Global
All World News
US News
Canada News
UK News
China News
Indians Abroad
Business
All Sports
Women's World Cup
Women's World Cup Schedule
Women's World Cup Points Table
Tennis
Cricket
Football
Sports Today
Technology
Showbuzz
Entertainment
Bollywood
Hollywood
Television
OTT
Latest Reviews
Newspresso
Specials
Podcasts
Sunday Special
History

In [30]:
# System prompt for the summarizer: sets the assistant's role and asks for
# plain markdown output. Ends with a blank line (two trailing newlines).
system_prompt = (
    "You are a helpful assistant that analyzes the contents of a website,\n"
    "and provides a short, summary, ignoring text that might be navigation related.\n"
    "Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\n"
    "\n"
)

In [31]:
# Text placed in front of the scraped page contents in the user message.
# Ends with a blank line so the page text starts on its own paragraph.
user_prompt_prefix = (
    "Here are the contents of a website.\n"
    "Provide a short summary of this website.\n"
    "If it includes news or announcements, then summarize these too.\n"
    "\n"
)

In [42]:
# Scratch experiment: shows how the system message steers the model's answer.
# Not used by the summarizer code. Note that it rebinds the notebook-level
# `messages` and `response` names.
messages = [
    {"role": "system", "content": "Your are a bad assistant"},
    {"role": "user", "content": "what is 2+2?"},
]

response = groq.chat.completions.create(
    messages=messages,
    model="llama-3.1-8b-instant",
)

# Bare last expression so the notebook echoes the reply text.
response.choices[0].message.content

"*pauses for a long time* Um, I think it's... *shrugs* 5?"

In [33]:
def messages_for(website):
    """Build the two-message chat payload for summarizing one page.

    Args:
        website: Page text to summarize; it is appended directly to the
            module-level ``user_prompt_prefix``, so it must be a ``str``.

    Returns:
        A list of two dicts: the system message carrying ``system_prompt``,
        followed by the user message carrying the prefix plus ``website``.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_prefix + website}
    return [system_message, user_message]

In [None]:
# Inspect the chat messages built for the page text scraped into `ed` earlier.
# Bare expression so the notebook echoes the resulting list.
messages_for(ed)

[{'role': 'system',
  'content': 'You are a helpful assistant that analyzes the contents of a website,\nand provides a short, summary, ignoring text that might be navigation related.\nRespond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.\n\n'},
 {'role': 'user',
  'content': "Here are the contents of a website.\nProvide a short summary of this website.\nIf it includes news or announcements, then summarize these too.\n\nLatest News, Bihar Election Results 2025, Breaking News Today, Entertainment, Politics, Cricket - India Today\n\nIndia Today\nAaj Tak\nGNTTV\nLallantop\nBusiness Today\nBangla\nMalayalam\nNortheast\nBT Bazaar\nHarper's Bazaar\nSports Tak\nCrime Tak\nAstro Tak\nGaming\nBrides Today\nCosmopolitan\nKisan Tak\nIshq FM\nIndia Today Hindi\nReader’s Digest\nIndia Today\nAaj Tak\nGNTTV\nLallantop\nBusiness Today\nBangla\nMalayalam\nNortheast\nBT Bazaar\nHarper's Bazaar\nSports Tak\nMagazine\nLive TV\nSearch\nSEARCH\nSIGN IN\nEdition\nIN\

In [35]:
def summarize(url, model="llama-3.1-8b-instant"):
    """Fetch a web page and return the model's summary of it.

    Args:
        url: Address handed to ``fetch_website_contents``.
        model: Model identifier passed to the chat completions call.
            Defaults to the model used throughout this notebook, so existing
            ``summarize(url)`` calls behave exactly as before.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    # Presumably the scraper returns the page text as a str (messages_for
    # concatenates it onto a str prompt) - verify against scraper.py.
    website = fetch_website_contents(url)
    response = groq.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

In [36]:
# Try the summarizer on a live page; the bare call echoes the returned
# markdown as a raw string (newlines shown as \n) rather than rendering it.
summarize("https://www.indiatoday.in/")

"# India Today Website Summary\n\n* The website provides news and updates on various topics including politics, sports, entertainment, and business.\n* Recent and breaking news includes updates on the Bihar Assembly elections, Lalu Yadav's family feud, and the performances of various politicians such as Nitish Kumar, Tejashwi Yadav, and PM Modi.\n\n## Top News Stories\n\n* Lalu Yadav's daughters leave Patna residence amid party feud due to internal crisis \n* RJD suffers worst poll performance in Bihar, winning only 25 seats out of 75.\n* Bihar Cabinet formation under NDA is underway."

In [37]:
def display_summary(url):
    """Summarize the page at ``url`` and render the result as Markdown.

    Calls ``summarize(url)`` and passes the text, wrapped in ``Markdown``,
    to ``display``. Returns ``None``.
    """
    display(Markdown(summarize(url)))

In [38]:
# Render a markdown summary of the CNN home page.
display_summary("https://cnn.com")


# CNN Website Summary
This website is the official website of CNN, a leading news and media organization. It features a broad range of topics including news, politics, business, entertainment, sports, science, climate, and more.
## News and Announcements
The website showcases current events and breaking news in various sections, such as the Ukraine-Russia War, Israel-Hamas War, and 2025 Elections. It also includes feature articles and special reports on topics like climate change, innovation, and health.
## Sections and Features
- Breaking news and videos
- Politics, business, entertainment, and sports sections
- Live TV, listen, and watch options
- International and localized editions
- Various topic-specific sections like technology, health, and climate
- A feedback form for users to report ad issues

In [41]:
# Render a markdown summary of the Anthropic home page.
display_summary("https://anthropic.com")


# Research and Products for Responsible AI Development

Anthropic is a public benefit corporation dedicated to securing the benefits and mitigating the risks of AI. They aim to put safety at the forefront of AI research and development. The website highlights their AI research and products, including Claude, a large language model designed to serve humanity's long-term well-being.

## Recent Announcements

- **Introducing Claude Sonnet 4.5**: Considered the best model in the world for agents, coding, and computer use.
- **Introducing Claude Haiku 4.5**: Manages context on the Claude Developer Platform.
 
The company emphasizes the need for intentional pauses to consider the effects of powerful AI technologies on society.