In [None]:
!pip install google-generativeai

In [4]:
# Gemini-compatible imports
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
import google.generativeai as genai 

# Connecting to Gemini

The next cell is where we load in the environment variables in your `.env` file and connect to OpenAI.

In [5]:
# Connecting to Gemini
load_dotenv(override=True)
api_key = os.getenv('GEMINI_API_KEY')  # Changed environment variable name

# Check the key
if not api_key:
    print("No Gemini API key found! Please:")
    print("1. Create a .env file in this folder")
    print("2. Add: GEMINI_API_KEY=your_actual_api_key_here")
    print("3. Get your API key from: https://aistudio.google.com/app/apikey")
elif not api_key.startswith("AIza"):
    print("Gemini API key found, but it doesn't start with 'AIza'")
    print("This suggests an invalid key format - get a new one from Google AI Studio")
elif api_key.strip() != api_key:
    print("Key detected, but it has leading/trailing whitespace!")
    print("Remove spaces/tabs around the key in your .env file")
else:
    print("Gemini API key validated successfully! 🎉")

Gemini API key validated successfully! 🎉


In [6]:
!pip install --upgrade google-generativeai



# Check for tha available model in Gemini and choose form that to run you model

EG: Name: models/gemini-1.5-pro-latest
Description: Alias that points to the most recent production (non-experimental) release of Gemini 1.5 Pro, our mid-size multimodal model that supports up to 2 million tokens.

In [7]:
import google.generativeai as genai

genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

for m in genai.list_models():
    if 'generateContent' in m.supported_generation_methods:
        print(f"Name: {m.name}")
        print(f"Description: {m.description}\n")

Name: models/gemini-1.0-pro-vision-latest
Description: The original Gemini 1.0 Pro Vision model version which was optimized for image understanding. Gemini 1.0 Pro Vision was deprecated on July 12, 2024. Move to a newer Gemini version.

Name: models/gemini-pro-vision
Description: The original Gemini 1.0 Pro Vision model version which was optimized for image understanding. Gemini 1.0 Pro Vision was deprecated on July 12, 2024. Move to a newer Gemini version.

Name: models/gemini-1.5-pro-latest
Description: Alias that points to the most recent production (non-experimental) release of Gemini 1.5 Pro, our mid-size multimodal model that supports up to 2 million tokens.

Name: models/gemini-1.5-pro-001
Description: Stable version of Gemini 1.5 Pro, our mid-size multimodal model that supports up to 2 million tokens, released in May of 2024.

Name: models/gemini-1.5-pro-002
Description: Stable version of Gemini 1.5 Pro, our mid-size multimodal model that supports up to 2 million tokens, releas

In [8]:
model = genai.GenerativeModel('gemini-1.5-pro-latest')  # New

# Generate AI Response with Token Limit  
This code uses an AI model to generate a response to a prompt, limiting the output to **1,000 tokens** 
(shorter than the default 2048). 
Replace `"Your prompt here"` with your input text. Useful for controlling response length or API costs.  
ps:( you can also use default but i had set it to 1000 token for convenience)

In [9]:
# Optional: Set max output tokens
response = model.generate_content(
    "Your prompt here",
    generation_config=genai.types.GenerationConfig(
        max_output_tokens=1000  # Default is 2048
    )
)

# Basic Gemini Model Connectivity Test  
This code verifies connectivity to Google's Gemini AI model. It:  
1. Loads the API key securely from a `.env` file.  
2. Initializes the `gemini-1.5-pro-latest` model.  
3. Sends a test prompt and prints a **5-word confirmation** to validate functionality.  

In [10]:
# Full test code
import google.generativeai as genai
from dotenv import load_dotenv
import os

load_dotenv()
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

model = genai.GenerativeModel('gemini-1.5-pro-latest')
response = model.generate_content("Hello! Can you confirm you're working? Answer in 5 words.")
print(response.text)  # Should return short confirmation

Yes, I am functioning correctly.



# Let's make a quick call to a Frontier model to get started, as a preview!

In [11]:
# Load environment and configure
load_dotenv()
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

# Initialize model
model = genai.GenerativeModel('gemini-1.5-pro-latest')  # Use your verified model name

# Generate response
message = "Hello, Gemini! This is my first ever message to you! Hi!"
response = model.generate_content(message)

# Handle response
if response.text:
    print(response.text)
else:
    print("Response blocked. Safety feedback:", response.prompt_feedback)

Hello! It's great to hear from you!  I'm excited to be your first Gemini interaction.  What can I do for you today?



# Web Scraping & Content Cleaning  
This code scrapes a webpage and extracts clean text content:  
1. Uses a `User-Agent` header to mimic a browser.  
2. Removes scripts, styles, and other non-text elements.  
3. Stores the page title and cleaned body text in a `Website` object.  

In [12]:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    def __init__(self, url):
        """
        Creates a Website object with cleaned text content from the URL
        - Scrapes page using requests/BeautifulSoup
        - Removes unnecessary elements (scripts, styles, etc.)
        - Stores cleaned text content
        """
        self.url = url
        
        # 1. Fetch webpage
        response = requests.get(url, headers=headers)
        
        # 2. Parse HTML content
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # 3. Extract and clean content
        self.title = soup.title.string if soup.title else "No title found"
        for element in soup.body(["script", "style", "img", "input"]):
            element.decompose()  # Remove unwanted elements
        self.text = soup.body.get_text(separator="\n", strip=True)

In [38]:
google = Website("https://google.com")  # Initializes using your Website class

# Print results (no changes needed here)
print("Page Title:", ed.title)
print("\nCleaned Page Content:")
print(ed.text)

Page Title: Google

Cleaned Page Content:
About
Store
Gmail
Images
Sign in
See more
Delete
Delete
Report inappropriate predictions
Google offered in:
हिन्दी
বাংলা
తెలుగు
मराठी
தமிழ்
ગુજરાતી
ಕನ್ನಡ
മലയാളം
ਪੰਜਾਬੀ
India
Advertising
Business
How Search works
Privacy
Terms
Settings
Search settings
Advanced search
Your data in Search
Search history
Search help
Send feedback
Dark theme: Off
Google apps


## Types of prompts

You may know this already - but if not, you will get very familiar with it!

Models like GPT4o have been trained to receive instructions in a particular way.

They expect to receive:

**A system prompt** that tells them what task they are performing and what tone they should use

**A user prompt** -- the conversation starter that they should reply to

In [33]:
# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish."

system_prompt = "You are an assistant that analyzes the contents of a website \
and provides a short summary, ignoring text that might be navigation related. \
Respond in markdown."

In [34]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += "\nThe contents of this website is as follows; \
please provide a short summary of this website in markdown. \
If it includes news or announcements, then summarize these too.\n\n"
    user_prompt += website.text
    return user_prompt

In [39]:
print(user_prompt_for(google))

You are looking at a website titled Google
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

About
Store
Gmail
Images
Sign in
See more
Delete
Delete
Report inappropriate predictions
Google offered in:
हिन्दी
বাংলা
తెలుగు
मराठी
தமிழ்
ગુજરાતી
ಕನ್ನಡ
മലയാളം
ਪੰਜਾਬੀ
India
Advertising
Business
How Search works
Privacy
Terms
Settings
Search settings
Advanced search
Your data in Search
Search history
Search help
Send feedback
Dark theme: Off
Google apps


In [40]:
messages = [
    {"role": "system", "content": "You are a snarky assistant"},
    {"role": "user", "content": "What is 2 + 2?"}
]

In [42]:
system_prompt = "You are an assistant... Respond in markdown."
user_prompt = "Summarize this website..."

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt}
]

# Gemini version
full_prompt = f"""<SYSTEM_ROLE>
{system_prompt}
</SYSTEM_ROLE>

<USER_REQUEST>
{user_prompt}
</USER_REQUEST>"""

response = genai.GenerativeModel('gemini-1.5-pro-latest').generate_content(full_prompt)

## And now let's build useful messages for GEMINI, using a function

In [43]:
# See how this function creates exactly the format above

def messages_for(website):
    # Gemini uses single-string prompts instead of role-based messages
    return f"{system_prompt}\n\n{user_prompt_for(website)}"

In [44]:
# Try this out, and then try for a few more websites

messages_for(google)

'You are an assistant... Respond in markdown.\n\nYou are looking at a website titled Google\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nAbout\nStore\nGmail\nImages\nSign in\nSee more\nDelete\nDelete\nReport inappropriate predictions\nGoogle offered in:\nहिन्दी\nবাংলা\nతెలుగు\nमराठी\nதமிழ்\nગુજરાતી\nಕನ್ನಡ\nമലയാളം\nਪੰਜਾਬੀ\nIndia\nAdvertising\nBusiness\nHow Search works\nPrivacy\nTerms\nSettings\nSearch settings\nAdvanced search\nYour data in Search\nSearch history\nSearch help\nSend feedback\nDark theme: Off\nGoogle apps'

## Time to bring it together - the API for Gemini is very simple!

In [47]:
# And now: call the Gemini API. You will get very familiar with this!

def summarize(url):
    website = Website(url)
    response = genai.GenerativeModel('gemini-1.5-pro-latest').generate_content(
        messages_for(website)  # Already returns combined prompt string
    )
    return response.text

In [50]:
summarize("https://cnn.com")

"This appears to be the CNN homepage.  It features a variety of news stories, including:\n\n* **Politics:** Trump signing an executive order to begin dismantling the Department of Education, Zelensky's call with Trump, and clashing voters with lawmakers during town halls.\n* **World:**  The world's happiest countries, Eddie Jordan's death, the IOC presidential election,  Russia's Black Sea strategy in the Ukraine war, Israeli settlers seizing land in the West Bank, and Hamas firing rockets at Israel after a truce collapse.\n* **Other:** Mikey Madison's SNL debut, Sia's divorce, the spring equinox, stories about retiring abroad, and numerous science, technology, health, style, and business pieces.\n\nThe page also includes navigation to various sections like US news, World, Politics, Business, Health, Entertainment, Style, Travel, Sports, Science, Climate, Weather, Ukraine-Russia War, Israel-Hamas War, and Games.  There are options to watch live TV, listen to podcasts, and play games.  

In [51]:
# A function to display this nicely in the Jupyter output, using markdown

def display_summary(url):
    summary = summarize(url)
    display(Markdown(summary))

In [52]:
display_summary("https://www.blackbox.ai")

BLACKBOX.AI - Summary

This website appears to be a platform for AI-powered tools and resources.  Without more specific content, it's difficult to provide a detailed summary. However, based on the name, it likely offers features such as:

* **AI Model Development/Training:**  Potentially provides tools or services for building and training machine learning models.
* **Data Analysis/Processing:** Could include tools for data preprocessing, visualization, and analysis.
* **AI-Powered Applications:**  May offer pre-built AI applications for specific tasks or industries.

**News and Announcements:**

No news or announcements were provided in the prompt, therefore this section cannot be summarized.  Please provide the website content for a more complete summary.


In [53]:
display_summary("https://anthropic.com")

This website is the homepage for Anthropic, an AI safety and research company.  They are announcing the release of their most intelligent AI model yet, **Claude 3.7 Sonnet**, a hybrid reasoning model.  They are also launching **Claude Code**, an AI tool designed for coding.  

Key features of the site include information on:

* **Claude:**  Anthropic's AI assistant, available via API and with various pricing plans.
* **API:**  Developers can build AI-powered applications using Claude.
* **Research:**  Anthropic publishes research on AI safety and model development, including insights on Claude's "extended thinking".  Highlighted research papers include topics like Constitutional AI and core views on AI safety.
* **Careers:**  The company is hiring for various roles.
* **Solutions:**  Claude is positioned for coding and customer support applications.


The news highlights the release of Claude 3.7 Sonnet and Claude Code.


## Conclusion
This Website Summarizer demonstrates how to combine web scraping, content cleaning, and Gemini's AI capabilities to create focused website summarizer model.