In [None]:
!pip install google-generativeai

In [None]:
# Gemini-compatible imports
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
import google.generativeai as genai 

# Connecting to Gemini

The next cell is where we load the environment variables from your `.env` file and validate the Gemini API key before connecting to Gemini.

In [None]:
# Connecting to Gemini
# Values in .env take precedence over variables already set in the process.
load_dotenv(override=True)
api_key = os.getenv('GEMINI_API_KEY')  # Gemini key read from the environment

# Check the key and report the first problem found.
# The whitespace check must run before the prefix check: a key with leading
# whitespace also fails startswith("AIza") and would otherwise be reported as
# an invalid key format instead of a fixable formatting problem.
if not api_key:
    print("No Gemini API key found! Please:")
    print("1. Create a .env file in this folder")
    print("2. Add: GEMINI_API_KEY=your_actual_api_key_here")
    print("3. Get your API key from: https://aistudio.google.com/app/apikey")
elif api_key.strip() != api_key:
    print("Key detected, but it has leading/trailing whitespace!")
    print("Remove spaces/tabs around the key in your .env file")
elif not api_key.startswith("AIza"):
    print("Gemini API key found, but it doesn't start with 'AIza'")
    print("This suggests an invalid key format - get a new one from Google AI Studio")
else:
    print("Gemini API key validated successfully! 🎉")

In [None]:
!pip install --upgrade google-generativeai

# Check which models are available in Gemini and choose one of them to run

EG: Name: models/gemini-1.5-pro-latest
Description: Alias that points to the most recent production (non-experimental) release of Gemini 1.5 Pro, our mid-size multimodal model that supports up to 2 million tokens.

In [None]:
import google.generativeai as genai

# Hand the API key from the environment to the SDK
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

# Keep only the models whose supported methods include 'generateContent'
content_models = (
    candidate
    for candidate in genai.list_models()
    if 'generateContent' in candidate.supported_generation_methods
)

# Print the name and description of each remaining model
for candidate in content_models:
    print(f"Name: {candidate.name}")
    print(f"Description: {candidate.description}\n")

In [None]:
# Create the model handle that the next cell calls generate_content() on.
model = genai.GenerativeModel('gemini-1.5-pro-latest')  # model name passed as a plain string

# Generate AI Response with Token Limit  
This code uses an AI model to generate a response to a prompt, limiting the output to **1,000 tokens** 
(shorter than the default 2048). 
Replace `"Your prompt here"` with your input text. Useful for controlling response length or API costs.  
P.S. You can also keep the default; I set it to 1,000 tokens for convenience.

In [None]:
# Optional: cap the reply at 1000 output tokens via an explicit generation config
token_limit = genai.types.GenerationConfig(max_output_tokens=1000)
response = model.generate_content("Your prompt here", generation_config=token_limit)

# Basic Gemini Model Connectivity Test  
This code verifies connectivity to Google's Gemini AI model. It:  
1. Loads the API key securely from a `.env` file.  
2. Initializes the `gemini-1.5-pro-latest` model.  
3. Sends a test prompt and prints a **5-word confirmation** to validate functionality.  

In [None]:
# Full test code
import google.generativeai as genai
from dotenv import load_dotenv
import os

# Read the .env file and pass the key on to the SDK
load_dotenv()
gemini_key = os.getenv('GEMINI_API_KEY')
genai.configure(api_key=gemini_key)

# Send one tiny prompt and print whatever text comes back
model = genai.GenerativeModel('gemini-1.5-pro-latest')
ping_prompt = "Hello! Can you confirm you're working? Answer in 5 words."
response = model.generate_content(ping_prompt)
print(response.text)  # Should return short confirmation

# Let's make a quick call to a Frontier model to get started, as a preview!

In [None]:
# Load environment and configure
load_dotenv()
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

# Initialize model
model = genai.GenerativeModel('gemini-1.5-pro-latest')  # Use your verified model name

# Generate response
message = "Hello, Gemini! This is my first ever message to you! Hi!"
response = model.generate_content(message)

# Handle response.
# `response.text` raises ValueError instead of returning an empty value when
# the response carries no usable text (for example when it was blocked), so the
# accessor is guarded; otherwise the fallback branch below could never run.
try:
    reply_text = response.text
except ValueError:
    reply_text = None

if reply_text:
    print(reply_text)
else:
    print("Response blocked. Safety feedback:", response.prompt_feedback)

# Web Scraping & Content Cleaning  
This code scrapes a webpage and extracts clean text content:  
1. Uses a `User-Agent` header to mimic a browser.  
2. Removes scripts, styles, and other non-text elements.  
3. Stores the page title and cleaned body text in a `Website` object.  

In [None]:
# Request headers sent with every page fetch made by the Website class below.
# The User-Agent value is a desktop Chrome browser string.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """A fetched web page reduced to its title and visible text.

    Attributes:
        url: The address that was requested.
        title: Contents of the page's <title>, or "No title found" when the
            page has no title or the title holds no single string.
        text: Text of the page's <body> with script, style, img and input
            elements removed; empty string when the page has no <body>.
    """

    def __init__(self, url, timeout=30):
        """
        Creates a Website object with cleaned text content from the URL
        - Scrapes page using requests/BeautifulSoup
        - Removes unnecessary elements (scripts, styles, etc.)
        - Stores cleaned text content

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before requests gives up.
                Without a timeout an unresponsive host would block the
                notebook cell indefinitely.

        Raises:
            requests.exceptions.RequestException: If the page cannot be
                fetched (connection failure, timeout, ...).
        """
        self.url = url

        # 1. Fetch webpage
        response = requests.get(url, headers=headers, timeout=timeout)

        # 2. Parse HTML content
        soup = BeautifulSoup(response.content, 'html.parser')

        # 3. Extract and clean content
        # `.string` is None when <title> is empty or has nested tags, so fall
        # back to the placeholder in that case as well as when <title> is absent.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        # Some responses (non-HTML payloads, bare fragments) have no <body>;
        # store empty text rather than failing on a None body.
        body = soup.body
        if body is None:
            self.text = ""
            return

        for element in body(["script", "style", "img", "input"]):
            element.decompose()  # Remove unwanted elements
        self.text = body.get_text(separator="\n", strip=True)

In [None]:
google = Website("https://google.com")  # Initializes using your Website class

# Print results. The object created above is named `google`; the previous
# references to `ed` pointed at a name that is never defined in this notebook.
print("Page Title:", google.title)
print("\nCleaned Page Content:")
print(google.text)

## Types of prompts

You may know this already - but if not, you will get very familiar with it!

Models like GPT4o have been trained to receive instructions in a particular way.

They expect to receive:

**A system prompt** that tells them what task they are performing and what tone they should use

**A user prompt** -- the conversation starter that they should reply to

In [None]:
# Define our system prompt - you can experiment with this later, changing the last sentence to "Respond in markdown in Spanish."

system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [None]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    """Build the user prompt asking for a markdown summary of `website`.

    Args:
        website: Object exposing `title` and `text` attributes.

    Returns:
        A string made of a heading naming the page title, the summarization
        instructions, and then the page text.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + instructions + website.text

In [None]:
# Show the full user prompt built for the page scraped into `google`
print(user_prompt_for(google))

In [None]:
# Role-tagged chat messages: one system instruction followed by one user question
messages = [
    dict(role="system", content="You are a snarky assistant"),
    dict(role="user", content="What is 2 + 2?"),
]

In [None]:
# Placeholder prompts used only by this demonstration cell.
# They deliberately have their own names: assigning to `system_prompt` here
# would overwrite the real system prompt defined earlier, and messages_for()
# reads that global when it builds the summarization prompt.
example_system_prompt = "You are an assistant... Respond in markdown."
example_user_prompt = "Summarize this website..."

# Role-based message list, shown for comparison
messages = [
    {"role": "system", "content": example_system_prompt},
    {"role": "user", "content": example_user_prompt}
]

# Gemini version: both parts folded into one tagged prompt string
full_prompt = f"""<SYSTEM_ROLE>
{example_system_prompt}
</SYSTEM_ROLE>

<USER_REQUEST>
{example_user_prompt}
</USER_REQUEST>"""

response = genai.GenerativeModel('gemini-1.5-pro-latest').generate_content(full_prompt)

## And now let's build useful messages for GEMINI, using a function

In [None]:
# This function joins the system prompt and the user prompt into one plain string

def messages_for(website):
    """Return the single prompt string for `website`.

    Gemini uses single-string prompts instead of role-based messages, so the
    module-level `system_prompt` and the output of `user_prompt_for(website)`
    are concatenated with a blank line between them.
    """
    instructions = system_prompt
    request = user_prompt_for(website)
    return "{}\n\n{}".format(instructions, request)

In [None]:
# Try this out, and then try for a few more websites.
# The call is the cell's last expression, so the combined prompt string it
# returns is echoed as the cell output.

messages_for(google)

## Time to bring it together - the API for Gemini is very simple!

In [None]:
# And now: call the Gemini API. You will get very familiar with this!

def summarize(url):
    """Fetch `url`, send the combined prompt to Gemini, and return the reply text.

    Args:
        url: Address of the page to summarize.

    Returns:
        The `text` attribute of the Gemini response.
    """
    page = Website(url)
    gemini = genai.GenerativeModel('gemini-1.5-pro-latest')
    prompt = messages_for(page)  # Already returns combined prompt string
    reply = gemini.generate_content(prompt)
    return reply.text

In [None]:
# Summarize a live page; the returned text is echoed as the cell output
summarize("https://cnn.com")

In [None]:
# A function to display this nicely in the Jupyter output, using markdown

def display_summary(url):
    """Summarize `url` and render the summary as Markdown in the cell output."""
    display(Markdown(summarize(url)))

In [None]:
# Render the Markdown summary for this site in the cell output
display_summary("https://www.blackbox.ai")

In [None]:
# Same rendering helper, pointed at a different site
display_summary("https://anthropic.com")

## Conclusion
This Website Summarizer demonstrates how to combine web scraping, content cleaning, and Gemini's AI capabilities to create a focused website summarizer.