In [1]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI


In [2]:
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"

load_dotenv(override=True)

google_api_key = os.getenv("GOOGLE_API_KEY")

if not google_api_key:
    print("No API key was found - please be sure to add your key to the .env file, and save the file! Or you can skip the next 2 cells if you don't want to use Gemini")
elif not google_api_key.startswith("AIz"):
    print("An API key was found, but it doesn't start AIz")
else:
    print("API key found and looks good so far!")


API key found and looks good so far!


In [3]:

# A class to represent a Webpage
# Some websites need you to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:

    def __init__(self, url):
        """
        Create this Website object from the given url using the BeautifulSoup library
        """
        self.url = url
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.content, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.body.get_text(separator="\n", strip=True)


marvel= Website("https://marvel.fandom.com/wiki/Marvel_Database")
print(marvel.title)
print(marvel.text)


Marvel Database | Fandom
Sign In
Register
Marvel Database
Explore
Main Page
Discuss
All Pages
Community
Interactive Maps
Recent Blog Posts
Media
Comics
This Week's Comics
Active Series
Announced Series
Imprints
MAX
Epic
Icon
Ultimate
Red Band
Handbooks
Manga
Movies
2022
Morbius
Doctor Strange in the Multiverse of Madness
Thor: Love and Thunder
Black Panther: Wakanda Forever
2023
Ant-Man and the Wasp: Quantumania
Guardians of the Galaxy Vol. 3
Spider-Man: Across the Spider-Verse
The Marvels
2024
Madame Web
Deadpool & Wolverine
Venom: The Last Dance
Kraven the Hunter
2025
Captain America: Brave New World
Thunderbolts*
Fantastic Four: First Steps
2026
Avengers: Doomsday
Spider-Man 4
2027
Avengers: Secret Wars
All Movies
TV
2022
Moon Knight
Ms. Marvel
She-Hulk: Attorney at Law
2023
Moon Girl and Devil Dinosaur (Season 1)
Secret Invasion
Loki (Season 2)
What If...? (Season 2)
2024
Echo
Moon Girl and Devil Dinosaur (Season 2)
X-Men '97 (Season 1)
Hit-Monkey (Season 2)
Agatha All Along
What I

In [4]:
system_prompt = "You are a marvel assistant that analyzes the contents of the marvel website \
and provides a short summary about all the characters in Featured Articles in one or two lines, \
Respond in markdown."

# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += "\nThe contents of this website is as follows; \
please provide a short summary of this website in markdown. \
If it includes news or announcements, then summarize these too.\n\n"
    user_prompt += website.text
    return user_prompt

print(user_prompt_for(marvel))


You are looking at a website titled Marvel Database | Fandom
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

Sign In
Register
Marvel Database
Explore
Main Page
Discuss
All Pages
Community
Interactive Maps
Recent Blog Posts
Media
Comics
This Week's Comics
Active Series
Announced Series
Imprints
MAX
Epic
Icon
Ultimate
Red Band
Handbooks
Manga
Movies
2022
Morbius
Doctor Strange in the Multiverse of Madness
Thor: Love and Thunder
Black Panther: Wakanda Forever
2023
Ant-Man and the Wasp: Quantumania
Guardians of the Galaxy Vol. 3
Spider-Man: Across the Spider-Verse
The Marvels
2024
Madame Web
Deadpool & Wolverine
Venom: The Last Dance
Kraven the Hunter
2025
Captain America: Brave New World
Thunderbolts*
Fantastic Four: First Steps
2026
Avengers: Doomsday
Spider-Man 4
2027
Avengers: Secret Wars
All Movies
TV
2022
Moon Knight
Ms. Marvel
She-Hulk: Attorney at Law
2023
Moon G

In [5]:
def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]

# To give you a preview -- calling OpenAI with system and user messages:
gemini = OpenAI(base_url=GEMINI_BASE_URL, api_key=google_api_key)

messages_for(marvel)


[{'role': 'system',
  'content': 'You are a marvel assistant that analyzes the contents of the marvel website and provides a short summary about all the characters in Featured Articles in one or two lines, Respond in markdown.'},
 {'role': 'user',
  'content': "You are looking at a website titled Marvel Database | Fandom\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nSign In\nRegister\nMarvel Database\nExplore\nMain Page\nDiscuss\nAll Pages\nCommunity\nInteractive Maps\nRecent Blog Posts\nMedia\nComics\nThis Week's Comics\nActive Series\nAnnounced Series\nImprints\nMAX\nEpic\nIcon\nUltimate\nRed Band\nHandbooks\nManga\nMovies\n2022\nMorbius\nDoctor Strange in the Multiverse of Madness\nThor: Love and Thunder\nBlack Panther: Wakanda Forever\n2023\nAnt-Man and the Wasp: Quantumania\nGuardians of the Galaxy Vol. 3\nSpider-Man: Across the Spider-Verse\nThe Marvels\n2

In [6]:
def summarize(url):
    website = Website(url)
    response = gemini.chat.completions.create(
        model = "gemini-2.5-flash-lite",
        messages = messages_for(website)
    )
    return response.choices[0].message.content

summarize("https://marvel.fandom.com/wiki/Marvel_Database")


'The Marvel Database is an extensive wiki dedicated to all things Marvel, featuring detailed information on characters, comics, movies, TV shows, games, and events. It serves as a comprehensive resource for fans, with featured articles highlighting key characters like Franklin Richards, Spider-Man, and Iron Man. The site also provides updates on upcoming and recent media releases, including movies like *Avengers: Doomsday* and *The Fantastic Four: First Steps*, and TV shows such as *Wonder Man* and *Marvel Zombies*.'

In [7]:
def display_summary(url):
    summary = summarize(url)
    display(Markdown(summary))

display_summary("https://marvel.fandom.com/wiki/Marvel_Database")

The Marvel Database | Fandom is an extensive wiki dedicated to all things Marvel, covering characters, comics, movies, TV shows, games, and events. It features a comprehensive list of characters like Franklin Richards, Spider-Man, Iron Man, and Captain America, detailing their histories and affiliations. The site also highlights upcoming and recent Marvel media, including movies like "Avengers: Doomsday" and "The Fantastic Four: First Steps," as well as TV shows such as "Wonder Man" and "Marvel Zombies." News and announcements are integrated through "Featured Articles" and "This Week's Comics," showcasing new releases and significant storylines like "Ultimate Endgame" and "Age of Revelation."