In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# requests
What it does:
Sends an HTTP request to a website (like when you open it in a browser).
Downloads the HTML code of the page so you can analyze it.

Why we need it:
Without it (or a similar HTTP client), there is no HTML to work with: this is the step that actually retrieves the page's content for scraping.

In [4]:
# Fetch the landing page. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() surfaces HTTP errors (4xx/5xx)
# here instead of letting an error page be parsed as if it were quote data.
response = requests.get("http://quotes.toscrape.com", timeout=10)
response.raise_for_status()

# beautifulsoup4 (BS4)
What it does:
Parses (reads and understands) the HTML you got from requests.
Lets you easily find and extract specific elements (like quotes, authors, tags).

In [5]:
# Parse the downloaded HTML, locate the first <span class="text"> element
# with .find(), and read its text content with .get_text().

soup = BeautifulSoup(response.text, "html.parser")
first_quote_span = soup.find("span", class_="text")
quote = first_quote_span.get_text()
print(quote)  # Prints one quote text

“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”


# Extract All Quotes from Every Page

In [11]:
# Parallel accumulators: the scraping loop appends one entry per quote to each
# list (quote text, author name, comma-joined tags).
quotes, authors, tags_list = [], [], []


In [12]:
import requests
from bs4 import BeautifulSoup

base_url = "http://quotes.toscrape.com/page/{}/"
page = 1

# Start from empty accumulators every time this cell runs, so re-running it
# replaces the scraped data instead of appending a second copy of every quote.
quotes = []
authors = []
tags_list = []

# Walk the paginated listing (page 1, 2, 3, ...) until the site reports that
# there is nothing left to show.
while True:
    url = base_url.format(page)
    # Timeout: do not hang forever on a stalled connection.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx. An error page contains neither quotes nor the
    # "No quotes found!" marker, so without this check the loop could keep
    # requesting pages indefinitely.
    response.raise_for_status()
    if "No quotes found!" in response.text:
        break

    soup = BeautifulSoup(response.text, "html.parser")
    quotes_data = soup.find_all("div", class_="quote")
    # Second stop condition: a page without any quote blocks also means we are
    # done, even if the end-of-list message is ever reworded.
    if not quotes_data:
        break

    for quote in quotes_data:
        text = quote.find("span", class_="text").get_text()
        author = quote.find("small", class_="author").get_text()
        tags = [tag.get_text() for tag in quote.find_all("a", class_="tag")]

        quotes.append(text)
        authors.append(author)
        # Tags are stored as a single ", "-separated string per quote.
        tags_list.append(", ".join(tags))

    page += 1


In [14]:
import pandas as pd

# Assemble the three scraped lists into one table, write it to quotes.csv
# without the index column, and show the table as the cell's output.
scraped_columns = {"Quote": quotes, "Author": authors, "Tags": tags_list}
df = pd.DataFrame(data=scraped_columns)

df.to_csv(path_or_buf="quotes.csv", index=False)
print("Data saved to quotes.csv")
df

Data saved to quotes.csv


Unnamed: 0,Quote,Author,Tags
0,“The world as we have created it is a process ...,Albert Einstein,"change, deep-thoughts, thinking, world"
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling,"abilities, choices"
2,“There are only two ways to live your life. On...,Albert Einstein,"inspirational, life, live, miracle, miracles"
3,"“The person, be it gentleman or lady, who has ...",Jane Austen,"aliteracy, books, classic, humor"
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe,"be-yourself, inspirational"
...,...,...,...
95,“You never really understand a person until yo...,Harper Lee,better-life-empathy
96,“You have to write the book that wants to be w...,Madeleine L'Engle,"books, children, difficult, grown-ups, write, ..."
97,“Never tell the truth to people who are not wo...,Mark Twain,truth
98,"“A person's a person, no matter how small.”",Dr. Seuss,inspirational


In [15]:
# Count quotes per author (value_counts sorts by frequency, highest first)
# and show the five most frequent.
print("Top Authors:")
author_counts = df["Author"].value_counts()
print(author_counts.head(5))


Top Authors:
Author
Albert Einstein    10
J.K. Rowling        9
Marilyn Monroe      7
Dr. Seuss           6
Mark Twain          6
Name: count, dtype: int64


In [16]:
# Flatten the per-quote, comma-separated tag strings into one list of
# individual tags, trimming whitespace and dropping empty pieces (quotes
# that have no tags contribute an empty string).
all_tags = [
    piece.strip()
    for tag_string in df["Tags"]
    for piece in tag_string.split(",")
    if piece.strip()
]
tag_counts = pd.Series(all_tags).value_counts()

print("\nTop Tags:")
print(tag_counts.head(5))



Top Tags:
love             14
inspirational    13
life             13
humor            12
books            11
Name: count, dtype: int64


In [17]:
# Headline numbers: one DataFrame row per scraped quote, and the number of
# distinct values in the Author column.
total_quotes = len(df)
print(f"Total quotes scraped: {total_quotes}")
unique_authors = df["Author"].nunique()
print(f"Total unique authors: {unique_authors}")


Total quotes scraped: 100
Total unique authors: 50
