In [23]:
import requests
from bs4 import BeautifulSoup

In [24]:
url = "https://quotes.toscrape.com/"

In [25]:
headers = {"User-Agent": "Mozilla/5.0"}
html = requests.get(url, headers=headers)

In [26]:
html.status_code

200

In [27]:
url = "https://quotes.toscrape.com/"
r = requests.get(url)
soup = BeautifulSoup(r.text, "html.parser")

In [40]:
all_tags = [t.text for t in soup.find_all("a", class_="tag")]
sorted(set(all_tags))


['books',
 'friends',
 'friendship',
 'humor',
 'inspirational',
 'life',
 'love',
 'reading',
 'simile',
 'truth']

In [28]:
quote = soup.find("div", class_="quote")
quote

<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”</span>
<span>by <small class="author" itemprop="author">Albert Einstein</small>
<a href="/author/Albert-Einstein">(about)</a>
</span>
<div class="tags">
            Tags:
            <meta class="keywords" content="change,deep-thoughts,thinking,world" itemprop="keywords"/>
<a class="tag" href="/tag/change/page/1/">change</a>
<a class="tag" href="/tag/deep-thoughts/page/1/">deep-thoughts</a>
<a class="tag" href="/tag/thinking/page/1/">thinking</a>
<a class="tag" href="/tag/world/page/1/">world</a>
</div>
</div>

In [29]:
quote_text = quote.find("span", class_="text").text
quote_text

'“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”'

In [30]:
author = quote.find("small", class_="author").text
author

'Albert Einstein'

In [32]:
tags = [t.text for t in quote.find_all("a", class_="tag")]
tags


['change', 'deep-thoughts', 'thinking', 'world']

In [37]:
from urllib.parse import urljoin

author_link = quote.find("a")["href"]
author_url = urljoin(url, author_link)
author_url


'https://quotes.toscrape.com/author/Albert-Einstein'

In [38]:
r2 = requests.get(author_url)
soup2 = BeautifulSoup(r2.text, "html.parser")

born = soup2.find(class_="author-born-date").text + " " + soup2.find(class_="author-born-location").text
desc = soup2.find(class_="author-description").text.strip()

born, desc[:150] + "..."


('March 14, 1879 in Ulm, Germany',
 'In 1879, Albert Einstein was born in Ulm, Germany. He completed his Ph.D. at the University of Zurich by 1909. His 1905 paper explaining the photoelec...')

In [39]:
quotes = soup.find_all("div", class_="quote")
for q in quotes:
    text = q.find("span", class_="text").text
    author = q.find("small", class_="author").text
    tags = [t.text for t in q.find_all("a", class_="tag")]
    print(text, "—", author, tags)


In [43]:
for i in range(1, 4):
    print(f"Page {i}")
    r = requests.get(f"https://quotes.toscrape.com/page/{i}/")
    print("Status:", r.status_code)


Page 1
Status: 200
Page 2
Status: 200
Page 3
Status: 200


In [36]:
import pandas as pd

all_data = []
page = 1

while True:
    url = "https://quotes.toscrape.com/" if page == 1 else f"https://quotes.toscrape.com/page/{page}/"
    r = requests.get(url)
    if r.status_code != 200:
        break
    soup = BeautifulSoup(r.text, "html.parser")
    quotes = soup.find_all("div", class_="quote")
    if not quotes:
        break

    for q in quotes:
        text = q.find("span", class_="text").text
        author = q.find("small", class_="author").text
        tags = [t.text for t in q.find_all("a", class_="tag")]
        author_url = urljoin(url, q.find("a")["href"])

        r2 = requests.get(author_url)
        s2 = BeautifulSoup(r2.text, "html.parser")
        born = s2.find(class_="author-born-date").text + " " + s2.find(class_="author-born-location").text
        desc = s2.find(class_="author-description").text.strip()

        all_data.append({
            "quote": text,
            "author": author,
            "tags": ", ".join(tags),
            "born": born,
            "description": desc
        })
    page += 1

df = pd.DataFrame(all_data)
df


Unnamed: 0,quote,author,tags,born,description
0,“The world as we have created it is a process ...,Albert Einstein,"change, deep-thoughts, thinking, world","March 14, 1879 in Ulm, Germany","In 1879, Albert Einstein was born in Ulm, Germ..."
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling,"abilities, choices","July 31, 1965 in Yate, South Gloucestershire, ...",See also: Robert GalbraithAlthough she writes ...
2,“There are only two ways to live your life. On...,Albert Einstein,"inspirational, life, live, miracle, miracles","March 14, 1879 in Ulm, Germany","In 1879, Albert Einstein was born in Ulm, Germ..."
3,"“The person, be it gentleman or lady, who has ...",Jane Austen,"aliteracy, books, classic, humor","December 16, 1775 in Steventon Rectory, Hampsh...",Jane Austen was an English novelist whose work...
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe,"be-yourself, inspirational","June 01, 1926 in The United States",Marilyn Monroe (born Norma Jeane Mortenson; Ju...
...,...,...,...,...,...
95,“You never really understand a person until yo...,Harper Lee,better-life-empathy,"April 28, 1926 in Monroeville, Alabama, The Un...","Harper Lee, known as Nelle, was born in the Al..."
96,“You have to write the book that wants to be w...,Madeleine L'Engle,"books, children, difficult, grown-ups, write, ...","November 29, 1918 in New York City, New York, ...",Madeleine L'Engle was an American writer best ...
97,“Never tell the truth to people who are not wo...,Mark Twain,truth,"November 30, 1835 in Florida, Missouri, The Un...","Samuel Langhorne Clemens, better known by his ..."
98,"“A person's a person, no matter how small.”",Dr. Seuss,inspirational,"March 02, 1904 in Springfield, MA, The United ...",Theodor Seuss Geisel was born 2 March 1904 in ...


In [44]:
df.to_csv("quotes.csv", index=False)

from google.colab import files
files.download("quotes.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>