#Level 1

In [1]:
import requests
from bs4 import BeautifulSoup


In [2]:
url = "http://quotes.toscrape.com/"
response = requests.get(url)

In [3]:
soup = BeautifulSoup(response.text, "html.parser")
quote = soup.find("span", class_="text").get_text()
author = soup.find("small", class_="author").get_text()

print("Quote:", quote)
print("Author:", author)

Quote: “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”
Author: Albert Einstein


#Level 2

In [4]:
quotes = soup.find_all("span", class_="text")
authors = soup.find_all("small", class_="author")

for q, a in zip(quotes, authors):
    print(f"{q.get_text()} - {a.get_text()}")

“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.” - Albert Einstein
“It is our choices, Harry, that show what we truly are, far more than our abilities.” - J.K. Rowling
“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.” - Albert Einstein
“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.” - Jane Austen
“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.” - Marilyn Monroe
“Try not to become a man of success. Rather become a man of value.” - Albert Einstein
“It is better to be hated for what you are than to be loved for what you are not.” - André Gide
“I have not failed. I've just found 10,000 ways that won't work.” - Thomas A. Edison
“A woman is like a tea bag; you never know how strong it is until it's in hot water.” - Eleanor Roos

In [5]:
import pandas as pd

data = []

for q, a in zip(quotes, authors):
    data.append({
        "quote": q.get_text(),
        "author": a.get_text()
    })

df = pd.DataFrame(data)
df.to_csv("quotes_page1.csv", index=False)

df.head()

Unnamed: 0,quote,author
0,“The world as we have created it is a process ...,Albert Einstein
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling
2,“There are only two ways to live your life. On...,Albert Einstein
3,"“The person, be it gentleman or lady, who has ...",Jane Austen
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe


#Level 3

In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [7]:
all_quotes = []  # untuk menyimpan semua hasil scraping

In [8]:
for page in range(1, 6):
    url = f"http://quotes.toscrape.com/page/{page}/"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    quotes = soup.find_all("span", class_="text")
    authors = soup.find_all("small", class_="author")

    print(f"Scraping page: {page} ...")

    for q, a in zip(quotes, authors):
        all_quotes.append({
            "quote": q.get_text(),
            "author": a.get_text()
        })

Scraping page: 1 ...
Scraping page: 2 ...
Scraping page: 3 ...
Scraping page: 4 ...
Scraping page: 5 ...


In [9]:
df = pd.DataFrame(all_quotes)
df.head()

Unnamed: 0,quote,author
0,“The world as we have created it is a process ...,Albert Einstein
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling
2,“There are only two ways to live your life. On...,Albert Einstein
3,"“The person, be it gentleman or lady, who has ...",Jane Austen
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe


In [10]:
df.to_csv("quotes_page1-5.csv", index=False)
len(df)  # cek jumlah data yang berhasil diambil

50

#Level 4

In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time


In [12]:
all_quotes = []

In [13]:
for page in range(1, 11):
    url = f"http://quotes.toscrape.com/page/{page}/"
    response = requests.get(url)

    # Jika halaman tidak ada → hentikan loop
    if response.status_code == 404:
        print(f"Page {page} not found. Stopping scraper.")
        break

    print(f"Scraping page {page} ...")

    soup = BeautifulSoup(response.text, "html.parser")
    quotes = soup.find_all("span", class_="text")
    authors = soup.find_all("small", class_="author")

    for q, a in zip(quotes, authors):
        all_quotes.append({
            "quote": q.get_text(),
            "author": a.get_text()
        })

    time.sleep(1)  # jeda 1 detik per halaman

Scraping page 1 ...
Scraping page 2 ...
Scraping page 3 ...
Scraping page 4 ...
Scraping page 5 ...
Scraping page 6 ...
Scraping page 7 ...
Scraping page 8 ...
Scraping page 9 ...
Scraping page 10 ...


In [14]:
df2 = pd.DataFrame(all_quotes)
df2.head()

Unnamed: 0,quote,author
0,“The world as we have created it is a process ...,Albert Einstein
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling
2,“There are only two ways to live your life. On...,Albert Einstein
3,"“The person, be it gentleman or lady, who has ...",Jane Austen
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe


In [15]:
print("Total data:", len(df2))
df2.to_csv("quotes_level4.csv", index=False)

Total data: 100


#Level 5

In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [17]:
all_quotes = []

In [18]:
for page in range(1, 11):
    url = f"http://quotes.toscrape.com/page/{page}/"
    response = requests.get(url)

    if response.status_code == 404:
        print(f"Page {page} not found. Stopping scraper.")
        break

    print(f"Scraping page {page} ...")
    soup = BeautifulSoup(response.text, "html.parser")

    quote_blocks = soup.find_all("div", class_="quote")

    for block in quote_blocks:
        quote = block.find("span", class_="text").get_text()
        author = block.find("small", class_="author").get_text()

        # ambil semua tag dalam bentuk list teks
        tags = [tag.get_text() for tag in block.find_all("a", class_="tag")]

        # ambil link profil author
        author_link = block.find("a")["href"]

        all_quotes.append({
            "quote": quote,
            "author": author,
            "tags": tags,
            "author_link": author_link
        })

    time.sleep(1)

Scraping page 1 ...
Scraping page 2 ...
Scraping page 3 ...
Scraping page 4 ...
Scraping page 5 ...
Scraping page 6 ...
Scraping page 7 ...
Scraping page 8 ...
Scraping page 9 ...
Scraping page 10 ...


In [19]:
df3 = pd.DataFrame(all_quotes)
df3.head()

Unnamed: 0,quote,author,tags,author_link
0,“The world as we have created it is a process ...,Albert Einstein,"[change, deep-thoughts, thinking, world]",/author/Albert-Einstein
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling,"[abilities, choices]",/author/J-K-Rowling
2,“There are only two ways to live your life. On...,Albert Einstein,"[inspirational, life, live, miracle, miracles]",/author/Albert-Einstein
3,"“The person, be it gentleman or lady, who has ...",Jane Austen,"[aliteracy, books, classic, humor]",/author/Jane-Austen
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe,"[be-yourself, inspirational]",/author/Marilyn-Monroe


In [20]:
print("Total data:", len(df3))
df3.to_csv("quotes_level5.csv", index=False)

Total data: 100


#Level 6

In [21]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

df = df3.copy()  # pakai hasil level 5
df.head()

Unnamed: 0,quote,author,tags,author_link
0,“The world as we have created it is a process ...,Albert Einstein,"[change, deep-thoughts, thinking, world]",/author/Albert-Einstein
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling,"[abilities, choices]",/author/J-K-Rowling
2,“There are only two ways to live your life. On...,Albert Einstein,"[inspirational, life, live, miracle, miracles]",/author/Albert-Einstein
3,"“The person, be it gentleman or lady, who has ...",Jane Austen,"[aliteracy, books, classic, humor]",/author/Jane-Austen
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe,"[be-yourself, inspirational]",/author/Marilyn-Monroe


In [22]:
author_details = {}

In [25]:
base_url = "http://quotes.toscrape.com"

author_details = {}

for link in set(df["author_link"]):
    url = base_url + link
    response = requests.get(url)

    if response.status_code == 404:
        print(f"Page not found: {url}")
        continue

    soup = BeautifulSoup(response.text, "html.parser")

    try:
        name = soup.find("h3", class_="author-title").get_text(strip=True)
        born_date = soup.find("span", class_="author-born-date").get_text(strip=True)
        born_location = soup.find("span", class_="author-born-location").get_text(strip=True)
        description = soup.find("div", class_="author-description").get_text(strip=True)
    except:
        print(f"Incomplete info for: {url}")
        continue

    author_details[name] = {
        "born_date": born_date,
        "born_location": born_location,
        "description": description
    }

    print(f"Fetched: {name}")
    time.sleep(1)

Fetched: Jim Henson
Fetched: George R.R. Martin
Fetched: Mother Teresa
Fetched: Bob Marley
Fetched: Steve Martin
Fetched: Suzanne Collins
Fetched: Madeleine L'Engle
Fetched: Jorge Luis Borges
Fetched: C.S. Lewis
Fetched: Alexandre Dumas-fils
Fetched: J.K. Rowling
Fetched: George Carlin
Fetched: Alfred Tennyson
Fetched: Thomas A. Edison
Fetched: Allen Saunders
Fetched: Jane Austen
Fetched: Mark Twain
Fetched: Haruki Murakami
Fetched: Stephenie Meyer
Fetched: J.R.R. Tolkien
Fetched: John Lennon
Fetched: Albert Einstein
Fetched: Marilyn Monroe
Fetched: George Eliot
Fetched: Terry Pratchett
Fetched: Ayn Rand
Fetched: E.E. Cummings
Fetched: W.C. Fields
Fetched: J.M. Barrie
Fetched: Ernest Hemingway
Fetched: Elie Wiesel
Fetched: Charles Bukowski
Fetched: Pablo Neruda
Fetched: Martin Luther King Jr.
Fetched: William Nicholson
Fetched: James Baldwin
Fetched: Khaled Hosseini
Fetched: Douglas Adams
Fetched: Harper Lee
Fetched: Charles M. Schulz
Fetched: Dr. Seuss
Fetched: Garrison Keillor
Fetche

In [26]:
df["born_date"] = df["author"].apply(lambda x: author_details.get(x, {}).get("born_date"))
df["born_location"] = df["author"].apply(lambda x: author_details.get(x, {}).get("born_location"))
df["description"] = df["author"].apply(lambda x: author_details.get(x, {}).get("description"))

df.head()

Unnamed: 0,quote,author,tags,author_link,born_date,born_location,description
0,“The world as we have created it is a process ...,Albert Einstein,"[change, deep-thoughts, thinking, world]",/author/Albert-Einstein,"March 14, 1879","in Ulm, Germany","In 1879, Albert Einstein was born in Ulm, Germ..."
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling,"[abilities, choices]",/author/J-K-Rowling,"July 31, 1965","in Yate, South Gloucestershire, England, The U...",See also: Robert GalbraithAlthough she writes ...
2,“There are only two ways to live your life. On...,Albert Einstein,"[inspirational, life, live, miracle, miracles]",/author/Albert-Einstein,"March 14, 1879","in Ulm, Germany","In 1879, Albert Einstein was born in Ulm, Germ..."
3,"“The person, be it gentleman or lady, who has ...",Jane Austen,"[aliteracy, books, classic, humor]",/author/Jane-Austen,"December 16, 1775","in Steventon Rectory, Hampshire, The United Ki...",Jane Austen was an English novelist whose work...
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe,"[be-yourself, inspirational]",/author/Marilyn-Monroe,"June 01, 1926",in The United States,Marilyn Monroe (born Norma Jeane Mortenson; Ju...


In [27]:
df.to_csv("quotes_final.csv", index=False)
len(df), df.isna().sum()

(100,
 quote            0
 author           0
 tags             0
 author_link      0
 born_date        1
 born_location    1
 description      1
 dtype: int64)