In [1]:
!pip install --quiet requests beautifulsoup4 lxml pandas


In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time


In [3]:
# Remind the reader to review the site's crawling rules before scraping.
robots_txt_url = "https://quotes.toscrape.com/robots.txt"
print("Open this URL in your browser and check rules:", robots_txt_url)


Open this URL in your browser and check rules: https://quotes.toscrape.com/robots.txt


In [4]:
# Scrape the paginated quote listing into `rows`: one dict per quote with the
# keys "text", "author" and "tags" (tags joined into a single ", " string).
# HTTPS is used so the scraper talks to the same origin as the robots.txt URL
# printed earlier in the notebook.
base_url = "https://quotes.toscrape.com/page/{}/"
rows = []
headers = {'User-Agent': 'Mozilla/5.0'}

MAX_PAGES = 5          # pages 1..MAX_PAGES are requested
REQUEST_TIMEOUT = 10   # seconds; without a timeout a stalled server hangs the cell forever
POLITE_DELAY = 1       # seconds to wait between page requests

# A Session reuses the underlying connection across the page requests.
with requests.Session() as session:
    for page in range(1, MAX_PAGES + 1):
        url = base_url.format(page)
        r = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        if r.status_code != 200:
            # Keep whatever was collected so far, but say why the loop stopped.
            print(f"Stopping at page {page}: HTTP {r.status_code} for {url}")
            break

        soup = BeautifulSoup(r.text, "lxml")
        quote_blocks = soup.select(".quote")
        if not quote_blocks:
            # A page without any quote blocks is treated as the end of the listing.
            break

        for qb in quote_blocks:
            text_el = qb.select_one(".text")
            author_el = qb.select_one(".author")
            if text_el is None or author_el is None:
                # Skip malformed blocks instead of failing on None.get_text().
                continue
            tags = [t.get_text(strip=True) for t in qb.select(".tags .tag")]
            rows.append({
                "text": text_el.get_text(strip=True),
                "author": author_el.get_text(strip=True),
                "tags": ", ".join(tags),
            })

        time.sleep(POLITE_DELAY)


In [5]:
# Build the quotes table and write it to CSV. The column names are passed
# explicitly so that an empty `rows` list still yields a header row; a CSV
# with no columns at all cannot be read back with pd.read_csv.
df = pd.DataFrame(rows, columns=["text", "author", "tags"])
df.to_csv("quotes_toscrape.csv", index=False)

# Trigger a browser download when the Colab helper is importable; otherwise
# the CSV simply stays in the working directory.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; quotes_toscrape.csv was saved to the working directory.")
else:
    files.download("quotes_toscrape.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [2]:
# Ask for a file upload through Colab's `files` helper. The loading cell later
# indexes `uploaded` by file name and wraps the stored value in io.BytesIO.
from google.colab import files
uploaded = files.upload()


Saving quotes_toscrape.csv to quotes_toscrape.csv


In [5]:
import io
import pandas as pd


# Load the uploaded CSV into a DataFrame and preview it.
# `uploaded` is indexed by file name; its values are wrapped in BytesIO so
# pandas can read them like a file.
if not uploaded:
    # Fail with a clear message instead of a bare IndexError when the upload
    # was cancelled or produced no files.
    raise ValueError("No file was uploaded; re-run the upload cell and select the quotes CSV.")

filename = next(iter(uploaded))  # first uploaded file
df = pd.read_csv(io.BytesIO(uploaded[filename]))

print("File loaded:", filename)
print("Shape (rows, cols):", df.shape)
display(df.head(6))

print("Columns:", df.columns.tolist())

# Show the ten authors with the most rows, if the column is present.
has_author_column = 'author' in df.columns
if has_author_column:
    print("\nTop 10 authors:")
    author_counts = df['author'].value_counts()
    display(author_counts.head(10))


# Show the ten most frequent tags, if the column is present.
if 'tags' in df.columns:
    # An empty tags field is read back from CSV as NaN. Drop those rows before
    # splitting so they are not counted as a literal "nan" tag.
    tags_series = (
        df['tags']
        .dropna()
        .astype(str)
        .str.split(r',\s*')
        .explode()
        .str.strip()
    )
    # Discard empty fragments left by blank fields or trailing commas.
    tags_series = tags_series[tags_series != ""]
    print("\nTop 10 tags:")
    display(tags_series.value_counts().head(10))


# Persist a working copy of the loaded table and tell the reader how to fetch it.
working_copy_path = "quotes_uploaded_copy.csv"
df.to_csv(working_copy_path, index=False)

for message in (
    "\nSaved a working copy as quotes_uploaded_copy.csv — you can download it from the Files pane or run:",
    "from google.colab import files; files.download('quotes_uploaded_copy.csv')",
):
    print(message)


File loaded: quotes_toscrape.csv
Shape (rows, cols): (50, 3)


Unnamed: 0,text,author,tags
0,“The world as we have created it is a process ...,Albert Einstein,"change, deep-thoughts, thinking, world"
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling,"abilities, choices"
2,“There are only two ways to live your life. On...,Albert Einstein,"inspirational, life, live, miracle, miracles"
3,"“The person, be it gentleman or lady, who has ...",Jane Austen,"aliteracy, books, classic, humor"
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe,"be-yourself, inspirational"
5,“Try not to become a man of success. Rather be...,Albert Einstein,"adulthood, success, value"


Columns: ['text', 'author', 'tags']

Top 10 authors:


Unnamed: 0_level_0,count
author,Unnamed: 1_level_1
Albert Einstein,8
J.K. Rowling,6
Marilyn Monroe,6
Dr. Seuss,3
Bob Marley,3
Mother Teresa,2
André Gide,1
Jane Austen,1
Steve Martin,1
Eleanor Roosevelt,1



Top 10 tags:


Unnamed: 0_level_0,count
tags,Unnamed: 1_level_1
inspirational,9
love,9
life,8
humor,5
reading,4
books,4
friends,3
friendship,3
simile,2
attributed-no-source,2



Saved a working copy as quotes_uploaded_copy.csv — you can download it from the Files pane or run:
from google.colab import files; files.download('quotes_uploaded_copy.csv')
