In [29]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
from concurrent.futures import ThreadPoolExecutor





In [None]:
def scrape_page(page, timeout=10):
    """Scrape every quote from one listing page of quotes.toscrape.com.

    Args:
        page: Page number interpolated into the listing URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block its worker thread
            indefinitely.

    Returns:
        A list of dicts, one per quote block found on the page, each with the
        keys "quote" (text), "author" (text) and "tags" (list of tag texts).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    url = f"https://quotes.toscrape.com/page/{page}/"
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error response instead of parsing an error page and
    # silently returning an empty list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    return [
        {
            "quote": q.find("span", class_="text").text,
            "author": q.find("small", class_="author").text,
            "tags": [tag.text for tag in q.find_all("a", class_="tag")],
        }
        for q in soup.find_all("div", class_="quote")
    ]
# Cell 2: Function that scrapes a single page of quotes





In [None]:

# Cell 3: Multithreading (Scraping)
# Pages to fetch; the pool is sized so each page gets its own worker thread.
pages = [1, 2, 3, 4, 5]
all_data = []

with ThreadPoolExecutor(max_workers=len(pages)) as pool:
    # map() yields the per-page lists in the same order as `pages`.
    results = pool.map(scrape_page, pages)

# Leaving the `with` block waits for every worker, so iterating here does not
# block; flatten the per-page lists into one list of records.
for page_records in results:
    all_data += page_records

print(f"Total records collected: {len(all_data)}")



Total records collected: 50


In [32]:
# Cell 4: Save to JSON
# The file is opened as UTF-8, so write non-ASCII characters (e.g. the curly
# quotes in the scraped text) verbatim rather than as \uXXXX escapes; this
# keeps the saved file human-readable and still loads back unchanged.
with open("scraped_data.json", "w", encoding="utf-8") as f:
    json.dump(all_data, f, indent=4, ensure_ascii=False)

print("Data saved to scraped_data.json")



Data saved to scraped_data.json


In [33]:
# Cell 5: Load JSON with Pandas
# Read the file written by the previous cell back into a DataFrame.
df = pd.read_json(path_or_buf="scraped_data.json")

# Preview the first five rows; kept as the last expression so the notebook
# renders it as the cell output.
df.head(n=5)

Unnamed: 0,quote,author,tags
0,“The world as we have created it is a process ...,Albert Einstein,"[change, deep-thoughts, thinking, world]"
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling,"[abilities, choices]"
2,“There are only two ways to live your life. On...,Albert Einstein,"[inspirational, life, live, miracle, miracles]"
3,"“The person, be it gentleman or lady, who has ...",Jane Austen,"[aliteracy, books, classic, humor]"
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe,"[be-yourself, inspirational]"


In [34]:
# Cell 6: Simple Analysis
# Count quotes per author (value_counts sorts descending) and show the five
# most frequent, with the heading and the table on separate lines.
print("Top Authors:", df["author"].value_counts().head(5), sep="\n")

Top Authors:
author
Albert Einstein    8
J.K. Rowling       6
Marilyn Monroe     6
Dr. Seuss          3
Bob Marley         3
Name: count, dtype: int64
