In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Base URL for the site, with a placeholder for the page number
base_url = 'http://books.toscrape.com/catalogue/page-{}.html'

# List to store book data
books = []

# Loop through the first 5 pages
for page in range(1, 6):
    # Request the page content
    response = requests.get(base_url.format(page))
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find all book entries on the page
    for book in soup.select('article.product_pod'):
        # Extract book details
        title = book.h3.a['title']
        price = book.select_one('p.price_color').text
        stock = book.select_one('p.instock.availability').text.strip()
        rating = book.p['class'][1]
        description = book.select_one('p.subtitle').text if book.select_one('p.subtitle') else 'No description'
        
        # Get product details by requesting the book's detail page
        product_url = book.h3.a['href']
        product_response = requests.get(f"http://books.toscrape.com/catalogue/{product_url}")
        product_soup = BeautifulSoup(product_response.text, 'html.parser')
        
        # Extract category and product information with error handling
        category_element = product_soup.select_one('ul.breadcrumb li:nth-of-type(3)')
        category = category_element.text if category_element else 'No category'

        info_table = product_soup.select_one('table.table.table-striped')
        info = info_table.text if info_table else 'No information available'
        
        # Append the book data to the list
        books.append({
            'Title': title,
            'Price': price,
            'Stock': stock,
            'Rating': rating,
            'Description': description,
            'Product Information': info,
            'Category': category
        })

# Convert the list of books to a DataFrame and save to CSV
df_books = pd.DataFrame(books)
df_books.to_csv('books_data.csv', index=False)
print(df_books.head())


                                   Title    Price     Stock Rating  \
0                   A Light in the Attic  Â£51.77  In stock  Three   
1                     Tipping the Velvet  Â£53.74  In stock    One   
2                             Soumission  Â£50.10  In stock    One   
3                          Sharp Objects  Â£47.82  In stock   Four   
4  Sapiens: A Brief History of Humankind  Â£54.23  In stock   Five   

      Description                                Product Information  \
0  No description  \n\nUPCa897fe39b1053632\n\n\nProduct TypeBooks...   
1  No description  \n\nUPC90fa61229261140a\n\n\nProduct TypeBooks...   
2  No description  \n\nUPC6957f44c3847a760\n\n\nProduct TypeBooks...   
3  No description  \n\nUPCe00eb4fd7b871a48\n\n\nProduct TypeBooks...   
4  No description  \n\nUPC4165285e1663650f\n\n\nProduct TypeBooks...   

                 Category  
0              \nPoetry\n  
1  \nHistorical Fiction\n  
2             \nFiction\n  
3             \nMystery\n  
4     

In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Base URL for the quotes page
base_url = 'http://quotes.toscrape.com'

# List to store quote data
quotes = []

# Function to scrape a single page
def scrape_page(url):
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Find all quote entries on the page
        for quote_div in soup.select('div.quote'):
            text = quote_div.select_one('span.text').get_text(strip=True)
            author = quote_div.select_one('small.author').get_text(strip=True)
            tags = [tag.get_text(strip=True) for tag in quote_div.select('div.tags a.tag')]
            
            quotes.append({
                'Text': text,
                'Author': author,
                'Tags': ', '.join(tags)
            })
    else:
        print(f"Failed to retrieve page. Status code: {response.status_code}")

# Scrape the first few pages (modify as needed)
for page in range(1, 6):  # Example: Scrape first 5 pages
    page_url = f'{base_url}/page/{page}/'
    scrape_page(page_url)

# Convert the list of quotes to a DataFrame and save to CSV
df_quotes = pd.DataFrame(quotes)
df_quotes.to_csv('quotes_data.csv', index=False)
print(df_quotes.head() if not df_quotes.empty else "No data found to save.")


                                                Text           Author  \
0  “The world as we have created it is a process ...  Albert Einstein   
1  “It is our choices, Harry, that show what we t...     J.K. Rowling   
2  “There are only two ways to live your life. On...  Albert Einstein   
3  “The person, be it gentleman or lady, who has ...      Jane Austen   
4  “Imperfection is beauty, madness is genius and...   Marilyn Monroe   

                                           Tags  
0        change, deep-thoughts, thinking, world  
1                            abilities, choices  
2  inspirational, life, live, miracle, miracles  
3              aliteracy, books, classic, humor  
4                    be-yourself, inspirational  


In [6]:
import requests
from bs4 import BeautifulSoup

# Request a random Wikipedia page
response = requests.get('https://en.wikipedia.org/wiki/Special:Random')
soup = BeautifulSoup(response.text, 'html.parser')

# Extract the title and the first part of the content
title = soup.find('h1', {'id': 'firstHeading'}).text
content = soup.find('div', {'class': 'mw-parser-output'}).text

# Print the title and a snippet of the content
print(f"Title: {title}")
print(f"Content:\n{content[:500]}...")  # Print the first 500 characters of the content


Title: Anwar Hossain (microbiologist)
Content:

Anwar Hossain is a Bangladeshi biologist and vice-chancellor of Jashore University of Science and Technology.[1][2] Hossain was a professor of the Department of Microbiology at the University of Dhaka.[3]


Early life and education[edit]
Hossain was born on 1 January 1958 in Satgharia, Louhajang Upazila, Munshiganj District, East Pakistan, Pakistan.[4] He did his bachelors and masters in Biochemistry and Molecular Biology at the University of Dhaka in 1981 and 1983 respectively.[5] He did his P...
