In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd

In [None]:
# Landing page of the site to scrape; fetched by the next cell.
url = 'http://books.toscrape.com/'

In [None]:
# Fetch the landing page and parse it into `soup`.
# `response` is pre-bound to None so the `finally` clause can tell whether a
# response object exists: when requests.get() itself raises (DNS failure,
# refused connection, timeout) there is nothing to close, and calling
# response.close() unconditionally would raise NameError on a fresh kernel.
response = None
try:
    # Without a timeout requests may wait on an unresponsive server indefinitely.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
except requests.RequestException as e:
    print(e)
else:
    status_code = response.status_code
    # raise_for_status() only rejects 4xx/5xx, so other non-200 codes land here.
    if status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
    else:
        print(f'Error: {status_code}')
finally:
    if response is not None:
        response.close()
    print('Connection closed')


In [None]:
# Show only the beginning of the parsed document: printing the whole page
# floods the notebook output and bloats the saved .ipynb file.
PREVIEW_CHARS = 2000
print(soup.prettify()[:PREVIEW_CHARS])

In [None]:
# Holds the book links scraped from the landing page (filled by the next cell).
all_links = []

In [None]:
# Collect the href of every book title on the landing page.
# The list is rebuilt here rather than only appended to, so re-running this
# cell does not accumulate duplicate links.
all_links = []
h3s = soup.find_all('h3')
for h3 in h3s:
    # `a` stays bound to the last anchor after the loop; the following cell reads it.
    a = h3.find('a')
    link = a['href']
    all_links.append(link)

In [None]:
# Peek at the collected links to see what form the hrefs take.
# This reads the list rather than the loop variable `a` leaked from the
# previous cell, which is undefined when the page contains no <h3> elements.
all_links[:3]

In [None]:
# Walk the paginated catalogue and collect the absolute URL of every book.
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server blocks the cell forever
base_url = 'http://books.toscrape.com/catalogue/page-{}.html'
page = 1
all_links = []  # Rebuilt on every run so the cell is idempotent

# A single Session reuses the connection across pages and is closed on exit.
with requests.Session() as session:
    while True:
        # Dedicated name: reusing `url` would clobber the landing-page URL
        # defined earlier and change what the first fetch cell does on re-run.
        page_url = base_url.format(page)
        try:
            response = session.get(page_url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # Network-level failure (DNS, refused connection, timeout): report and stop.
            print(e)
            break

        if response.status_code != 200:
            print(f'Error: {response.status_code}')
            break

        soup = BeautifulSoup(response.text, 'html.parser')
        for h3 in soup.find_all('h3'):
            a = h3.find('a')
            if a is None or not a.get('href'):
                continue  # Heading without a usable link; nothing to collect
            # Resolve the relative href against the listing page it came from.
            all_links.append(urljoin(page_url, a['href']))

        # The listing shows an <li class="next"> element while more pages remain.
        if soup.find('li', class_='next') is None:
            print('No more pages')
            break
        page += 1  # Move to the next page

# Print the extracted links
print(f'Total Books Scraped: {len(all_links)}')
for link in all_links[:10]:  # Print first 10 links
    print(link)

In [24]:
# Visit every book page and record its title, the fields of its product
# information table, and its category; then export everything to CSV.
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server blocks the cell forever

# List to store all book data (one dict per book)
all_books = []

# A single Session reuses the connection for the whole run instead of opening
# a new one per book page.
with requests.Session() as session:
    for link in all_links:
        try:
            response = session.get(link, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(f'Error fetching {link}: {e}')
            break

        if response.status_code != 200:
            print(f'Error fetching {link}: {response.status_code}')
            break

        # Parse the raw bytes rather than response.text: BeautifulSoup can then
        # take the encoding from the document itself. requests decodes .text
        # from the HTTP header alone and falls back to ISO-8859-1 for text/*
        # responses that declare no charset, which garbles non-ASCII characters
        # such as the pound sign in prices.
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract book title (None if the page has no <h1>)
        heading = soup.find('h1')
        book_details = {"Title": heading.get_text(strip=True) if heading else None}

        # Extract book details from the table: each row is a <th> label / <td> value pair
        table = soup.find('table', class_='table table-striped')
        rows = table.find_all('tr') if table else []
        for row in rows:
            th = row.find('th')
            td = row.find('td')
            if th is None or td is None:
                continue  # Malformed row; skip it instead of aborting the scrape
            book_details[th.get_text(strip=True)] = td.get_text(strip=True)

        # Extract category from breadcrumb navigation: the third link is used
        # as the category, with "Unknown" when the trail is shorter or absent.
        breadcrumb = soup.find('ul', class_='breadcrumb')
        breadcrumb_links = breadcrumb.find_all('a') if breadcrumb else []
        category = breadcrumb_links[2].text.strip() if len(breadcrumb_links) > 2 else "Unknown"
        book_details["Category"] = category

        # Store book details
        all_books.append(book_details)

# Convert list of books to a Pandas DataFrame (built once from the records)
df = pd.DataFrame(all_books)

# Save to CSV
df.to_csv("all_books_details.csv", index=False)