# 📚 Unified Book Dashboard: BooksToScrape + Google Books API
This notebook combines scraped book data (price, stock, category) with metadata from Google Books API (author, description, pages) for enhanced analysis.

In [9]:
# --- Import libraries ---
import os
import time

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
from bs4 import BeautifulSoup

## 🔍 Step 1: Scrape Book Listings with Detail Links
We extract each book's title, price, and stock status, then follow its detail link to fetch the book's category.

In [10]:
# Scrape the first catalogue page of BooksToScrape and, for every book on it,
# follow the detail link to read the category from the breadcrumb trail.
# Results are collected in four parallel lists: books, prices, availability,
# categories.
base_url = 'https://books.toscrape.com/catalogue/'
page_url = 'https://books.toscrape.com/catalogue/page-1.html'

# One session for the listing page and all detail pages so the underlying
# connection is reused instead of being re-opened for every book.
session = requests.Session()

response = session.get(page_url, timeout=10)
response.raise_for_status()  # fail loudly instead of parsing an error page
# Decode as UTF-8 explicitly. Without this the text is decoded with the wrong
# charset, so the pound sign shows up as 'Â£' and any non-ASCII character in
# a title is garbled as well.
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

# Initialize lists to store extracted book information
books = []
prices = []
availability = []
categories = []

# Find all book containers
product_pods = soup.select('article.product_pod')

# Extract required information for each book
for pod in product_pods:
    title = pod.h3.a['title']
    price = pod.select_one('p.price_color').text.strip().lstrip('£')

    # The in-stock marker may be absent; treat a missing tag as "not in stock"
    # rather than raising AttributeError on None.
    stock_tag = pod.select_one('p.instock.availability')
    stock = stock_tag.text.strip() if stock_tag is not None else ''

    # Build book URL and scrape category from the book detail page
    rel_link = pod.h3.a['href'].replace("../../../", "")
    book_url = base_url + rel_link
    category = 'Unknown'
    try:
        book_resp = session.get(book_url, timeout=10)
        book_resp.raise_for_status()
        book_resp.encoding = 'utf-8'
        book_soup = BeautifulSoup(book_resp.text, 'html.parser')
        breadcrumb = book_soup.select('ul.breadcrumb li a')
        # The third breadcrumb link is read as the category
        if len(breadcrumb) > 2:
            category = breadcrumb[2].text.strip()
    except requests.RequestException as exc:
        # Best effort: a failed detail page only costs us the category
        print(f"Could not fetch category for {title!r}: {exc}")

    # Append the extracted data to lists
    books.append(title)
    prices.append(float(price))
    availability.append('In stock' if 'In stock' in stock else 'Out of stock')
    categories.append(category)

## 🧹 Step 2: Create Initial DataFrame
Organizing extracted data into a structured DataFrame.

In [11]:
# Assemble the four parallel lists from the scraping step into one table,
# one row per book and one column per scraped field.
scraped_columns = dict(
    Title=books,
    Price=prices,
    Availability=availability,
    Category=categories,
)
data = pd.DataFrame(scraped_columns)

## 🌐 Step 3: Enrich Data from Google Books API
We search for each book title and, from the first match, extract the author, page count, and description (truncated to 200 characters).

In [12]:
# Look up every scraped title on the Google Books volumes endpoint and add the
# first match's author, page count and (truncated) description to `data`.
#
# The API key is read from the GOOGLE_BOOKS_API_KEY environment variable and is
# never stored in the notebook. When the variable is unset the request is sent
# without a key (presumably subject to stricter quotas — confirm against the
# Google Books API documentation).
# NOTE(review): a key that was previously committed in this notebook should be
# treated as compromised and rotated.
api_key = os.environ.get('GOOGLE_BOOKS_API_KEY')

GOOGLE_BOOKS_URL = 'https://www.googleapis.com/books/v1/volumes'

# Initialize lists for enrichment fields
authors = []
pages = []
descriptions = []

# Loop over each book title to fetch additional information
for title in data['Title']:
    # Fallbacks used when the lookup fails or returns no match
    author, page_count, description = 'Unknown', None, ''

    # Passing the query through `params` lets requests URL-encode the title,
    # so characters such as '&', '#' or '+' cannot corrupt the query string.
    params = {'q': f'intitle:{title}'}
    if api_key:
        params['key'] = api_key

    try:
        res = requests.get(GOOGLE_BOOKS_URL, params=params, timeout=10)
        res.raise_for_status()
        items = res.json().get('items') or []
        if items:
            info = items[0].get('volumeInfo') or {}
            author = (info.get('authors') or ['Unknown'])[0]
            page_count = info.get('pageCount')
            description = (info.get('description') or '')[:200]  # Limit description to first 200 chars
    except (requests.RequestException, ValueError) as exc:
        # Network/HTTP errors and undecodable JSON: keep the fallbacks and
        # continue. KeyboardInterrupt is deliberately not caught here.
        print(f"Google Books lookup failed for {title!r}: {exc}")

    # Append exactly once per title so the three lists always stay aligned
    # with the rows of `data`.
    authors.append(author)
    pages.append(page_count)
    descriptions.append(description)

    time.sleep(1)  # Respect API rate limits

# Add new metadata fields to the existing DataFrame
data['Author'] = authors
data['Pages'] = pages
data['Description'] = descriptions

## 📊 Step 4: Unified Dashboard Visualizations
We visualize the enriched data through multiple charts.

In [13]:
# Price against page count, one colour per category.
# Rows with a missing 'Pages' value are left out of this chart.
books_with_pages = data.dropna(subset=['Pages'])
fig1 = px.scatter(
    books_with_pages,
    x='Pages',
    y='Price',
    color='Category',
    hover_data=['Title', 'Author'],
    title='Book Price vs Page Count by Category',
)
fig1.show()

In [14]:
# Share of books per availability status, drawn as a pie chart
fig2 = px.pie(data_frame=data, names='Availability', title='Availability of Books')
fig2.show()

In [15]:
# The ten highest-priced books as bars, coloured by author and labelled
# with their price.
top10 = data.nlargest(n=10, columns='Price')
fig3 = px.bar(
    top10,
    x='Title',
    y='Price',
    color='Author',
    text='Price',
    title='Top 10 Expensive Books with Authors',
)
# Tilt the title labels so long book names do not overlap
fig3.update_layout(xaxis=dict(tickangle=-45))
fig3.show()

In [16]:
# Render every column of the merged dataset as a Plotly table
table_header = dict(
    values=list(data.columns),
    fill_color='paleturquoise',
    align='left',
)
table_cells = dict(
    values=[data[column_name] for column_name in data.columns],
    fill_color='lavender',
    align='left',
)
fig4 = go.Figure(data=[go.Table(header=table_header, cells=table_cells)])
fig4.update_layout(title='📘 Full Book Dataset (Scraped + Google Books API)')
fig4.show()

## 📌 Summary
- BooksToScrape data combined with Google Books metadata.
- Insights available via scatter plot, pie chart, bar chart, and full dataset table.