# Assignment 4 - Web Scraping Solutions

## Q1: Scrape Books from https://books.toscrape.com/

In [None]:

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape every catalogue page of books.toscrape.com and save the results to
# books.csv. Pages are numbered from 1; the loop stops at the first page that
# does not answer with HTTP 200 or that contains no book entries.
base_url = "https://books.toscrape.com/catalogue/page-{}.html"

titles, prices, availability, ratings = [], [], [], []

page = 1
# A single session reuses the underlying connection for all page requests.
with requests.Session() as session:
    while True:
        url = base_url.format(page)
        # The timeout keeps a stalled connection from hanging the loop forever.
        response = session.get(url, timeout=30)
        if response.status_code != 200:
            # Any non-200 answer is treated as "no more pages".
            break

        # Parse the raw bytes so BeautifulSoup works out the encoding itself.
        # response.text falls back to ISO-8859-1 when the response declares no
        # charset, which would turn the "£" in prices into "Â£".
        soup = BeautifulSoup(response.content, "html.parser")
        books = soup.find_all("article", class_="product_pod")
        if not books:
            break

        for book in books:
            titles.append(book.h3.a["title"])
            prices.append(book.find("p", class_="price_color").text.strip())
            availability.append(
                book.find("p", class_="instock availability").text.strip()
            )
            # The rating is encoded as the second CSS class of the star-rating
            # paragraph, e.g. class="star-rating Three". Selecting by class
            # avoids relying on it being the first <p> in the article.
            ratings.append(book.find("p", class_="star-rating")["class"][1])

        page += 1

books_df = pd.DataFrame({
    "Title": titles,
    "Price": prices,
    "Availability": availability,
    "Star Rating": ratings
})

books_df.to_csv("books.csv", index=False)
books_df.head()


## Q2: Scrape IMDb Top 250 Movies

In [None]:

from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# Scrape the IMDb Top 250 chart with Selenium and save it to imdb_top250.csv.
driver = webdriver.Chrome()

# NOTE: `ratings` rebinds the list name used by the books cell above; that
# cell's DataFrame has already been built from it by the time this runs.
movies, years, ranks, ratings = [], [], [], []

try:
    driver.get("https://www.imdb.com/chart/top/")
    # Fixed pause to give the page time to render its list items.
    time.sleep(3)

    rows = driver.find_elements(By.CSS_SELECTOR, ".ipc-metadata-list-summary-item")
    for idx, row in enumerate(rows, start=1):
        title_elem = row.find_element(By.CSS_SELECTOR, "h3")
        # Drop a leading "N. " rank prefix if the heading text carries one.
        title = title_elem.text.split('. ', 1)[-1]
        movies.append(title)
        # find_element returns the first matching metadata item in the row.
        years.append(row.find_element(By.CSS_SELECTOR, ".cli-title-metadata-item").text)
        # Rank is the position of the row in the list, not a parsed value.
        ranks.append(idx)
        # Keep only the first whitespace-separated token of the rating text.
        ratings.append(row.find_element(By.CSS_SELECTOR, ".ipc-rating-star").text.split()[0])
finally:
    # Always close the browser, even if a lookup above raises; otherwise the
    # Chrome process is left running.
    driver.quit()

imdb_df = pd.DataFrame({
    "Rank": ranks,
    "Movie Title": movies,
    "Year of Release": years,
    "IMDB Rating": ratings
})

imdb_df.to_csv("imdb_top250.csv", index=False)
imdb_df.head()


## Q3: Scrape Weather Information from https://www.timeanddate.com/weather/

In [None]:

# Scrape the city weather table from timeanddate.com and save it to weather.csv.
weather_url = "https://www.timeanddate.com/weather/"
# The timeout prevents an indefinite hang; raise_for_status surfaces HTTP
# errors instead of silently parsing an error page into an empty table.
response = requests.get(weather_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

cities, temps, conditions = [], [], []

rows = soup.select("table tbody tr")
for row in rows:
    city_elem = row.find("a")
    cells = row.find_all("td")
    # Skip rows without a city link or with too few cells, so the three lists
    # always stay the same length and no IndexError is raised mid-row.
    if city_elem is None or len(cells) < 3:
        continue
    cities.append(city_elem.text.strip())
    # NOTE(review): assumes cell 1 holds the temperature and cell 2 the
    # condition text, and that each row describes a single city - confirm
    # against the live page markup.
    temps.append(cells[1].text.strip())
    conditions.append(cells[2].text.strip())

weather_df = pd.DataFrame({
    "City Name": cities,
    "Temperature": temps,
    "Weather Condition": conditions
})

weather_df.to_csv("weather.csv", index=False)
weather_df.head()
