# Assignment 3
- Name : Deepinder Singh Saini
- Roll No. : 102303673

Q1. Write a Python program to scrape all available books from Books to Scrape 
(https://books.toscrape.com/), a live site built for practicing scraping (safe, 
legal, no anti-bot). For each book, extract the following details: 
1. Title 
2. Price 
3. Availability (In stock / Out of stock) 
4. Star Rating (One, Two, Three, Four, Five) 
Store the scraped results into a Pandas DataFrame and export them to a CSV file named 
books.csv. 
(Note: Use the requests library to fetch the HTML page. Use BeautifulSoup to parse and extract 
book details and handle pagination so that books from all pages are scraped) 

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

from urllib.parse import urljoin

# Scrape every catalogue page of books.toscrape.com, following the "next"
# pagination link until there is none, and export the results to books.csv.
titles = []
prices = []
stocks = []
stars = []

url = "https://books.toscrape.com/catalogue/page-1.html"

# A single session reuses the connection across all catalogue pages.
with requests.Session() as session:
    while url:
        page = session.get(url, timeout=30)
        # Fail loudly on an HTTP error instead of parsing an error page.
        page.raise_for_status()
        # Parse the raw bytes so BeautifulSoup detects the document encoding
        # itself; page.text depends on requests' charset guess, which can
        # mangle the pound sign in prices.
        soup = BeautifulSoup(page.content, "html.parser")

        for book in soup.find_all("article", class_="product_pod"):
            titles.append(book.h3.a["title"])
            prices.append(book.find("p", class_="price_color").text.strip())

            # Match on the "availability" class alone so a book whose state
            # is not "instock" is still recorded instead of raising.
            availability = book.find("p", class_="availability")
            stocks.append(availability.text.strip() if availability else "Unknown")

            # The rating is the second CSS class, e.g. "star-rating Three".
            rating = book.find("p", class_="star-rating")
            stars.append(rating["class"][1] if rating else None)

        next_btn = soup.find("li", class_="next")
        # The "next" href is relative to the current page URL.
        url = urljoin(url, next_btn.a["href"]) if next_btn else None

df = pd.DataFrame({"Title": titles, "Price": prices, "Availability": stocks, "Star Rating": stars})
df.to_csv("books.csv", index=False)


Q2. Write a Python program to scrape the IMDB Top 250 Movies list 
(https://www.imdb.com/chart/top/). For each movie, extract the following details: 
1. Rank (1–250) 
2. Movie Title 
3. Year of Release 
4. IMDB Rating 
Store the results in a Pandas DataFrame and export it to a CSV file named imdb_top250.csv. 

In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Fetch the IMDB Top 250 chart, extract rank, title, year and rating for each
# row, and export the results to imdb_top250.csv.
response = requests.get(
    "https://www.imdb.com/chart/top/",
    headers={"User-Agent": "Mozilla/5.0"},
    timeout=30,
)
# Fail loudly on an HTTP error instead of parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

ranks = []
titles = []
years = []
ratings = []

# NOTE(review): these selectors expect a table-based chart layout
# (table.chart / td.titleColumn / td.imdbRating) - confirm the live page
# still serves that markup.
rows = soup.select("table.chart.full-width tr")
for row in rows[1:]:
    rank_col = row.find("td", class_="titleColumn")
    rating_col = row.find("td", class_="ratingColumn imdbRating")
    if rank_col is None or rating_col is None:
        continue

    # get_text(strip=True) joins the cell's text pieces with no separator
    # (e.g. "1.Title(1994)"), so splitting on a newline never isolates the
    # rank. Read the leading digits of the cell text instead.
    rank_match = re.match(r"\s*(\d+)", rank_col.get_text())
    if rank_match is None or rank_col.a is None:
        continue  # not a ranked movie row

    year_span = rank_col.find("span", class_="secondaryInfo")
    year_match = re.search(r"\((\d{4})\)", year_span.text) if year_span else None

    ranks.append(int(rank_match.group(1)))
    titles.append(rank_col.a.text)
    # Missing year or rating is stored as None rather than aborting the scrape.
    years.append(year_match.group(1) if year_match else None)
    ratings.append(rating_col.strong.text if rating_col.strong else None)

if not ranks:
    # Surface the problem instead of silently producing a header-only CSV.
    print("Warning: no chart rows matched; imdb_top250.csv will contain no movies.")

df = pd.DataFrame({"Rank": ranks, "Movie Title": titles, "Year": years, "IMDB Rating": ratings})
df.to_csv("imdb_top250.csv", index=False)



Q3. Write a Python program to scrape the weather information for top world cities from the 
given website (https://www.timeanddate.com/weather/). For each city, extract the following 
details: 
1. City Name 
2. Temperature 
3. Weather Condition (e.g., Clear, Cloudy, Rainy, etc.) 
Store the results in a Pandas DataFrame and export it to a CSV file named weather.csv.

In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Fetch the timeanddate.com world weather table, take city, temperature and
# condition from the first 20 table rows, and export them to weather.csv.
url = "https://www.timeanddate.com/weather/"
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page.
page.raise_for_status()
soup = BeautifulSoup(page.text, "html.parser")

cities = []
temps = []
conds = []

# NOTE(review): only the first city of each row is read, and the column
# positions below are assumptions - confirm against the live table whether
# a row lists several cities and which cell holds which value.
rows = soup.select("table tbody tr")[:20]
for row in rows:
    cols = row.find_all("td")
    if len(cols) <= 2:
        continue

    texts = [col.text.strip() for col in cols]

    # Prefer the first cell after the city that contains a degree sign, so a
    # non-temperature column between city and temperature is not recorded as
    # the temperature; otherwise keep the positional default.
    temperature = next((text for text in texts[1:] if "\u00b0" in text), texts[1])

    # If the condition cell has no text (icon only), use the icon's
    # title/alt description instead of storing an empty string.
    condition = texts[2]
    if not condition:
        icon = cols[2].find("img")
        if icon is not None:
            condition = (icon.get("title") or icon.get("alt") or "").strip()

    cities.append(texts[0])
    temps.append(temperature)
    conds.append(condition)

df = pd.DataFrame({"City": cities, "Temperature": temps, "Condition": conds})
df.to_csv("weather.csv", index=False)
