Q1. Scrape Books from Books to Scrape

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape every catalogue page of books.toscrape.com and save one row per book
# (title, price, availability, star rating) to books.csv.
# Pages are numbered from 1; the loop stops at the first page that does not
# answer with HTTP 200 or that contains no product cards.
base_url = "https://books.toscrape.com/catalogue/page-{}.html"
books = []

page = 1
while True:
    url = base_url.format(page)
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        break

    # Parse the raw bytes instead of response.text: when the server sends no
    # charset header, requests decodes text/* bodies as ISO-8859-1, which would
    # corrupt the pound sign in the prices. Given bytes, BeautifulSoup detects
    # the encoding from the document itself.
    soup = BeautifulSoup(response.content, "html.parser")
    articles = soup.find_all("article", class_="product_pod")
    if not articles:
        break

    for book in articles:
        title = book.h3.a["title"]
        price = book.find("p", class_="price_color").text
        availability = book.find("p", class_="instock availability").text.strip()
        # The rating is encoded as the second CSS class, e.g. "star-rating Three".
        # Select that paragraph explicitly rather than relying on it being the
        # first <p> inside the card.
        star_rating = book.find("p", class_="star-rating")["class"][1]

        books.append({
            "Title": title,
            "Price": price,
            "Availability": availability,
            "Star Rating": star_rating
        })

    page += 1

# Build the frame once from the collected records, then export it.
df_books = pd.DataFrame(books)
df_books.to_csv("books.csv", index=False)
print("Scraped", len(df_books), "books and saved to books.csv")

Scraped 1000 books and saved to books.csv


Q2. Scrape IMDb Top 250 Movies

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape rank, title, release year and rating from the IMDb Top 250 chart page
# and save them to imdb_top250.csv.
url = "https://www.imdb.com/chart/top/"
headers = {"User-Agent": "Mozilla/5.0"}
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
soup = BeautifulSoup(response.text, "html.parser")

movies = []
rows = soup.select("tbody.lister-list tr")
if not rows:
    # NOTE(review): the recorded run of this cell scraped 0 movies, so this row
    # selector presumably no longer matches the live page - confirm against the
    # current markup. Report it instead of silently writing an empty CSV.
    print("Warning: no chart rows matched 'tbody.lister-list tr' (HTTP status {}).".format(
        response.status_code))

for row in rows:
    # find(..., class_="titleColumn a") matches a class attribute, not a CSS
    # descendant path, so it would return None here; select_one() takes real
    # CSS selectors.
    title_cell = row.select_one("td.titleColumn")
    # The cell text starts with the rank followed by a dot, e.g. "1.Title(1994)".
    rank = int(title_cell.get_text(strip=True).split(".")[0])
    title = title_cell.select_one("a").get_text(strip=True)
    # The year is rendered in parentheses inside the cell's <span>.
    year = int(title_cell.select_one("span").get_text(strip=True).strip("()"))
    rating = float(row.select_one("td.ratingColumn.imdbRating strong").get_text(strip=True))

    movies.append({
        "Rank": rank,
        "Movie Title": title,
        "Year": year,
        "IMDB Rating": rating
    })

df_movies = pd.DataFrame(movies)
df_movies.to_csv("imdb_top250.csv", index=False)
print("Scraped", len(df_movies), "movies and saved to imdb_top250.csv")


Scraped 0 movies and saved to imdb_top250.csv


Q3. Scrape Weather for Top Cities

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Download the New Delhi "historic" weather page and export the rows of its
# observation table (id="wt-his") to historic_weather_delhi.csv.
url = "https://www.timeanddate.com/weather/india/new-delhi/historic"
headers = {"User-Agent": "Mozilla/5.0"}
res = requests.get(url, headers=headers)
soup = BeautifulSoup(res.text, "html.parser")

table = soup.find("table", id="wt-his")
if not table:
    print("Historic weather table not found!")
else:
    # Output column name -> position of the <td> holding that value in a row.
    # Positions 0 and 4 are not exported.
    column_positions = {
        "Temperature": 1,
        "Weather": 2,
        "Wind": 3,
        "Humidity": 5,
        "Barometer": 6,
        "Visibility": 7,
    }

    data = []
    for table_row in table.find("tbody").find_all("tr"):
        # The row's <th> carries the time label; the <td> cells carry the readings.
        record = {"Time": table_row.find("th").get_text(strip=True)}
        cells = table_row.find_all("td")
        for column_name, position in column_positions.items():
            record[column_name] = cells[position].get_text(strip=True)
        data.append(record)

    df = pd.DataFrame(data)
    df.to_csv("historic_weather_delhi.csv", index=False)
    print("Scraped historical weather:", len(df), "records")


Scraped historical weather: 8 records


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the city overview table on timeanddate.com/weather/ and save one row
# per city (name, temperature, condition) to weather.csv.
url = "https://www.timeanddate.com/weather/"
headers = {"User-Agent": "Mozilla/5.0"}
# A timeout keeps the cell from hanging forever on a stalled connection.
res = requests.get(url, headers=headers, timeout=30)
soup = BeautifulSoup(res.text, "html.parser")

cities = []
# NOTE(review): the recorded run of this cell scraped 0 cities, so this selector
# (or the three-column layout assumed below) presumably does not match the live
# page - confirm against the current markup.
table = soup.select_one("table.zebra.tb-wt")

if table:
    for row in table.select("tbody tr"):
        cols = row.find_all("td")
        # Skip rows that do not have the three cells read below.
        if len(cols) >= 3:
            city = cols[0].get_text(strip=True)
            temp = cols[1].get_text(strip=True)
            condition = cols[2].get_text(strip=True)

            cities.append({
                "City Name": city,
                "Temperature": temp,
                "Weather Condition": condition
            })
else:
    # Report the missing table rather than silently writing an empty CSV.
    print("City weather table not found!")

df_weather = pd.DataFrame(cities)
df_weather.to_csv("weather.csv", index=False)
print(" Scraped", len(df_weather), "cities and saved to weather.csv")


 Scraped 0 cities and saved to weather.csv
