In [2]:
# Q1: Books to Scrape
import requests
from bs4 import BeautifulSoup
import pandas as pd

base_url = "https://books.toscrape.com/catalogue/page-{}.html"

titles, prices, availability, ratings = [], [], [], []


for page in range(1, 51):
    url = base_url.format(page)
    res = requests.get(url)
    if res.status_code != 200:
        break
    soup = BeautifulSoup(res.text, "html.parser")

    books = soup.find_all("article", class_="product_pod")
    for book in books:
        titles.append(book.h3.a['title'])
        prices.append(book.find("p", class_="price_color").text.strip())
        availability.append(book.find("p", class_="instock availability").text.strip())
        ratings.append(book.p["class"][1])

df_books = pd.DataFrame({
    "Title": titles,
    "Price": prices,
    "Availability": availability,
    "Star Rating": ratings
})
df_books.to_csv("books.csv", index=False)
df_books.head()



Unnamed: 0,Title,Price,Availability,Star Rating
0,A Light in the Attic,Â£51.77,In stock,Three
1,Tipping the Velvet,Â£53.74,In stock,One
2,Soumission,Â£50.10,In stock,One
3,Sharp Objects,Â£47.82,In stock,Four
4,Sapiens: A Brief History of Humankind,Â£54.23,In stock,Five


In [2]:
# Q2: IMDB Top 250 Movies
import requests
from bs4 import BeautifulSoup
import pandas as pd


url = "https://www.imdb.com/chart/top/"
res = requests.get(url)
soup = BeautifulSoup(res.text, "html.parser")


ranks, movies, years, ratings = [], [], [], []

rows = soup.select("tbody.lister-list tr")


for idx, row in enumerate(rows, start=1):
    title = row.select_one("td.titleColumn a").text.strip()
    year = row.select_one("td.titleColumn span.secondaryInfo").text.strip("()")
    rating = row.select_one("td.imdbRating strong").text.strip()

    ranks.append(idx)
    movies.append(title)
    years.append(year)
    ratings.append(rating)

df_imdb = pd.DataFrame({
    "Rank": ranks,
    "Movie Title": movies,
    "Year of Release": years,
    "IMDB Rating": ratings
})


df_imdb.to_csv("imdb_top250.csv", index=False)


df_imdb.head(10)


Unnamed: 0,Rank,Movie Title,Year of Release,IMDB Rating


In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://www.timeanddate.com/weather/"
res = requests.get(url)
soup = BeautifulSoup(res.text, "html.parser")

cities, temps, conditions = [], [], []

# The weather table is the first with class "zebra tb-wt fw tb-hover"
table = soup.select_one("table.zebra.tb-wt.fw.tb-hover")

if table:
    rows = table.select("tr")[1:]  # skip header row
    for row in rows:
        cols = row.find_all("td")
        if len(cols) >= 3:
            city = cols[0].text.strip()
            temp = cols[1].text.strip()
            cond = cols[2].text.strip()
            cities.append(city)
            temps.append(temp)
            conditions.append(cond)

df_weather = pd.DataFrame({
    "City": cities,
    "Temperature": temps,
    "Condition": conditions
})

df_weather.to_csv("weather.csv", index=False)


df_weather.head(10)



Unnamed: 0,City,Temperature,Condition
