1. Write a Python program that searches www.amazon.in for all products matching a given product name. The product to search for is taken as input from the user. For example, if the user input is ‘guitar’, search for guitars.

2. Building on the above question, scrape the following details of each product listed in the first 3 pages of your search results and save them in a data frame and a CSV file. If a product has fewer than 3 pages of search results, scrape all the products available under that product name. Details to be scraped are: "Brand
Name", "Name of the Product", "Price", "Return/Exchange", "Expected Delivery", "Availability" and
“Product URL”. If any of the details are missing for a product, replace them with “-”.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def search_amazon_products(product_name):
    """Search amazon.in for a product and scrape up to three result pages.

    The number of pages visited is the smaller of 3 and the page count read
    from the pagination bar of the first response. If that bar is missing
    or not numeric, only the first page is scraped.

    Args:
        product_name: Free-text search term, e.g. ``'guitar'``.

    Returns:
        A list of dicts, one per result card, with the keys 'Brand Name',
        'Product Name', 'Price', 'Return/Exchange', 'Expected Delivery',
        'Availability' and 'Product URL'. Any detail that cannot be found
        on a card is reported as '-'. Cards that carry neither a title nor
        a link are skipped.

    Raises:
        requests.RequestException: On an HTTP error status, a connection
            failure or a timeout.
    """
    # Local import so the function brings its own URL helpers into scope.
    from urllib.parse import quote_plus, urljoin

    base_url = 'https://www.amazon.in'
    max_pages = 3
    timeout = 30  # seconds; without it a stalled connection blocks forever
    # quote_plus still turns spaces into '+', and additionally escapes
    # characters such as '&' that would otherwise split the query string.
    search_url = f'{base_url}/s?k={quote_plus(product_name)}'

    def fetch(url):
        """Download *url* and return its parsed HTML."""
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return BeautifulSoup(response.text, 'html.parser')

    def text_of(card, selector):
        """Return the stripped text of the first match, or '-' if none."""
        node = card.select_one(selector)
        text = node.get_text(strip=True) if node is not None else ''
        return text or '-'

    first_page = fetch(search_url)

    # The second-to-last pagination item is read as the last page number.
    # Fall back to a single page when the bar is missing or not numeric
    # (e.g. a result set too short to be paginated).
    pagination = first_page.select('.s-pagination li')
    try:
        last_page = int(pagination[-2].text)
    except (IndexError, ValueError):
        last_page = 1
    pages = max(1, min(max_pages, last_page))

    products = []
    for page in range(1, pages + 1):
        # NOTE(review): assumes the un-paged search URL returns the same
        # listing as '&page=1', so the first response is reused - confirm.
        soup = first_page if page == 1 else fetch(f'{search_url}&page={page}')

        for card in soup.select('.s-result-item'):
            link = card.select_one('.a-link-normal')
            href = link.get('href') if link is not None else None
            title = text_of(card, '.a-text-normal')

            # A card with neither a title nor a link is not a product
            # listing, so it would only add a row of placeholders.
            if title == '-' and not href:
                continue

            products.append({
                'Brand Name': text_of(card, '.s-line-clamp-1'),
                'Product Name': title,
                'Price': text_of(card, '.a-price-whole'),
                'Return/Exchange': text_of(card, '.s-replace-4'),
                'Expected Delivery': text_of(card, '.s-replace-5'),
                'Availability': text_of(card, '.a-color-state'),
                # urljoin copes with both relative and absolute hrefs.
                'Product URL': urljoin(base_url, href) if href else '-',
            })

    return products

# Ask for the search term, scrape the matching listings, then persist them.
product = input('Enter the product name: ')
results = search_amazon_products(product)

# One row per scraped listing; the DataFrame index is left out of the file.
df = pd.DataFrame(data=results)
df.to_csv(path_or_buf='amazon_products.csv', index=False)


7. Write a Python program to scrape the details of all billionaires from www.forbes.com. Details to be scraped: “Rank”, “Name”, “Net worth”, “Age”, “Citizenship”, “Source”, “Industry”.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_forbes_billionaires():
    """Scrape the billionaires table from the Forbes billionaires page.

    The first ``<table>`` in the response is read row by row; the first
    seven ``<td>`` cells of each row are mapped, in order, to 'Rank',
    'Name', 'Net Worth', 'Age', 'Citizenship', 'Source' and 'Industry'.

    Returns:
        A list of dicts keyed by the seven column names above. Columns for
        which a row has no cell are filled with '-'. Rows without any
        ``<td>`` (such as header rows) are skipped. An empty list is
        returned when the page contains no ``<table>`` at all.

    Raises:
        requests.RequestException: On an HTTP error status, a connection
            failure or a timeout.
    """
    url = 'https://www.forbes.com/billionaires/'
    columns = (
        'Rank',
        'Name',
        'Net Worth',
        'Age',
        'Citizenship',
        'Source',
        'Industry',
    )

    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table')
    if table is None:
        # Nothing to read; mirror the other scrapers, which yield no rows
        # when their selectors match nothing.
        return []

    # html.parser does not invent a <tbody>, so fall back to the table
    # itself when the markup has none.
    body = table.tbody if table.tbody is not None else table

    billionaires = []
    for row in body.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            continue

        values = [cell.text.strip() for cell in cells[:len(columns)]]
        # Pad short rows so every record has all seven keys.
        values.extend('-' for _ in range(len(columns) - len(values)))
        billionaires.append(dict(zip(columns, values)))

    return billionaires

# Scrape the table once, then persist it.
results = scrape_forbes_billionaires()

# One row per billionaire; the DataFrame index is left out of the file.
df = pd.DataFrame(data=results)
df.to_csv(path_or_buf='forbes_billionaires.csv', index=False)


3. Write a Python program to access the search bar and search button on images.google.com and scrape 10 images each for the keywords ‘fruits’, ‘cars’, ‘Machine Learning’, ‘Guitar’ and ‘Cakes’.

In [None]:
import requests
from bs4 import BeautifulSoup
import urllib.request

def scrape_images(keyword, num_images):
    """Download up to *num_images* Google Images results for *keyword*.

    Each image is saved in the current working directory as
    ``"<keyword>_<n>.jpg"``, with ``n`` counting successful downloads
    from 1.

    Args:
        keyword: Search term; it is URL-encoded before being sent.
        num_images: Maximum number of images to download.

    Returns:
        The list of image URLs that were downloaded successfully, in
        download order. It may be shorter than *num_images* when too few
        usable results are found or some downloads fail.

    Raises:
        requests.RequestException: If the search request itself fails
            (HTTP error status, connection failure or timeout).
    """
    # Passing the query through `params` lets requests encode spaces and
    # reserved characters such as '&' in the keyword.
    response = requests.get(
        'https://www.google.com/search',
        params={'q': keyword, 'tbm': 'isch'},
        timeout=30,
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    images = []
    for image_element in soup.select('.rg_i.Q4LuWd'):
        if len(images) >= num_images:
            break

        # Subscripting a tag raises KeyError when the attribute is absent,
        # so use .get() and fall back to the plain 'src' attribute.
        image_url = image_element.get('data-src') or image_element.get('src')

        # Only http(s) URLs can be fetched; this also drops missing values
        # and inline 'data:' URIs.
        if not image_url or not image_url.startswith('http'):
            continue

        try:
            urllib.request.urlretrieve(image_url, f"{keyword}_{len(images) + 1}.jpg")
        except Exception as e:
            # Deliberately best-effort: one failed download is reported and
            # skipped so the remaining results can still be tried.
            print(f"Error scraping image: {e}")
        else:
            images.append(image_url)

    return images

# How many images to fetch per keyword, and the keywords set by the exercise.
num_images = 10
keywords = ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']

# Download phase: map each keyword to the URLs that were saved for it.
results = {}
for keyword in keywords:
    results[keyword] = scrape_images(keyword, num_images)

# Report phase: list the downloaded URLs under each keyword, numbered from 1.
for keyword, images in results.items():
    print(f"Images for keyword '{keyword}':")
    for i, image_url in enumerate(images):
        print(f"Image {i+1}: {image_url}")


4. Write a Python program to search for a smartphone (e.g. OnePlus Nord, Pixel 4a, etc.) on www.flipkart.com and scrape the following details for all the search results displayed on the 1st page. Details to be scraped: “Brand Name”, “Smartphone name”, “Colour”, “RAM”, “Storage(ROM)”, “Primary Camera”,
“Secondary Camera”, “Display Size”, “Battery Capacity”, “Price”, “Product URL”. If any of the details is missing, replace it with “-”. Save your results in a dataframe and a CSV file.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def search_flipkart_smartphones(product_name):
    """Scrape the first Flipkart search-results page for a smartphone query.

    Args:
        product_name: Free-text search term, e.g. ``'Oneplus Nord'``.

    Returns:
        A list of dicts, one per product card, with the keys 'Brand Name',
        'Smartphone Name', 'Colour', 'RAM', 'Storage(ROM)',
        'Primary Camera', 'Secondary Camera', 'Display Size',
        'Battery Capacity', 'Price' and 'Product URL'. Any detail that
        cannot be found is reported as '-'. Containers without a product
        link are skipped.

    Raises:
        requests.RequestException: On an HTTP error status, a connection
            failure or a timeout.
    """
    # Local import so the function brings its own URL helpers into scope.
    from urllib.parse import quote_plus, urljoin

    base_url = 'https://www.flipkart.com'
    # quote_plus still turns spaces into '+', and additionally escapes
    # characters such as '&' that would otherwise split the query string.
    search_url = f'{base_url}/search?q={quote_plus(product_name)}'
    # The spec list is read positionally, in this order.
    spec_fields = (
        'RAM',
        'Storage(ROM)',
        'Primary Camera',
        'Secondary Camera',
        'Display Size',
        'Battery Capacity',
    )

    def text_or_dash(node):
        """Return the stripped text of *node*, or '-' if missing/empty."""
        text = node.get_text(strip=True) if node is not None else ''
        return text or '-'

    # Without a timeout a stalled connection would block forever.
    response = requests.get(search_url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    smartphones = []
    for card in soup.find_all('div', {'class': '_1AtVbE'}):
        link = card.find('a', {'class': '_1fQZEK'})
        if link is None:
            # Name, colour and URL all come from this anchor; a container
            # without it is not a product card.
            continue

        spec_list = card.find('ul', {'class': '_1xgFaf'})
        spec_items = spec_list.find_all('li') if spec_list is not None else []
        # Pad short (or absent) spec lists so every field gets a value.
        spec_values = [text_or_dash(item) for item in spec_items[:len(spec_fields)]]
        spec_values += ['-'] * (len(spec_fields) - len(spec_values))

        href = link.get('href')

        smartphone = {
            'Brand Name': text_or_dash(card.find('div', {'class': '_4rR01T'})),
            'Smartphone Name': text_or_dash(link),
            # NOTE(review): the colour is taken from the anchor's 'title'
            # attribute, as before - confirm it really holds the colour.
            'Colour': link.get('title') or '-',
            **dict(zip(spec_fields, spec_values)),
            'Price': text_or_dash(card.find('div', {'class': '_30jeq3 _1_WHN1'})),
            # urljoin copes with both relative and absolute hrefs.
            'Product URL': urljoin(base_url, href) if href else '-',
        }
        smartphones.append(smartphone)

    return smartphones


# Ask for the model to look up, scrape the first results page, then persist it.
product = input('Enter the smartphone name: ')
results = search_flipkart_smartphones(product)

# One row per scraped phone; the DataFrame index is left out of the file.
df = pd.DataFrame(data=results)
df.to_csv(path_or_buf='flipkart_smartphones.csv', index=False)

5. Write a program to scrape the geospatial coordinates (latitude, longitude) of a city searched on Google Maps.

In [None]:
from geopy.geocoders import Nominatim

def get_coordinates(city):
    """Geocode *city* with Nominatim and return its coordinates.

    Args:
        city: Name of the place to look up.

    Returns:
        A ``(latitude, longitude)`` tuple taken from the geocoder's
        result, or ``(None, None)`` when the geocoder returns nothing.
    """
    match = Nominatim(user_agent='my_app').geocode(city)

    # Guard clause: no result means there is nothing to unpack.
    if not match:
        return None, None

    return match.latitude, match.longitude

# Ask for a city and look up its coordinates.
city = input('Enter the city name: ')
latitude, longitude = get_coordinates(city)

# A missing value on either axis means the lookup failed.
if latitude is None or longitude is None:
    print("Unable to find coordinates for the specified city.")
else:
    print(f"Coordinates for {city}: Latitude = {latitude}, Longitude = {longitude}")


6. Write a program to scrape all the available details of the best gaming laptops from digit.in.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_gaming_laptops():
    """Scrape the digit.in "best gaming laptops" list with its specifications.

    For every heading block the laptop name is read, and the ``<ul>`` that
    follows the heading as a sibling is read as name/value specification
    pairs.

    Returns:
        A list of dicts, one per laptop. Each has a 'Laptop Name' key plus
        one key per specification found for that laptop, so the key sets
        may differ between laptops. A missing name or specification value
        is reported as '-'. Specification items without a name are
        skipped.

    Raises:
        requests.RequestException: On an HTTP error status, a connection
            failure or a timeout.
    """
    url = 'https://www.digit.in/top-products/best-gaming-laptops-40.html'

    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or_dash(node):
        """Return the stripped text of *node*, or '-' if missing/empty."""
        text = node.get_text(strip=True) if node is not None else ''
        return text or '-'

    laptops = []
    for card in soup.find_all('div', {'class': 'TopNumbeHeading sticky-footer'}):
        laptop_name = text_or_dash(card.find('div', {'class': 'TopNumbeListing'}))

        # A heading may have no specification list after it.
        specs = card.find_next_sibling('ul')
        spec_items = specs.find_all('li') if specs is not None else []

        specifications = {}
        for spec in spec_items:
            label = spec.find('div', {'class': 'Specs-Wrap'})
            if label is None:
                # Without a name there is no column to store the value in.
                continue
            value = spec.find('div', {'class': 'Specs-Details'})
            specifications[label.get_text(strip=True)] = text_or_dash(value)

        laptops.append({
            'Laptop Name': laptop_name,
            **specifications,
        })

    return laptops


# Scrape the list once, then persist it.
results = scrape_gaming_laptops()

# One row per laptop; the DataFrame index is left out of the file.
df = pd.DataFrame(data=results)
df.to_csv(path_or_buf='gaming_laptops.csv', index=False)


9. Write a Python program to scrape data for all available hostels from https://www.hostelworld.com/ in the “London” location. You have to scrape the hostel name, distance from city centre, ratings, total reviews, overall reviews, privates from price, dorms from price, facilities and property description.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_hostels():
    """Scrape the Hostelworld search results for London.

    Returns:
        A list of dicts, one per result card, with the keys 'Hostel Name',
        'Distance from City Centre', 'Ratings', 'Total Reviews',
        'Overall Reviews', 'Privates from Price', 'Dorms from Price',
        'Facilities' (comma-separated) and 'Property Description'. Any
        detail that cannot be found on a card is reported as '-'.

    Raises:
        requests.RequestException: On an HTTP error status, a connection
            failure or a timeout.
    """
    url = 'https://www.hostelworld.com/search?location=London&country=England'

    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_or_dash(node):
        """Return the stripped text of *node*, or '-' if missing/empty."""
        text = node.get_text(strip=True) if node is not None else ''
        return text or '-'

    hostels = []
    for card in soup.find_all('div', {'class': 'fabresult'}):
        # The dorm price is read from the first <div> that follows the
        # 'price' div; it can only be located when that div exists.
        price_node = card.find('div', {'class': 'price'})
        dorms_node = price_node.find_next('div') if price_node is not None else None

        facility_labels = [
            label.get_text(strip=True)
            for label in card.find_all('span', {'class': 'facilities-label'})
        ]

        hostels.append({
            'Hostel Name': text_or_dash(card.find('h2', {'class': 'title'})),
            'Distance from City Centre': text_or_dash(
                card.find('span', {'class': 'distance'})
            ),
            'Ratings': text_or_dash(card.find('div', {'class': 'rating'})),
            'Total Reviews': text_or_dash(card.find('div', {'class': 'reviews'})),
            'Overall Reviews': text_or_dash(
                card.find('div', {'class': 'score orange'})
            ),
            'Privates from Price': text_or_dash(price_node),
            'Dorms from Price': text_or_dash(dorms_node),
            'Facilities': ', '.join(facility_labels) or '-',
            'Property Description': text_or_dash(card.find('div', {'class': 'desc'})),
        })

    return hostels

# Scrape the search results once, then persist them.
results = scrape_hostels()

# One row per hostel; the DataFrame index is left out of the file.
df = pd.DataFrame(data=results)
df.to_csv(path_or_buf='hostels_data.csv', index=False)
