# 1. Write a Python program that searches www.amazon.in for all products matching a given product name. The product to search for is taken as input from the user. For example, if the user enters ‘guitar’, search for guitars.


In [None]:
import requests
from bs4 import BeautifulSoup

def search_amazon(product):
    """Search amazon.in for ``product`` and print each result's title and price.

    Args:
        product: Free-text search term entered by the user.

    Returns:
        None. Output is printed: one "Product:"/"Price:" pair per search
        result, "No products found." when the page has no result containers,
        and "Failed to retrieve Amazon page." when the request fails or the
        response status is not 200.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    try:
        # Passing the term through ``params`` lets requests URL-encode it, so
        # characters such as "&" or "#" cannot corrupt the query string.
        response = requests.get(
            "https://www.amazon.in/s",
            params={"k": product},
            headers=headers,
            timeout=10,
        )
    except requests.RequestException:
        print("Failed to retrieve Amazon page.")
        return

    if response.status_code != 200:
        print("Failed to retrieve Amazon page.")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    results = soup.find_all('div', {'data-component-type': 's-search-result'})
    if not results:
        print("No products found.")
        return

    # Iterate with ``result`` so the loop does not overwrite the ``product`` argument.
    for result in results:
        title_tag = result.find('span', {'class': 'a-text-normal'})
        price_tag = result.find('span', {'class': 'a-price-whole'})
        # A result card without a title/price element must not abort the listing.
        title = title_tag.text.strip() if title_tag else "Title not available"
        price = price_tag.text.strip() if price_tag else "Price not available"
        print("Product:", title)
        print("Price:", price)
        print("="*50)

if __name__ == "__main__":
    # Ask the user what to look for, then print the matching listings.
    search_amazon(input("Enter the product to search on Amazon.in: "))


# 2. Extending the previous question, scrape the following details of each product listed on the first 3 pages of your search results and save them in a data frame and a CSV file. If the search returns fewer than 3 pages of results, scrape all the products available for that product name. Details to be scraped are: "Brand Name", "Name of the Product", "Price", "Return/Exchange", "Expected Delivery", "Availability" and "Product URL". If any of the details is missing for a product, replace it with "-".

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _text_by_id(soup, tag, element_id):
    """Return the stripped text of ``<tag id=element_id>`` in ``soup``, or '-' if absent."""
    node = soup.find(tag, {'id': element_id})
    return node.text.strip() if node else '-'


def get_product_details(product_url):
    """Fetch one product page and extract its listing details.

    Args:
        product_url: Absolute URL of the product page.

    Returns:
        A 7-tuple ``(brand_name, product_name, price, return_exchange,
        expected_delivery, availability, product_url)``. Any detail that is
        not present on the page is reported as '-'. When the page cannot be
        fetched, all six scraped details are '-' and the URL is still
        returned, since it is already known to the caller.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    unavailable = ('-',) * 6 + (product_url,)

    try:
        response = requests.get(product_url, headers=headers, timeout=10)
    except requests.RequestException:
        return unavailable
    if response.status_code != 200:
        return unavailable

    soup = BeautifulSoup(response.content, 'html.parser')
    # Each lookup is done once through the helper instead of twice per field.
    return (
        _text_by_id(soup, 'a', 'bylineInfo'),
        _text_by_id(soup, 'span', 'productTitle'),
        _text_by_id(soup, 'span', 'priceblock_ourprice'),
        _text_by_id(soup, 'div', 'RETURNS_POLICY'),
        _text_by_id(soup, 'div', 'ddmDeliveryMessage'),
        _text_by_id(soup, 'div', 'availability'),
        product_url,
    )

def search_amazon(product):
    """Collect product details from the first three amazon.in result pages.

    Args:
        product: Free-text search term entered by the user.

    Returns:
        A list of the tuples returned by ``get_product_details``, one per
        search result that exposes a product link. Paging stops early at the
        first page that contains no result containers; a page that cannot be
        fetched is skipped.
    """
    from urllib.parse import urljoin

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    all_products = []
    for page_number in range(1, 4):
        try:
            # ``params`` URL-encodes the search term and the page number.
            response = requests.get(
                "https://www.amazon.in/s",
                params={"k": product, "page": page_number},
                headers=headers,
                timeout=10,
            )
        except requests.RequestException:
            continue
        if response.status_code != 200:
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        results = soup.find_all('div', {'data-component-type': 's-search-result'})
        if not results:
            break

        # Iterate with ``result`` so the ``product`` argument is not overwritten.
        for result in results:
            link = result.find('a', {'class': 'a-link-normal'})
            href = link.get('href') if link else None
            if not href:
                # Without a link there is no product page to scrape.
                continue
            # urljoin leaves an already-absolute href untouched and resolves
            # a site-relative one against the Amazon host.
            product_link = urljoin('https://www.amazon.in', href)
            all_products.append(get_product_details(product_link))

    return all_products

if __name__ == "__main__":
    # Column labels, in the same order as the tuples in the scraped rows.
    csv_columns = [
        "Brand Name",
        "Name of the Product",
        "Price",
        "Return/Exchange",
        "Expected Delivery",
        "Availability",
        "Product URL",
    ]

    search_term = input("Enter the product to search on Amazon.in: ")
    scraped_rows = search_amazon(search_term)

    # Build the table, then persist it without the positional index column.
    df = pd.DataFrame(scraped_rows, columns=csv_columns)
    df.to_csv("amazon_products.csv", index=False)

    print("Scraping and saving complete.")


# 3. Write a Python program to access the search bar and search button on images.google.com and scrape 10 images each for the keywords ‘fruits’, ‘cars’, ‘Machine Learning’, ‘Guitar’ and ‘Cakes’.

In [None]:
from selenium import webdriver
import time
import os
import requests
from bs4 import BeautifulSoup

def _fetch_image_bytes(src):
    """Return the raw bytes behind an ``<img>`` source value.

    A base64 ``data:`` URI is decoded locally; any other value is treated as
    a URL and downloaded. Raises ``ValueError`` for a ``data:`` URI that is
    not base64-encoded and ``requests.RequestException`` on download failure.
    """
    import base64

    if src.startswith("data:"):
        header, _, payload = src.partition(",")
        if ";base64" not in header:
            raise ValueError("unsupported data URI encoding")
        return base64.b64decode(payload)

    response = requests.get(src, timeout=10)
    response.raise_for_status()
    return response.content


def scrape_images(keyword, num_images):
    """Search Google Images for ``keyword`` and save up to ``num_images`` results.

    Images are written as ``image1.jpg``, ``image2.jpg``, ... inside a
    directory named after the keyword (spaces replaced by underscores).

    Args:
        keyword: Search term to type into the Google Images search bar.
        num_images: Maximum number of images to save.

    Returns:
        None. Progress and per-image errors are printed.
    """
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get("https://www.google.com/imghp?hl=en&ogbl")
        time.sleep(2)

        # Selenium 4 removed find_element_by_xpath; find_element(By.XPATH, ...)
        # is the supported spelling of the same lookup.
        search_bar = driver.find_element(By.XPATH, "//input[@title='Search']")
        search_bar.send_keys(keyword)
        search_bar.submit()
        time.sleep(2)

        for _ in range(5):
            driver.execute_script("window.scrollBy(0,1000)")
            # Pause between scrolls so the page can react before the next one.
            time.sleep(1)

        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always release the browser, even if the search box lookup fails.
        driver.quit()

    image_elements = soup.find_all('img', {'class': 'rg_i'})

    directory = keyword.replace(" ", "_")
    os.makedirs(directory, exist_ok=True)

    count = 0
    # Walk every candidate (not just the first ``num_images``) so that a
    # failed download is replaced by the next image instead of leaving a gap.
    for img in image_elements:
        if count >= num_images:
            break

        # Fall back to ``data-src`` when the tag has no ``src`` attribute.
        src = img.get('src') or img.get('data-src')
        if not src:
            continue

        try:
            image_data = _fetch_image_bytes(src)
            with open(os.path.join(directory, f"image{count + 1}.jpg"), "wb") as f:
                f.write(image_data)
        except (requests.RequestException, ValueError, OSError) as e:
            print(f"Error downloading image {count + 1} for '{keyword}': {e}")
            continue

        print(f"Downloaded image {count + 1} for '{keyword}'")
        count += 1

if __name__ == "__main__":
    # Download the same number of images for every search term, in order.
    images_per_keyword = 10
    for search_term in ('fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes'):
        scrape_images(search_term, images_per_keyword)


# 4. Write a Python program to search for a smartphone (e.g. OnePlus Nord, Pixel 4a, etc.) on www.flipkart.com and scrape the following details for all the search results displayed on the 1st page. Details to be scraped: “Brand Name”, “Smartphone name”, “Colour”, “RAM”, “Storage(ROM)”, “Primary Camera”, “Secondary Camera”, “Display Size”, “Battery Capacity”, “Price”, “Product URL”. In case any of the details is missing, replace it with “-”. Save your results in a dataframe and a CSV file.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _first_text(node, tag, css_class):
    """Return the stripped text of the first matching descendant, or None if absent."""
    found = node.find(tag, {"class": css_class})
    return found.text.strip() if found else None


def scrape_flipkart_smartphones(search_query):
    """Scrape the first Flipkart result page for ``search_query``.

    Args:
        search_query: Smartphone name to search for.

    Returns:
        A list of dicts, one per result container that has a brand or title
        element, with the keys "Brand Name", "Smartphone Name", "Colour",
        "RAM", "Storage(ROM)", "Primary Camera", "Secondary Camera",
        "Display Size", "Battery Capacity", "Price" and "Product URL".
        Missing values are "-". An empty list is returned (after printing a
        message) when the page cannot be fetched.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    # Same query string as before, but built by requests so that the search
    # term is URL-encoded ("&", "#", spaces, ...).
    params = {
        "q": search_query,
        "otracker": "search",
        "otracker1": "search",
        "marketplace": "FLIPKART",
        "as-show": "on",
        "as": "off",
        "page": 1,
    }

    try:
        response = requests.get(
            "https://www.flipkart.com/search",
            params=params,
            headers=headers,
            timeout=10,
        )
    except requests.RequestException:
        print("Failed to fetch data from Flipkart.")
        return []

    if response.status_code != 200:
        print("Failed to fetch data from Flipkart.")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    results = []

    for product in soup.find_all("div", {"class": "_1AtVbE"}):
        brand_name = _first_text(product, "div", "_4rR01T")
        title_link = product.find("a", {"class": "IRpwTa"})
        name = title_link.text.strip() if title_link else None

        # A container with neither a brand nor a title carries no product;
        # skip it rather than emit a row made only of "-" placeholders.
        if brand_name is None and name is None:
            continue

        # The colour is taken from the second comma-separated part of the title.
        name_parts = name.split(",") if name else []
        color = name_parts[1].strip() if len(name_parts) > 1 else "-"

        specs_dict = {}
        for spec in product.find_all("li", {"class": "rgWa7D"}):
            # Split on the first ":" only, so values that contain ":" are kept.
            key, separator, value = spec.text.partition(":")
            if separator:
                specs_dict[key.strip()] = value.strip()

        price_text = _first_text(product, "div", "_30jeq3 _1_WHN1")
        if price_text is None:
            price = "-"
        else:
            price = price_text.replace("₹", "").replace(",", "").strip()

        # ``.get`` avoids the TypeError/KeyError raised by subscripting a
        # missing link or a link without an href.
        href = title_link.get("href") if title_link else None
        product_url = "https://www.flipkart.com" + href if href else "-"

        results.append({
            "Brand Name": brand_name if brand_name is not None else "-",
            "Smartphone Name": name if name is not None else "-",
            "Colour": color,
            "RAM": specs_dict.get("RAM", "-"),
            "Storage(ROM)": specs_dict.get("ROM", "-"),
            "Primary Camera": specs_dict.get("Primary Camera", "-"),
            "Secondary Camera": specs_dict.get("Secondary Camera", "-"),
            "Display Size": specs_dict.get("Display Size", "-"),
            "Battery Capacity": specs_dict.get("Battery Capacity", "-"),
            "Price": price,
            "Product URL": product_url,
        })

    return results

if __name__ == "__main__":
    search_query = input("Enter the smartphone you want to search for on Flipkart: ")
    search_results = scrape_flipkart_smartphones(search_query)

    # Only write a CSV when the scraper returned at least one row.
    if not search_results:
        print("No search results found.")
    else:
        df = pd.DataFrame(search_results)
        df.to_csv(f"{search_query}_flipkart_search_results.csv", index=False)
        print("Search results saved successfully.")


# 5. Write a program to scrape the geospatial coordinates (latitude, longitude) of a city searched for on Google Maps.

In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_coordinates(city):
    """Look up ``city`` on Google Maps and extract its latitude and longitude.

    The coordinates are read from the ``/@<lat>,<lng>`` segment of the URL in
    the page's ``og:url`` meta tag.

    Args:
        city: Name of the city to search for.

    Returns:
        ``(latitude, longitude)`` as strings, or ``(None, None)`` when the
        page cannot be fetched or no coordinates can be extracted (a message
        is printed in both failure cases).
    """
    from urllib.parse import quote_plus

    # quote_plus turns spaces into "+" (as before) and also escapes characters
    # such as "/" or "?" that would otherwise change the URL's structure.
    search_url = "https://www.google.com/maps/search/" + quote_plus(city)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    try:
        response = requests.get(search_url, headers=headers, timeout=10)
    except requests.RequestException:
        print("Failed to retrieve data from Google Maps.")
        return None, None

    if response.status_code != 200:
        print("Failed to retrieve data from Google Maps.")
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')
    meta = soup.find("meta", property="og:url")
    try:
        if meta is None:
            raise ValueError("og:url meta tag not found")
        url = meta["content"]
        latitude, longitude = url.split("/@")[1].split(",")[0:2]
        # Reject anything that is not numeric instead of returning stray text.
        float(latitude)
        float(longitude)
    except (KeyError, IndexError, ValueError) as e:
        print(f"Error extracting coordinates: {e}")
        return None, None

    return latitude, longitude

if __name__ == "__main__":
    city = input("Enter the city name to search for on Google Maps: ")
    latitude, longitude = scrape_coordinates(city)

    # Both values must be present (and non-empty) to report a position.
    if not (latitude and longitude):
        print("Failed to retrieve coordinates.")
    else:
        print(f"Coordinates of {city}: Latitude - {latitude}, Longitude - {longitude}")


# 6. Write a program to scrape all the available details of the best gaming laptops from digit.in.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_gaming_laptops():
    """Scrape the digit.in "best gaming laptops" list.

    Returns:
        A list of dicts with the keys "Name", "Processor", "Memory",
        "Operating System", "Storage", "Display Size", "Price" and
        "Product URL". Values that cannot be found on the page are "-".
        An empty list is returned (after printing a message) when the page
        cannot be fetched.
    """
    url = "https://www.digit.in/top-products/best-gaming-laptops-40.html"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        print("Failed to fetch data from digit.in.")
        return []

    if response.status_code != 200:
        print("Failed to fetch data from digit.in.")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    laptops = soup.find_all('div', class_='TopNumbeHeading sticky-footer')

    def value_at(values, index):
        """Return the stripped text of ``values[index]``, or '-' if out of range."""
        return values[index].text.strip() if index < len(values) else "-"

    results = []
    for laptop in laptops:
        # ``find`` only searches descendants, so the heading container will
        # not find itself; fall back to its own text when nothing is nested.
        name_tag = laptop.find('div', class_='TopNumbeHeading sticky-footer') or laptop
        name = name_tag.text.strip()

        specs = laptop.find('div', class_='smprice')
        # Collect the spec values once instead of re-querying for every field.
        values = specs.find_all('div', class_='value') if specs else []
        price_tag = specs.find('td', class_='smprice') if specs else None
        link = laptop.find('a')

        results.append({
            "Name": name,
            "Processor": value_at(values, 0),
            "Memory": value_at(values, 1),
            # Read straight into the dict; a local named ``os`` would shadow
            # the standard-library module name.
            "Operating System": value_at(values, 2),
            "Storage": value_at(values, 3),
            "Display Size": value_at(values, 4),
            "Price": price_tag.text.strip() if price_tag else "-",
            "Product URL": link.get('href', "-") if link else "-",
        })

    return results

if __name__ == "__main__":
    gaming_laptops_data = scrape_gaming_laptops()

    # Nothing scraped means there is nothing worth writing to disk.
    if not gaming_laptops_data:
        print("No data found.")
    else:
        df = pd.DataFrame(gaming_laptops_data)
        df.to_csv("gaming_laptops_digit.csv", index=False)
        print("Scraping complete. Data saved to 'gaming_laptops_digit.csv'")


# 7. Write a Python program to scrape the details of all billionaires from www.forbes.com. Details to be scraped: “Rank”, “Name”, “Net worth”, “Age”, “Citizenship”, “Source”, “Industry”.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _next_div_text(node, css_class):
    """Return the stripped text of the next ``<div class=css_class>`` after ``node``, or '-'."""
    found = node.find_next('div', class_=css_class)
    return found.text.strip() if found else "-"


def scrape_billionaires():
    """Scrape the Forbes billionaires list page.

    Returns:
        A list of dicts with the keys "Rank", "Name", "Net Worth", "Age",
        "Citizenship", "Source" and "Industry". "Rank" is the 1-based
        position of the name on the page, not a value read from the page.
        Fields that cannot be found are "-". An empty list is returned
        (after printing a message) when the page cannot be fetched.
    """
    url = "https://www.forbes.com/billionaires/"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        print("Failed to fetch data from Forbes.com.")
        return []

    if response.status_code != 200:
        print("Failed to fetch data from Forbes.com.")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    billionaires = soup.find_all('div', class_='personName')

    # find_next returns None when no later element matches; the helper maps
    # that to "-" so one incomplete entry cannot abort the whole scrape.
    return [
        {
            "Rank": rank,
            "Name": billionaire.text.strip(),
            "Net Worth": _next_div_text(billionaire, 'netWorth'),
            "Age": _next_div_text(billionaire, 'age'),
            "Citizenship": _next_div_text(billionaire, 'countryOfCitizenship'),
            "Source": _next_div_text(billionaire, 'source-column'),
            "Industry": _next_div_text(billionaire, 'category'),
        }
        for rank, billionaire in enumerate(billionaires, start=1)
    ]

if __name__ == "__main__":
    billionaires_data = scrape_billionaires()

    # Skip the CSV entirely when the scraper came back empty-handed.
    if not billionaires_data:
        print("No data found.")
    else:
        df = pd.DataFrame(billionaires_data)
        df.to_csv("billionaires_forbes.csv", index=False)
        print("Scraping complete. Data saved to 'billionaires_forbes.csv'")


# 8. Write a program to extract at least 500 comments, along with each comment's upvote count and the time it was posted, from any YouTube video.

In [None]:
import requests
import json

def fetch_video_comments(video_id, api_key, max_results=500):
    """Fetch up to ``max_results`` top-level comments for a YouTube video.

    The commentThreads endpoint returns at most 100 items per request, so the
    comments are collected page by page using ``nextPageToken``.

    Args:
        video_id: ID of the YouTube video.
        api_key: YouTube Data API key.
        max_results: Maximum number of comments to return.

    Returns:
        A list of dicts with the keys 'comment', 'upvotes' and 'time'. If a
        request fails, the failure is printed and the comments gathered so
        far are returned, or ``None`` when none were gathered.
    """
    base_url = "https://www.googleapis.com/youtube/v3/commentThreads"
    params = {
        "part": "snippet",
        "videoId": video_id,
        "key": api_key,
    }

    comments_data = []
    while len(comments_data) < max_results:
        # Never ask for more than the per-request cap or than is still needed.
        params["maxResults"] = min(100, max_results - len(comments_data))

        try:
            response = requests.get(base_url, params=params, timeout=10)
        except requests.RequestException as e:
            print("Failed to fetch comments:", e)
            return comments_data or None

        if response.status_code != 200:
            print("Failed to fetch comments:", response.status_code)
            return comments_data or None

        data = response.json()
        for item in data.get('items', []):
            snippet = item['snippet']['topLevelComment']['snippet']
            comments_data.append({
                'comment': snippet['textDisplay'],
                'upvotes': snippet['likeCount'],
                'time': snippet['publishedAt']
            })

        next_page_token = data.get('nextPageToken')
        if not next_page_token:
            # No further pages: the video has fewer comments than requested.
            break
        params['pageToken'] = next_page_token

    return comments_data

if __name__ == "__main__":
    video_id = input("Enter the YouTube video ID: ")
    api_key = input("Enter your YouTube Data API key: ")
    comments_data = fetch_video_comments(video_id, api_key)

    if comments_data:
        print("Comments extracted successfully.")
        # Text, upvotes and time are all printed per comment; every print
        # belongs to the loop body and must share its indentation.
        for idx, comment_info in enumerate(comments_data, start=1):
            print(f"\nComment {idx}:")
            print("Text:", comment_info['comment'])
            print("Upvotes:", comment_info['upvotes'])
            print("Time:", comment_info['time'])
    else:
        print("No comments found or failed to fetch comments.")

        

# 9. Write a Python program to scrape data for all available hostels from https://www.hostelworld.com/ in the “London” location. You have to scrape the hostel name, distance from city centre, ratings, total reviews, overall reviews, privates from price, dorms from price, facilities and property description.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _text_or_dash(node, tag, css_class):
    """Return the stripped text of the first matching descendant of ``node``, or '-'.

    ``node`` may be ``None`` (parent element missing), which also yields '-'.
    """
    found = node.find(tag, class_=css_class) if node is not None else None
    return found.text.strip() if found else "-"


def scrape_hostels(location, country="England"):
    """Scrape the Hostelworld listing page for a city.

    Args:
        location: City to search in (for example "London").
        country: Country used in the listing URL; defaults to "England".

    Returns:
        A list of dicts with the keys "Hostel Name", "Distance from City
        Centre", "Rating", "Total Reviews", "Overall Reviews", "Privates from
        Price", "Dorms from Price", "Facilities" and "Description". Fields
        missing from a listing are "-". An empty list is returned (after
        printing a message) when the page cannot be fetched.
    """
    from urllib.parse import quote

    # Percent-encode both path segments so spaces or "/" cannot break the URL.
    url = (
        "https://www.hostelworld.com/findabed.php/"
        f"ChosenCity.{quote(location, safe='')}/ChosenCountry.{quote(country, safe='')}"
    )

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        print(f"Failed to fetch data from Hostelworld.com for {location}.")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch data from Hostelworld.com for {location}.")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    results = []
    for hostel in soup.find_all('div', class_='fabresult'):
        # Both price fields live under the same container; look it up once.
        prices = hostel.find('div', class_='prices')
        results.append({
            "Hostel Name": _text_or_dash(hostel, 'h2', 'title'),
            "Distance from City Centre": _text_or_dash(hostel, 'span', 'description'),
            "Rating": _text_or_dash(hostel, 'div', 'score orange'),
            "Total Reviews": _text_or_dash(hostel, 'div', 'reviews'),
            "Overall Reviews": _text_or_dash(hostel, 'div', 'overall'),
            "Privates from Price": _text_or_dash(prices, 'div', 'privates from'),
            "Dorms from Price": _text_or_dash(prices, 'div', 'dorms from'),
            "Facilities": _text_or_dash(hostel, 'ul', 'facilities'),
            "Description": _text_or_dash(hostel, 'div', 'ratingtext'),
        })

    return results

if __name__ == "__main__":
    location = input("Enter the location (e.g., London) to search for hostels: ")
    hostels_data = scrape_hostels(location)

    # An empty result list means there is nothing to save for this location.
    if not hostels_data:
        print(f"No data found for hostels in {location}.")
    else:
        df = pd.DataFrame(hostels_data)
        df.to_csv(f"hostels_{location.lower()}.csv", index=False)
        print(f"Scraping complete. Data saved to 'hostels_{location.lower()}.csv'")
