In [None]:
import requests
from bs4 import BeautifulSoup

def search_amazon_products(product):
    """Search amazon.in for ``product`` and print each result's title and price.

    Fetches the first page of search results and, for every result card that
    contains both a title element and a price element, prints the two values
    followed by a separator line.  Nothing is returned.

    Args:
        product: Search phrase, sent as the ``k`` query parameter.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    base_url = "https://www.amazon.in/s"
    params = {"k": product}

    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(base_url, params=params, timeout=30)
    if not response.ok:
        # An error page contains no result cards; say why nothing is listed
        # instead of finishing silently.
        print(f"Amazon search request failed with HTTP status {response.status_code}")
        return

    soup = BeautifulSoup(response.content, "html.parser")
    product_elements = soup.find_all("div", {"data-component-type": "s-search-result"})

    for element in product_elements:
        title_element = element.find("span", {"class": "a-size-medium"})
        price_element = element.find("span", {"class": "a-offscreen"})

        # Cards missing either field are skipped rather than half-printed.
        if title_element is None or price_element is None:
            continue

        print(f"Title: {title_element.text.strip()}")
        print(f"Price: {price_element.text.strip()}")
        print("-" * 30)

# Ask the user which product to look up on Amazon
product = input("Enter a product to search on Amazon: ")

# Print the title and price of every matching search result
search_amazon_products(product)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_product_details(product, page_limit=3):
    """Collect product details from up to ``page_limit`` amazon.in result pages.

    Args:
        product: Search phrase, sent as the ``k`` query parameter.
        page_limit: Maximum number of result pages to fetch (default 3).

    Returns:
        A list with one dict per result card, using the keys "Brand Name",
        "Name of the Product", "Price", "Return/Exchange",
        "Expected Delivery", "Availability" and "Product URL".  A field whose
        element is not found is reported as "-"; "Brand Name" is always "-".

    Raises:
        requests.exceptions.RequestException: If a request fails or does not
            complete within the timeout.
    """
    from urllib.parse import urljoin

    site_root = "https://www.amazon.in"
    base_url = site_root + "/s"
    params = {"k": product}

    def text_or_dash(node):
        """Return the node's stripped text, or "-" when the node is missing."""
        return node.text.strip() if node is not None else "-"

    product_data = []
    for page_number in range(1, page_limit + 1):
        # The first request carries no "page" parameter; later requests ask
        # for the page by number.  The previous code stored the next link's
        # href in "page", which is a URL path rather than a page number.
        if page_number > 1:
            params["page"] = page_number

        response = requests.get(base_url, params=params, timeout=30)
        soup = BeautifulSoup(response.content, "html.parser")

        product_elements = soup.find_all("div", {"data-component-type": "s-search-result"})
        if not product_elements:  # No more pages for the product
            break

        for element in product_elements:
            url_element = element.find("a", {"class": "a-link-normal"})
            href = url_element.get("href") if url_element is not None else None

            product_data.append({
                "Brand Name": "-",
                "Name of the Product": text_or_dash(element.find("span", {"class": "a-size-medium"})),
                "Price": text_or_dash(element.find("span", {"class": "a-offscreen"})),
                "Return/Exchange": text_or_dash(element.find("span", {"class": "a-truncate-cut"})),
                "Expected Delivery": text_or_dash(element.find("span", {"class": "s-info-strip-text"})),
                "Availability": text_or_dash(element.find("span", {"class": "a-size-base"})),
                # urljoin handles both site-relative and absolute hrefs.
                "Product URL": urljoin(site_root, href) if href else "-",
            })

        # Matching on the single "s-pagination-next" class also matches links
        # that carry additional classes; a multi-class string only matches an
        # identical class attribute.
        if soup.find("a", {"class": "s-pagination-next"}) is None:
            break

    return product_data

# Ask the user which product to look up on Amazon
product = input("Enter a product to search on Amazon: ")

# Collect one record (dict) per search result
products = scrape_product_details(product)

# Tabulate the records, one row per product
df = pd.DataFrame(data=products)

# Persist the table as CSV, leaving out the index column
df.to_csv(path_or_buf="amazon_products.csv", index=False)

# Show the table in the cell output
print(df)


In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import requests
import os

def scrape_images(keyword, num_images):
    """Download up to ``num_images`` Google Images thumbnails for ``keyword``.

    Drives a headless Chrome session: submits ``keyword`` on Google Images,
    scrolls the result page three times, then saves the ``src`` of the first
    ``num_images`` elements matching ``.rg_i`` into a directory named after
    the lower-cased keyword, as ``<keyword>_<n>.jpg``.

    Args:
        keyword: Search phrase; also used for the directory and file names.
        num_images: Maximum number of result elements to consider.

    Returns:
        The number of image files actually written.
    """
    import base64
    from urllib.parse import unquote_to_bytes

    from selenium.webdriver.common.by import By

    # Configure Chrome options
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run Chrome in headless mode (without opening GUI)
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    # Set path to your chromedriver executable
    chromedriver_path = "path_to_chromedriver"

    # NOTE(review): passing the driver path positionally is the older call
    # style; newer Selenium releases expect a Service object instead —
    # confirm against the installed Selenium version.
    driver = webdriver.Chrome(chromedriver_path, options=options)

    saved = 0
    # try/finally guarantees the browser process is shut down even when a
    # lookup, download or file write raises part-way through.
    try:
        # Open Google Images and submit the query
        driver.get("https://www.google.com/imghp")
        # find_element(By...) replaces the find_element_by_* helpers, which
        # are no longer available in current Selenium releases.
        search_bar = driver.find_element(By.NAME, "q")
        search_bar.send_keys(keyword + Keys.RETURN)

        # Wait for the page to load
        time.sleep(2)

        # Scroll down to load more images
        for _ in range(3):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

        # Find all image elements on the page
        image_elements = driver.find_elements(By.CSS_SELECTOR, ".rg_i")

        # Create a directory to save the images
        save_dir = keyword.lower()
        os.makedirs(save_dir, exist_ok=True)

        for i, image_element in enumerate(image_elements[:num_images]):
            image_url = image_element.get_attribute("src")
            if not image_url:
                continue

            if image_url.startswith("data:"):
                # Inline thumbnails arrive as data: URIs, which requests
                # cannot fetch; decode the payload directly instead.
                header, _, payload = image_url.partition(",")
                if ";base64" in header:
                    image_bytes = base64.b64decode(payload)
                else:
                    image_bytes = unquote_to_bytes(payload)
            else:
                image_bytes = requests.get(image_url, timeout=30).content

            # File numbering follows the element's position, as before.
            with open(os.path.join(save_dir, f"{keyword}_{i+1}.jpg"), "wb") as f:
                f.write(image_bytes)
            saved += 1
    finally:
        # Close the browser
        driver.quit()

    return saved

# Search terms to fetch images for, and how many images to request per term
keywords = [
    'fruits',
    'cars',
    'Machine Learning',
    'Guitar',
    'Cakes',
]
num_images = 10

# Run the scraper once per search term and report after each run
for keyword in keywords:
    scrape_images(keyword, num_images)
    print(f"Scraped {num_images} images for '{keyword}'")


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_flipkart_smartphones(search_query):
    """Scrape smartphone listings from the first Flipkart search result page.

    Args:
        search_query: Search phrase, sent as the ``q`` query parameter.

    Returns:
        A list with one dict per ``div._1AtVbE`` container, using the keys
        "Brand Name", "Smartphone Name", "Colour", "RAM", "Storage(ROM)",
        "Primary Camera", "Secondary Camera", "Display Size",
        "Battery Capacity", "Price" and "Product URL".  Any value that cannot
        be located is reported as "-".

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    base_url = "https://www.flipkart.com"
    search_url = base_url + "/search"
    params = {"q": search_query}

    def node_text(node):
        """Return the node's stripped text, or "-" when the node is missing."""
        return node.text.strip() if node is not None else "-"

    def child_text(node, index):
        """Return the stripped text of ``node.contents[index]``, or "-".

        The fixed positions used below are frequently beyond the end of the
        child list; those cases yield "-" instead of raising IndexError.
        """
        if node is None:
            return "-"
        children = node.contents
        if index >= len(children):
            return "-"
        child = children[index]
        raw = child if isinstance(child, str) else child.text
        return raw.strip()

    response = requests.get(search_url, params=params, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    product_elements = soup.find_all("div", {"class": "_1AtVbE"})

    product_data = []
    for element in product_elements:
        # NOTE(review): brand and colour are both read from the same
        # "_4rR01T" element, as in the original selectors — confirm.
        heading_element = element.find("div", {"class": "_4rR01T"})
        name_element = element.find("a", {"class": "IRpwTa"})
        url_element = element.find("a", {"class": "_1fQZEK"})
        price_element = element.find("div", {"class": "_30jeq3 _1_WHN1"})
        # All specification fields come from the same list, so look it up once.
        spec_list = element.find("ul", {"class": "_1xgFaf"})

        href = url_element.get("href") if url_element is not None else None

        price = node_text(price_element)
        # Drop the rupee sign (U+20B9).  The second replace removes the same
        # sign as it appears when UTF-8 text was mis-decoded, which is the
        # form the previous literal had.
        price = price.replace("\u20b9", "").replace("\u00e2\u201a\u00b9", "")

        product_data.append({
            "Brand Name": node_text(heading_element),
            "Smartphone Name": node_text(name_element),
            "Colour": child_text(heading_element, 2),
            "RAM": child_text(spec_list, 4),
            "Storage(ROM)": child_text(spec_list, 8),
            "Primary Camera": child_text(spec_list, 12),
            "Secondary Camera": child_text(spec_list, 16),
            "Display Size": child_text(spec_list, 20),
            "Battery Capacity": child_text(spec_list, 24),
            "Price": price,
            "Product URL": base_url + href if href else "-",
        })

    return product_data

# Ask the user which smartphone to look up on Flipkart
search_query = input("Enter a smartphone to search on Flipkart: ")

# Collect one record (dict) per listing
products = scrape_flipkart_smartphones(search_query)

# Tabulate the records, one row per listing
df = pd.DataFrame(data=products)

# Persist the table as CSV, leaving out the index column
df.to_csv(path_or_buf="flipkart_smartphones.csv", index=False)

# Show the table in the cell output
print(df)


In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_coordinates(city):
    """Look up ``city`` on Google Maps and return its coordinates as strings.

    The coordinates are read from the ``content`` attribute of the page's
    ``<meta itemprop="image">`` tag.

    Args:
        city: Place name to search for.

    Returns:
        A ``(latitude, longitude)`` tuple of strings, or ``(None, None)``
        when the tag is missing or holds no "lat,lng" pair.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    from urllib.parse import parse_qs, quote_plus, urlsplit

    base_url = "https://www.google.com/maps/search/"
    # quote_plus turns spaces into "+" like the previous replace() did, and
    # additionally escapes characters such as "&", "#" or non-ASCII letters.
    url = base_url + quote_plus(city)

    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    # Find the meta element whose content carries the coordinates
    image_meta = soup.find("meta", {"itemprop": "image"})
    if image_meta is None:
        return None, None

    content = image_meta.get("content") or ""

    # NOTE(review): the content is presumably a static-map URL that carries
    # the position in a "center" query parameter (possibly percent-encoded)
    # — confirm against a live response.  When there is no such parameter,
    # fall back to the text after the last "=", as the previous code did.
    center_values = parse_qs(urlsplit(content).query).get("center")
    raw_pair = center_values[0] if center_values else content.split("=")[-1]

    parts = [part.strip() for part in raw_pair.split(",")]
    # Without two non-empty components there is no usable coordinate pair.
    if len(parts) < 2 or not parts[0] or not parts[1]:
        return None, None
    return parts[0], parts[1]

# Ask the user which city to look up on Google Maps
city = input("Enter a city to search on Google Maps: ")

# Look the city up; both values are None when nothing was found
latitude, longitude = scrape_coordinates(city)

# Report the result, or say that the lookup came back empty
if not (latitude and longitude):
    print("Coordinates not found.")
else:
    print(f"Latitude: {latitude}")
    print(f"Longitude: {longitude}")


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_funding_deals():
    """Scrape the startup funding table from trak.in.

    Reads the table with id ``tablepress-48`` and converts every data row
    (the first row is treated as the header and skipped) into a dict.

    Returns:
        A list of dicts with the keys "Date", "Startup", "Industry",
        "Sub Vertical", "City", "Investor", "Investment Type" and "Amount",
        taken from cells 1 through 8 of each row.  Rows with fewer than nine
        cells are skipped.  An empty list is returned when the table is not
        present in the page.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    url = "https://trak.in/india-startup-funding-investment-2015/"

    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    table = soup.find("table", {"id": "tablepress-48"})
    if table is None:
        # Nothing to parse (error page or changed markup).
        return []

    # Column names in cell order; cell 0 is not used.
    columns = (
        "Date",
        "Startup",
        "Industry",
        "Sub Vertical",
        "City",
        "Investor",
        "Investment Type",
        "Amount",
    )
    last_cell_index = len(columns)  # cells[1] .. cells[8] are read

    data = []
    for row in table.find_all("tr")[1:]:
        cells = row.find_all("td")
        # The highest index read is 8, so at least nine cells are required;
        # the previous ">= 5" check let shorter rows through to an IndexError.
        if len(cells) <= last_cell_index:
            continue
        data.append({
            column: cells[index].text.strip()
            for index, column in enumerate(columns, start=1)
        })

    return data

# Scrape the funding deals
funding_deals = scrape_funding_deals()

# Create a DataFrame from the scraped data.  Naming the columns keeps the
# "Date" column present even when nothing was scraped, so the filter below
# cannot fail with a KeyError.
df = pd.DataFrame(
    funding_deals,
    columns=[
        "Date",
        "Startup",
        "Industry",
        "Sub Vertical",
        "City",
        "Investor",
        "Investment Type",
        "Amount",
    ],
)

# Parse the scraped date text into real timestamps before comparing; a plain
# string comparison against ISO-formatted bounds is only meaningful when the
# scraped text is ISO-formatted too.  Unparseable values become NaT and are
# excluded by the filter.
# NOTE(review): assumes day-first dates (dd/mm/yyyy) — confirm against the page.
deal_dates = pd.to_datetime(df["Date"], errors="coerce", dayfirst=True)

# Keep the deals dated January to March 2021.
# NOTE(review): January-March is the first calendar quarter; the "q2" in the
# output file name is kept unchanged so existing consumers still find it.
start_date = "2021-01-01"
end_date = "2021-03-31"
filtered_df = df[
    (deal_dates >= pd.Timestamp(start_date)) & (deal_dates <= pd.Timestamp(end_date))
]

# Save the DataFrame to a CSV file
filtered_df.to_csv("funding_deals_q2_2021.csv", index=False)

# Display the DataFrame
print(filtered_df)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_gaming_laptops():
    """Scrape gaming laptop names and specifications from digit.in.

    Laptop names come from the ``div.TopNumbeHeading`` elements and the
    specifications from the ``div.detail-list`` element at the same position.

    Returns:
        A list of dicts with the keys "Laptop Name" (str) and
        "Specifications" (dict mapping specification name to value).  A
        laptop without a matching detail list gets an empty dict, and list
        entries lacking either the name or the value element are skipped.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    url = "https://www.digit.in/top-products/best-gaming-laptops-40.html"

    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    laptops = soup.find_all("div", {"class": "TopNumbeHeading"})
    details = soup.find_all("div", {"class": "detail-list"})

    data = []
    for position, heading in enumerate(laptops):
        specifications = {}

        # There may be fewer detail lists than headings; indexing blindly
        # raised IndexError in that case.
        if position < len(details):
            for entry in details[position].find_all("li"):
                key_node = entry.find("div", {"class": "prod-spec"})
                value_node = entry.find("div", {"class": "prod-specs-value"})
                # Skip list items that are not a complete name/value pair.
                if key_node is None or value_node is None:
                    continue
                specifications[key_node.text.strip()] = value_node.text.strip()

        data.append({
            "Laptop Name": heading.text.strip(),
            "Specifications": specifications
        })

    return data

# Collect one record per laptop (name plus a dict of specifications)
gaming_laptops = scrape_gaming_laptops()

# Tabulate the records, one row per laptop
df = pd.DataFrame(data=gaming_laptops)

# Persist the table as CSV, leaving out the index column
df.to_csv(path_or_buf="gaming_laptops_details.csv", index=False)

# Show the table in the cell output
print(df)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_billionaires():
    """Scrape the billionaires table from forbes.com.

    Reads the ``table.data`` element and converts every row after the first
    (treated as the header) into a dict.

    Returns:
        A list of dicts with the keys "Rank", "Name", "Net Worth", "Age",
        "Citizenship", "Source" and "Industry", taken from the first seven
        cells of each row.  Rows with fewer than seven cells are skipped.
        An empty list is returned when the table is not present in the page.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    url = "https://www.forbes.com/billionaires/"

    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    table = soup.find("table", {"class": "data"})
    if table is None:
        # NOTE(review): if the list is rendered client-side, the fetched HTML
        # may simply not contain this table — confirm against a live response.
        return []

    # Column names in cell order.
    columns = ("Rank", "Name", "Net Worth", "Age", "Citizenship", "Source", "Industry")

    data = []
    for row in table.find_all("tr")[1:]:
        cells = row.find_all("td")
        # Separator or malformed rows do not have all seven cells.
        if len(cells) < len(columns):
            continue
        data.append({
            column: cell.text.strip()
            for column, cell in zip(columns, cells)
        })

    return data

# Collect one record (dict) per billionaire
billionaires = scrape_billionaires()

# Tabulate the records, one row per person
df = pd.DataFrame(data=billionaires)

# Persist the table as CSV, leaving out the index column
df.to_csv(path_or_buf="billionaires_details.csv", index=False)

# Show the table in the cell output
print(df)


In [None]:
import requests
import json
from datetime import datetime

# --- YouTube Data API settings -------------------------------------------
# Placeholder for the API key; replace it with a real key before running.
API_KEY: str = "YOUR_API_KEY"

# Placeholder for the ID of the video whose comments are fetched.
VIDEO_ID: str = "YOUR_VIDEO_ID"

# Upper bound on the number of comments to extract.
MAX_COMMENTS: int = 500

def get_video_comments(video_id, api_key, max_comments):
    # Construct the API endpoint URL
    url = f"https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId={video_id}&maxResults={max_comments}&key={api_key}"

    # Send the API request
    response = requests.get(url)
    data = response.json()

    comments = []
    for item in data["items"]:
        comment = item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
        upvotes = item["snippet"]["topLevelComment"]["snippet"]["likeCount"]
        time = item["snippet"]["topLevelComment"]["snippet"]["publishedAt"]

        # Convert the time to a human-readable format
        time = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y-%m-%d %H:%M:%S")

        comments.append({
            "Comment": comment,
            "Upvotes": upvotes,
            "Time": time
        })

    return comments

# Fetch the comments for the configured video
video_comments = get_video_comments(VIDEO_ID, API_KEY, MAX_COMMENTS)

# Print each comment with its like count and timestamp, followed by a blank line
for comment in video_comments:
    print(f"Comment: {comment['Comment']}")
    print(f"Upvotes: {comment['Upvotes']}")
    print(f"Time: {comment['Time']}")
    print()

# Write the comments to disk as indented JSON
with open("video_comments.json", "w") as file:
    file.write(json.dumps(video_comments, indent=4))


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_hostels():
    """Scrape London hostel listings from hostelworld.com.

    Returns:
        A list with one dict per ``div.property-card`` element, using the
        keys "Hostel Name", "Distance", "Rating", "Total Reviews",
        "Overall Reviews", "Privates from Price", "Dorms from Price",
        "Facilities" and "Description".  Any value whose element is not
        found on a card is reported as "-".

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    url = "https://www.hostelworld.com/search?city=London&country=England"

    def text_or_dash(node):
        """Return the node's stripped text, or "-" when the node is missing."""
        return node.text.strip() if node is not None else "-"

    def price_text(card, css_class):
        """Return the text of the link inside the given price column, or "-"."""
        column = card.find("div", {"class": css_class})
        link = column.find("a") if column is not None else None
        return text_or_dash(link)

    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, "html.parser")

    hostel_items = soup.find_all("div", {"class": "property-card"})

    data = []
    for item in hostel_items:
        # Every lookup goes through text_or_dash so that one card lacking a
        # field no longer aborts the whole scrape with an AttributeError.
        data.append({
            "Hostel Name": text_or_dash(item.find("h2", {"class": "title-row"})),
            "Distance": text_or_dash(item.find("span", {"class": "distance"})),
            "Rating": text_or_dash(item.find("div", {"class": "score orange"})),
            "Total Reviews": text_or_dash(item.find("div", {"class": "reviews"})),
            "Overall Reviews": text_or_dash(item.find("div", {"class": "keyword"})),
            "Privates from Price": price_text(item, "price-col"),
            "Dorms from Price": price_text(item, "price-col-alt"),
            "Facilities": text_or_dash(item.find("div", {"class": "facilities-label"})),
            "Description": text_or_dash(item.find("div", {"class": "additional-info"})),
        })

    return data

# Collect one record (dict) per hostel card
hostels = scrape_hostels()

# Tabulate the records, one row per hostel
df = pd.DataFrame(data=hostels)

# Persist the table as CSV, leaving out the index column
df.to_csv(path_or_buf="hostels_data.csv", index=False)

# Show the table in the cell output
print(df)
