In [None]:
import requests
from bs4 import BeautifulSoup

def search_amazon(product):
    """Search amazon.in for *product* and print the title of each result.

    Args:
        product: Free-text search query (may contain spaces or punctuation).

    Returns:
        None. Titles are printed to stdout; network failures and HTTP error
        statuses are reported on stdout instead of being raised.
    """
    base_url = "https://www.amazon.in"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        # Pass the query through `params` so requests percent-encodes it;
        # interpolating raw text breaks on characters such as '&', '#' or '|'.
        response = requests.get(
            f"{base_url}/s",
            params={'k': product},
            headers=headers,
            timeout=10,  # never hang forever on a stalled connection
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Each search hit is looked up by the 's-result-item' class; the loop
        # variable is deliberately NOT called `product` so the query parameter
        # is not shadowed.
        for result in soup.find_all('div', {'class': 's-result-item'}):
            title = result.find('span', {'class': 'a-text-normal'})
            if title is not None:
                print(title.text)

    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    # The prompt is the only argument to input(); the product text is typed by
    # the user at run time rather than being embedded in the source.
    user_input = input("Enter the product you want to search on Amazon: ")
    search_amazon(user_input)


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def extract_product_details(product, base_url="https://www.amazon.in"):
    """Collect the displayed fields of one search-result element into a dict.

    Each field is read from the first descendant of *product* that matches a
    hard-coded tag/class pair; a field whose element is absent is reported
    as '-'.

    Args:
        product: Parsed result element exposing ``find(tag, attrs)``.
        base_url: Site root prepended to the (relative) product link.

    Returns:
        dict with the keys 'Brand Name', 'Name of the Product', 'Price',
        'Return/Exchange', 'Expected Delivery', 'Availability' and
        'Product URL'.
    """
    def text_of(tag_name, css_class):
        # Look the element up once and fall back to the '-' placeholder.
        node = product.find(tag_name, {'class': css_class})
        return node.get_text(strip=True) if node is not None else '-'

    link = product.find('a', {'class': 'a-link-normal'})
    # .get() tolerates an anchor that carries no href attribute.
    href = link.get('href') if link is not None else None
    product_url = base_url + href if href else '-'

    return {
        'Brand Name': text_of('span', 'a-size-base-plus'),
        'Name of the Product': text_of('span', 'a-text-normal'),
        'Price': text_of('span', 'a-offscreen'),
        'Return/Exchange': text_of('div', 'a-row a-size-base a-color-secondary'),
        'Expected Delivery': text_of('span', 'a-text-bold'),
        'Availability': text_of('span', 'a-size-medium a-color-success'),
        'Product URL': product_url
    }

def search_amazon(product):
    """Scrape the first three amazon.in result pages for *product*.

    Args:
        product: Free-text search query (may contain spaces or punctuation).

    Returns:
        list of dicts, one per result element, as produced by
        ``extract_product_details``. If a request fails part-way through, the
        error is printed and the rows gathered so far are returned.
    """
    base_url = "https://www.amazon.in"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    all_products_data = []

    try:
        for page in range(1, 4):  # Scraping first 3 pages
            # `params` lets requests percent-encode the query; raw
            # interpolation breaks on characters such as '&' or '#'.
            response = requests.get(
                f"{base_url}/s",
                params={'k': product, 'page': page},
                headers=headers,
                timeout=10,  # never hang forever on a stalled connection
            )
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # The loop variable is not called `product`, so the query
            # parameter is no longer shadowed while pages are still pending.
            for result in soup.find_all('div', {'class': 's-result-item'}):
                all_products_data.append(extract_product_details(result))

    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")

    return all_products_data

if __name__ == "__main__":
    user_input = input("Enter the product you want to search on Amazon: ")
    product_data = search_amazon(user_input)

    if product_data:
        # Convert data to DataFrame
        df = pd.DataFrame(product_data)

        # Save DataFrame to CSV
        df.to_csv('amazon_search_results.csv', index=False)

        print("Data saved to 'amazon_search_results.csv'")
    else:
        # Do not write an empty CSV and claim success when the request
        # failed or the page contained no result elements.
        print("No products found; nothing was saved.")


In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs

def get_image_urls(search_query, num_images=10):
    """Run a Google Images search in Chrome and collect thumbnail URLs.

    Args:
        search_query: Text typed into the search box.
        num_images: Maximum number of URLs to return.

    Returns:
        list of at most *num_images* ``src`` values taken from ``<img>``
        elements with class 'rg_i'; inline ``data:image`` sources are skipped.
    """
    url = "https://www.google.com/imghp"

    # Specify the path to your chromedriver executable
    chromedriver_path = "/path/to/chromedriver"

    # NOTE(review): the `executable_path` keyword was deprecated in Selenium 4
    # in favour of a Service object - confirm against the installed version.
    driver = webdriver.Chrome(executable_path=chromedriver_path)
    try:
        driver.get(url)

        # Type the query into the search bar and submit it.
        search_box = driver.find_element("name", "q")
        search_box.send_keys(search_query)
        search_box.send_keys(Keys.RETURN)

        # Wait for the page to load
        time.sleep(2)

        # Scroll to the bottom three times so more thumbnails get loaded.
        for _ in range(3):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

        page_source = driver.page_source
    finally:
        # Always release the browser, even when a step above raised.
        driver.quit()

    image_urls = []
    soup = BeautifulSoup(page_source, 'html.parser')
    for img_tag in soup.find_all('img', {'class': 'rg_i'}):
        # Check the limit first so num_images=0 really yields no URLs.
        if len(image_urls) >= num_images:
            break

        img_url = img_tag.get('src')
        # Some sources are inline base64 data rather than URLs; skip those.
        if img_url and not img_url.startswith('data:image'):
            image_urls.append(img_url)

    return image_urls

def download_images(image_urls, output_directory):
    """Download every URL in *image_urls* into *output_directory*.

    Files are named by 1-based position ("1.jpg", "2.png", ...). The directory
    is created when missing. A failed download is reported on stdout and
    skipped; the remaining URLs are still processed.

    Args:
        image_urls: Iterable of image URLs.
        output_directory: Destination directory path.
    """
    os.makedirs(output_directory, exist_ok=True)

    for i, img_url in enumerate(image_urls):
        try:
            response = requests.get(img_url, timeout=10)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Error downloading image {i+1}: {e}")
            continue

        # Prefer the extension of the original image named by an `imgurl`
        # query parameter; plain URLs (no such parameter) fall back to their
        # own path instead of raising KeyError.
        parsed_url = urlparse(img_url)
        original_urls = parse_qs(parsed_url.query).get('imgurl')
        name_source = urlparse(original_urls[0]).path if original_urls else parsed_url.path
        # '.jpg' is an assumed default for URLs that expose no extension.
        file_extension = os.path.splitext(name_source)[-1] or '.jpg'

        # Save the image
        with open(os.path.join(output_directory, f"{i+1}{file_extension}"), 'wb') as img_file:
            img_file.write(response.content)

if __name__ == "__main__":
    # Topics to scrape and how many images to fetch for each one.
    search_queries = ['fruits', 'cars', 'Machine Learning', 'Guitar', 'Cakes']
    num_images_per_query = 10

    for query in search_queries:
        print(f"Searching and downloading images for '{query}'...")
        # Collect the URLs, then store them in a per-query directory.
        download_images(
            get_image_urls(query, num_images_per_query),
            f"{query}_images",
        )

    print("Image scraping completed.")


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def extract_smartphone_details(product):
    """Collect the displayed fields of one Flipkart result element into a dict.

    Single-valued fields are read from the first descendant matching a
    hard-coded tag/class pair. The six specification values (RAM, storage,
    primary camera, secondary camera, display size, battery) are read from the
    first ``<li class="rgWa7D">`` and the five ``<li>`` elements that follow
    it. Any field whose element is absent is reported as '-'.

    Args:
        product: Parsed result element exposing ``find(tag, attrs)``.

    Returns:
        dict with the keys 'Brand Name', 'Smartphone Name', 'Colour', 'RAM',
        'Storage (ROM)', 'Primary Camera', 'Secondary Camera', 'Display Size',
        'Battery Capacity', 'Price' and 'Product URL'.
    """
    def text_of(tag_name, css_class):
        # Look the element up once and fall back to the '-' placeholder.
        node = product.find(tag_name, {'class': css_class})
        return node.get_text(strip=True) if node is not None else '-'

    # Walk the <li> chain one step at a time so a missing element produces
    # '-' instead of an AttributeError on None.
    # NOTE: find_next() searches forward in document order and is not limited
    # to descendants of `product`.
    spec_values = []
    spec_node = product.find('li', {'class': 'rgWa7D'})
    for _ in range(6):
        if spec_node is None:
            spec_values.append('-')
        else:
            spec_values.append(spec_node.get_text(strip=True))
            spec_node = spec_node.find_next('li')
    (ram, storage, primary_camera,
     secondary_camera, display_size, battery_capacity) = spec_values

    # The same anchor supplies both the phone name and the relative link.
    link = product.find('a', {'class': 'IRpwTa'})
    smartphone_name = link.get_text(strip=True) if link is not None else '-'
    href = link.get('href') if link is not None else None
    product_url = 'https://www.flipkart.com' + href if href else '-'

    return {
        'Brand Name': text_of('div', '_4rR01T'),
        'Smartphone Name': smartphone_name,
        'Colour': text_of('div', '_2WkVRV'),
        'RAM': ram,
        'Storage (ROM)': storage,
        'Primary Camera': primary_camera,
        'Secondary Camera': secondary_camera,
        'Display Size': display_size,
        'Battery Capacity': battery_capacity,
        'Price': text_of('div', '_30jeq3 _1_WHN1'),
        'Product URL': product_url
    }

def search_flipkart(product):
    """Scrape the first Flipkart search-result page for *product*.

    Args:
        product: Free-text search query (may contain spaces or punctuation).

    Returns:
        list of dicts, one per ``<div class="_1AtVbE">`` element, as produced
        by ``extract_smartphone_details``. On a network failure or HTTP error
        status the error is printed and an empty list is returned.
    """
    base_url = "https://www.flipkart.com"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    all_products_data = []

    try:
        # `params` lets requests percent-encode the query; raw interpolation
        # breaks on characters such as '&' or '#'.
        response = requests.get(
            f"{base_url}/search",
            params={'q': product},
            headers=headers,
            timeout=10,  # never hang forever on a stalled connection
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # The loop variable is not called `product`, so the query parameter
        # is not shadowed.
        for result in soup.find_all('div', {'class': '_1AtVbE'}):
            all_products_data.append(extract_smartphone_details(result))

    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")

    return all_products_data

if __name__ == "__main__":
    user_input = input("Enter the smartphone you want to search on Flipkart: ")
    product_data = search_flipkart(user_input)

    if product_data:
        # Convert data to DataFrame
        df = pd.DataFrame(product_data)

        # Save DataFrame to CSV
        df.to_csv('flipkart_smartphone_search_results.csv', index=False)

        print("Data saved to 'flipkart_smartphone_search_results.csv'")
    else:
        # Do not write an empty CSV and claim success when the request
        # failed or the page contained no result elements.
        print("No smartphones found; nothing was saved.")


In [None]:
import requests

def get_geolocation(api_key, address):
    """Resolve *address* to coordinates with the Google Geocoding API.

    Args:
        api_key: Google Cloud API key sent as the 'key' parameter.
        address: City name or street address to geocode.

    Returns:
        ``(latitude, longitude)`` of the first result, or ``None`` when the
        request fails, the body is not JSON, or the API reports a status other
        than 'OK'. Failures are described on stdout.
    """
    base_url = "https://maps.googleapis.com/maps/api/geocode/json"

    params = {
        'address': address,
        'key': api_key,
    }

    try:
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # decode error is not a RequestException subclass.
        print(f"Error: {e}")
        return None

    status = data.get('status')
    if status != 'OK' or not data.get('results'):
        # Surface the API's own explanation (e.g. a rejected key) if present.
        detail = data.get('error_message')
        suffix = f" ({detail})" if detail else ""
        print(f"Geocoding failed. Status: {status}{suffix}")
        return None

    location = data['results'][0]['geometry']['location']
    return location['lat'], location['lng']

if __name__ == "__main__":
    # Replace 'YOUR_API_KEY' with your actual Google Cloud Platform API key
    api_key = 'YOUR_API_KEY'

    # Ask which city name or address should be geocoded.
    city_name = input("Enter the city name or address: ")

    coordinates = get_geolocation(api_key, city_name)

    # Report the failure case first, then the successful lookup.
    if not coordinates:
        print("Failed to retrieve geospatial coordinates.")
    else:
        print(f"Geospatial Coordinates (Latitude, Longitude) for {city_name}: {coordinates}")


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_forbes_billionaires():
    """Scrape the rows of the Forbes billionaires page.

    Every element matching '.table-row' becomes one dict; a cell that is
    missing from a row is reported as '-' rather than aborting the scrape.

    Returns:
        list of dicts with the keys 'Rank', 'Name', 'Net Worth', 'Age',
        'Citizenship', 'Source' and 'Industry' (empty when the page contains
        no matching rows), or ``None`` when the request fails.
    """
    base_url = "https://www.forbes.com/billionaires/"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # Output column -> CSS selector of the cell inside a row.
    column_selectors = {
        'Rank': '.rank',
        'Name': '.personName',
        'Net Worth': '.netWorth',
        'Age': '.age',
        'Citizenship': '.countryOfCitizenship',
        'Source': '.source-column',
        'Industry': '.category',
    }

    def cell_text(row, selector):
        # select_one() returns None for a missing cell; use the placeholder
        # instead of letting .get_text raise AttributeError.
        cell = row.select_one(selector)
        return cell.get_text(strip=True) if cell is not None else '-'

    try:
        response = requests.get(base_url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    return [
        {column: cell_text(row, selector) for column, selector in column_selectors.items()}
        for row in soup.select('.table-row')
    ]

if __name__ == "__main__":
    billionaires_data = scrape_forbes_billionaires()

    # Nothing scraped (None or an empty list): report it and write no file.
    if not billionaires_data:
        print("Failed to retrieve data.")
    else:
        # Build the table and write it out without the index column.
        df = pd.DataFrame(billionaires_data)
        df.to_csv('forbes_billionaires.csv', index=False)

        print("Data saved to 'forbes_billionaires.csv'")


In [None]:
import os
import googleapiclient.discovery
from datetime import datetime

def get_authenticated_service(api_key):
    """Build a 'youtube' v3 API client that uses *api_key* as its developer key."""
    build_client = googleapiclient.discovery.build
    return build_client('youtube', 'v3', developerKey=api_key)

def get_video_comments(service, **kwargs):
    """Fetch every top-level comment returned by ``commentThreads().list``.

    Args:
        service: API client exposing ``commentThreads().list(...).execute()``.
        **kwargs: Request parameters forwarded to ``list``; 'pageToken' is
            set by this function while it follows 'nextPageToken' links.

    Returns:
        list of dicts with the keys 'text', 'author', 'upvotes' and
        'published_at', in the order the API returned them.
    """
    def fetch_page():
        # Reads kwargs at call time, so an updated pageToken is picked up.
        return service.commentThreads().list(**kwargs).execute()

    comments = []
    page = fetch_page()

    while page:
        snippets = (
            thread['snippet']['topLevelComment']['snippet']
            for thread in page['items']
        )
        comments.extend(
            {
                'text': snippet['textDisplay'],
                'author': snippet['authorDisplayName'],
                'upvotes': snippet['likeCount'],
                'published_at': snippet['publishedAt'],
            }
            for snippet in snippets
        )

        # Stop on the last page; otherwise request the following one.
        if 'nextPageToken' not in page:
            break
        kwargs['pageToken'] = page['nextPageToken']
        page = fetch_page()

    return comments

def main(api_key, video_id):
    """Print every top-level comment of a YouTube video, newest first.

    Args:
        api_key: Developer key used to build the API client.
        video_id: ID of the video whose comment threads are listed.
    """
    service = get_authenticated_service(api_key)

    comments = get_video_comments(
        service,
        part='snippet',
        videoId=video_id,
        textFormat='plainText',
        order='time',  # You can change the order if needed
        # commentThreads.list documents 1-100 as the acceptable page size;
        # get_video_comments follows nextPageToken to collect the rest.
        maxResults=100
    )

    for i, comment in enumerate(comments, start=1):
        print(f"Comment {i}:")
        print(f"Text: {comment['text']}")
        print(f"Author: {comment['author']}")
        print(f"Upvotes: {comment['upvotes']}")
        published_at = datetime.strptime(comment['published_at'], "%Y-%m-%dT%H:%M:%SZ")
        print(f"Published At: {published_at}")
        print("\n")

if __name__ == "__main__":
    # Placeholders: substitute your Google Cloud Platform API key and the ID
    # of the video whose comments should be fetched.
    api_key, video_id = 'YOUR_API_KEY', 'VIDEO_ID'

    main(api_key, video_id)


In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_hostels_in_london():
    """Scrape the hostel cards from a Hostelworld search for London.

    Every element matching '.property-card' becomes one dict; a field whose
    element is missing from a card is reported as '-' rather than aborting
    the scrape.

    Returns:
        list of dicts with the keys 'Hostel Name', 'Distance from City
        Centre', 'Ratings', 'Total Reviews', 'Overall Reviews', 'Privates
        from Price', 'Dorms from Price', 'Facilities' and 'Property
        Description' (empty when the page contains no matching cards), or
        ``None`` when the request fails.
    """
    base_url = "https://www.hostelworld.com"
    # NOTE(review): the stay dates (2023-12-01 to 2023-12-02) are fixed in
    # this URL.
    search_url = f"{base_url}/search?search_keywords=London&country=England&city=London&type=city&id=3&from=2023-12-01&to=2023-12-02&guests=1&page=1"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    def field(card, selector):
        # select_one() returns None for a missing element; use the
        # placeholder instead of letting .get_text raise AttributeError.
        node = card.select_one(selector)
        return node.get_text(strip=True) if node is not None else '-'

    try:
        response = requests.get(search_url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    hostels_data = []
    for hostel_card in soup.select('.property-card'):
        facilities = ', '.join(
            facility.get_text(strip=True)
            for facility in hostel_card.select('.facilities-list-item')
        )
        hostels_data.append({
            'Hostel Name': field(hostel_card, '.property-name'),
            'Distance from City Centre': field(hostel_card, '.property-card__distance'),
            'Ratings': field(hostel_card, '.score-orange'),
            'Total Reviews': field(hostel_card, '.reviews'),
            'Overall Reviews': field(hostel_card, '.keyword'),
            'Privates from Price': field(hostel_card, '.price-col-private .price'),
            'Dorms from Price': field(hostel_card, '.price-col-dorm .price'),
            'Facilities': facilities,
            'Property Description': field(hostel_card, '.property-card__description')
        })

    return hostels_data

if __name__ == "__main__":
    hostels_data = scrape_hostels_in_london()

    # Nothing scraped (None or an empty list): report it and stop.
    if not hostels_data:
        print("Failed to retrieve data.")
    else:
        # Print each hostel as a numbered block of "field: value" lines.
        for i, hostel_info in enumerate(hostels_data, start=1):
            print(f"Hostel {i}:")
            for key in hostel_info:
                value = hostel_info[key]
                print(f"{key}: {value}")
            print("\n")
