In [None]:
import requests
from typing import List
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def perform_search(query: str, num_results: int = 10, user_profile: str = "", follow_related: bool = True) -> List[str]:
    """
    Run a Google search in headless Firefox and return the result URLs.

    Args:
        query (str): The search query. It is URL-encoded before being sent.
        num_results (int, optional): Value of the ``num`` query parameter. Defaults to 10.
        user_profile (str, optional): If non-empty, sent as the ``authuser`` query
            parameter. Defaults to "".
        follow_related (bool, optional): When True, every related-search suggestion found
            on the page is searched once more (5 results each, without following *its*
            suggestions) and those URLs are appended. Defaults to True.

    Returns:
        List[str]: Result URLs in page order, followed by the URLs of the related
            searches. An empty list is returned when the results do not appear within
            10 seconds.
    """
    # Local import: keeps the URL building self-contained within this function.
    from urllib.parse import urlencode

    # Set up Firefox options. The "-headless" argument is used instead of the
    # `headless` attribute, which newer Selenium 4 releases no longer provide.
    firefox_options = Options()
    firefox_options.add_argument("-headless")

    # Create a new instance of the Firefox driver
    driver = webdriver.Firefox(options=firefox_options)

    related_queries: List[str] = []
    try:
        # Build the query string with urlencode so spaces, '&', '#', ... in the
        # query cannot corrupt the URL.
        params = {"q": query, "num": num_results, "pws": 0, "gl": "us", "hl": "en"}
        if user_profile:
            params["authuser"] = user_profile
        search_url = "https://www.google.com/search?" + urlencode(params)

        # Navigate to the search URL
        driver.get(search_url)

        # Wait for the search results to load (up to 10 seconds)
        try:
            search_results = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.yuRUbf"))
            )
        except TimeoutException:
            print("Timed out waiting for search results to load.")
            return []

        # Extract the URLs from the search results (anchors without an href are skipped)
        urls = []
        for result in search_results:
            href = result.find_element(By.CSS_SELECTOR, "a").get_attribute("href")
            if href:
                urls.append(href)

        # Only remember the related-search texts here; the searches themselves run
        # after this browser has been closed so only one Firefox is open at a time.
        if follow_related:
            for related_search in driver.find_elements(By.CSS_SELECTOR, "div.EIaa9b"):
                related_query = related_search.text.strip()
                if related_query:
                    related_queries.append(related_query)
    finally:
        # Close the browser
        driver.quit()

    # Related searches go exactly one level deep: without `follow_related=False`
    # every result page would trigger further searches without bound.
    for related_query in related_queries:
        urls.extend(perform_search(related_query, num_results=5, follow_related=False))

    return urls

# Example usage: run one live search and print every URL that comes back.
search_query = "telugu movies"
num_results = 50
# Sent to Google as the `authuser` query parameter by perform_search.
user_profile = "user1"

# NOTE: this launches a real (headless) Firefox session and needs network access.
search_results = perform_search(search_query, num_results, user_profile)

print(f"Search Results for '{search_query}':")
for url in search_results:
    print(url)

In [None]:
import json
import requests
from typing import List
from urllib.parse import urlparse
from bs4 import BeautifulSoup


def extract_url_info(urls: List[str], timeout: float = 10.0) -> List[dict]:
    """
    Fetch each URL with ``requests`` and return details about it as a list of dictionaries.

    Args:
        urls (List[str]): A list of URLs to extract information from.
        timeout (float, optional): Seconds to wait for each HTTP request before giving
            up. Defaults to 10.0.

    Returns:
        List[dict]: One dictionary per URL with the keys 'url', 'content_type',
            'content_length', 'scheme', 'netloc', 'path', 'query', 'fragment',
            'text_content' (at most 500 characters, followed by '...' when truncated)
            and 'error' (non-empty when the request failed or did not return 200).
    """
    preview_limit = 500  # maximum number of characters kept in 'text_content'
    url_info_list = []

    for url in urls:
        # The URL components do not depend on the network, so they are recorded up
        # front and stay available even when the request itself fails.
        parsed_url = urlparse(url)
        url_info = {
            'url': url,
            'content_type': '',
            'content_length': '',
            'scheme': parsed_url.scheme,
            'netloc': parsed_url.netloc,
            'path': parsed_url.path,
            'query': parsed_url.query,
            'fragment': parsed_url.fragment,
            'text_content': '',
            'error': ''
        }

        try:
            # Send a GET request to the URL; the timeout keeps one dead host from
            # blocking the whole batch.
            response = requests.get(url, timeout=timeout)

            # Check if the request was successful
            if response.status_code == 200:
                # Missing headers fall back to '' so every value stays a string.
                url_info['content_type'] = response.headers.get('Content-Type', '')
                url_info['content_length'] = response.headers.get('Content-Length', '')

                # Extract a short plain-text preview of the page
                soup = BeautifulSoup(response.text, 'html.parser')
                text = soup.get_text(strip=True)
                ellipsis = '...' if len(text) > preview_limit else ''
                url_info['text_content'] = text[:preview_limit] + ellipsis
            else:
                url_info['error'] = f"Failed to retrieve information for URL: {url}"
        except requests.exceptions.RequestException as e:
            url_info['error'] = f"Error occurred while processing URL: {url}. Error message: {str(e)}"

        url_info_list.append(url_info)

    return url_info_list


# Example usage: fetch a few pages over the network and dump what was extracted.
url_list = [
    'https://openai.com/',
    'https://www.youtube.com/watch?v=XGJNo8TpuVA',
    # NOTE(review): the next two entries are the same URL, so it is requested twice —
    # confirm whether the duplicate is intentional.
    "https://www.towermarketing.net/blog/google-best-search-engine/#:~:text=A%20large%20result%20of%20Google's,than%20any%20other%20search%20engine.",
    "https://www.towermarketing.net/blog/google-best-search-engine/#:~:text=A%20large%20result%20of%20Google's,than%20any%20other%20search%20engine."
]

url_details = extract_url_info(url_list)

# Print the extracted information in JSON format
print(json.dumps(url_details, indent=2))

In [None]:
import json
from typing import List
from urllib.parse import urlparse
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup


def extract_url_info(urls: List[str]) -> List[dict]:
    """
    Load each URL in headless Firefox and return details about the rendered page.

    Args:
        urls (List[str]): A list of URLs to extract information from.

    Returns:
        List[dict]: One dictionary per URL with the keys 'url', 'content_type'
            (``document.contentType``), 'content_length' (length of
            ``document.body.innerHTML``), 'scheme', 'netloc', 'path', 'query',
            'fragment', 'text_content' and 'error'. 'error' is non-empty when
            processing that URL raised an exception.
    """
    # Imported locally so this cell does not rely on names imported by another cell.
    from selenium.webdriver.common.by import By
    from selenium.webdriver.firefox.options import Options as FirefoxOptions
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    url_info_list = []

    # The driver that is launched is Firefox, so the headless flag has to be given
    # through Firefox options and actually passed to the driver.
    firefox_options = FirefoxOptions()
    firefox_options.add_argument("-headless")

    driver = webdriver.Firefox(options=firefox_options)

    try:
        for url in urls:
            # URL components are independent of the page load, so record them first.
            parsed_url = urlparse(url)
            url_info = {
                'url': url,
                'content_type': '',
                'content_length': '',
                'scheme': parsed_url.scheme,
                'netloc': parsed_url.netloc,
                'path': parsed_url.path,
                'query': parsed_url.query,
                'fragment': parsed_url.fragment,
                'text_content': '',
                'error': ''
            }

            try:
                # Navigate to the URL
                driver.get(url)

                # Wait (up to 10 seconds) until a <body> element is present
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.TAG_NAME, "body"))
                )

                # Extract information from the page
                url_info['content_type'] = driver.execute_script("return document.contentType;")
                url_info['content_length'] = driver.execute_script("return document.body.innerHTML.length;")

                # Extract the complete text content of the rendered page
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                url_info['text_content'] = soup.get_text(strip=True)
            except Exception as e:
                # Best effort: one failing page is reported in its entry and does not
                # stop the remaining URLs from being processed.
                url_info['error'] = f"Error occurred while processing URL: {url}. Error message: {str(e)}"

            url_info_list.append(url_info)
    finally:
        # Always shut the browser down, even if something unexpected escapes the loop.
        driver.quit()

    return url_info_list

# Example usage: render a few pages in the browser and dump what was extracted.
# NOTE: this starts a real browser session and needs network access.
url_list = [
    'https://openai.com/',
    'https://www.youtube.com/watch?v=XGJNo8TpuVA',
    "https://www.towermarketing.net/blog/google-best-search-engine/#:~:text=A%20large%20result%20of%20Google's,than%20any%20other%20search%20engine.",
    "https://en.wikipedia.org/wiki/Kareena_Kapoor_Khan_filmography"
]

url_details = extract_url_info(url_list)

# Print the extracted information in JSON format
# (text_content holds the full page text here, so the output can be very long)
print(json.dumps(url_details, indent=2))

In [None]:
import os
import json
from typing import List, Dict

def find_driver_paths(search_paths: List[str], driver_names: List[str]) -> Dict[str, List[str]]:
    """
    Walk the given directories and collect files whose names contain a driver name.

    A file matches a driver when the driver name is a substring of the lower-cased
    file name; a single file may therefore be listed under several drivers.

    Args:
        search_paths: Directories to walk recursively.
        driver_names: Driver names to look for inside file names.

    Returns:
        A dictionary mapping each driver name that matched at least once to the list
        of matching file paths. Drivers without a match have no key.
    """
    found: Dict[str, List[str]] = {}

    for base_dir in search_paths:
        for current_dir, _subdirs, filenames in os.walk(base_dir):
            for filename in filenames:
                lowered = filename.lower()
                full_path = os.path.join(current_dir, filename)
                for wanted in driver_names:
                    if wanted in lowered:
                        found.setdefault(wanted, []).append(full_path)

    return found

def save_to_json(data: Dict[str, List[str]], file_path: str) -> None:
    """
    Write the driver information to ``file_path`` as JSON indented by two spaces.

    Args:
        data: Dictionary of driver names to lists of paths.
        file_path: Destination file; it is created or overwritten.
    """
    with open(file_path, "w") as json_out:
        json.dump(data, json_out, indent=2)

def main() -> None:
    """
    Scan the filesystem roots for chromedriver/geckodriver files, write the matches to
    ``driver_info.json`` and print them.
    """
    # "/" is the Linux and macOS root directory, "C:\\" the Windows root directory.
    # Add more search paths if needed.
    roots_to_scan = ["/", "C:\\"]

    # Add more driver names if needed.
    wanted_drivers = ["chromedriver", "geckodriver"]

    report_path = "driver_info.json"

    located = find_driver_paths(roots_to_scan, wanted_drivers)
    save_to_json(located, report_path)

    print(f"Driver information saved to {report_path}:")
    print(json.dumps(located, indent=2))

# Run the scan when this cell/script is executed directly (not when imported).
if __name__ == "__main__":
    main()

In [None]:
import json
import requests
from typing import List
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC


def extract_url_info(urls: List[str], proxy: str = None, driver_path: str = None) -> List[dict]:
    """
    Load each URL in headless Chrome and return details about the rendered page.

    Args:
        urls (List[str]): A list of URLs to extract information from.
        proxy (str): Proxy server URL to use for requests (optional).
        driver_path (str): Path to a chromedriver executable (optional). When omitted,
            Selenium is left to locate a driver on its own.

    Returns:
        List[dict]: One dictionary per URL with the keys 'url', 'content_type'
            (``document.contentType``), 'content_length' (length of
            ``document.body.innerHTML``), 'scheme', 'netloc', 'path', 'query',
            'fragment', 'text_content' and 'error'. 'error' is non-empty when
            processing that URL raised an exception.
    """
    url_info_list = []

    # Set up Chrome options
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run Chrome in headless mode
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    # SECURITY NOTE(review): the next four flags switch off certificate checks, the
    # same-origin policy and site isolation. They are kept because the original code
    # set them, but they should not be used against untrusted pages — confirm they
    # are really needed.
    chrome_options.add_argument("--ignore-certificate-errors")
    chrome_options.add_argument("--allow-running-insecure-content")
    chrome_options.add_argument("--disable-web-security")
    chrome_options.add_argument("--disable-site-isolation-trials")

    if proxy:
        chrome_options.add_argument(f"--proxy-server={proxy}")

    # Set up Chrome WebDriver. The options and Service classes imported by this cell
    # are the Chrome ones, so the driver must be Chrome as well; a Service is only
    # created when the caller supplies an explicit chromedriver path.
    driver_kwargs = {"options": chrome_options}
    if driver_path:
        driver_kwargs["service"] = Service(driver_path)
    driver = webdriver.Chrome(**driver_kwargs)

    try:
        for url in urls:
            # URL components are independent of the page load, so record them first.
            parsed_url = urlparse(url)
            url_info = {
                'url': url,
                'content_type': '',
                'content_length': '',
                'scheme': parsed_url.scheme,
                'netloc': parsed_url.netloc,
                'path': parsed_url.path,
                'query': parsed_url.query,
                'fragment': parsed_url.fragment,
                'text_content': '',
                'error': ''
            }

            try:
                # Navigate to the URL
                driver.get(url)

                # Wait (up to 10 seconds) until a <body> element is present
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.TAG_NAME, "body"))
                )

                # Extract information from the page
                url_info['content_type'] = driver.execute_script("return document.contentType;")
                url_info['content_length'] = driver.execute_script("return document.body.innerHTML.length;")

                # Extract the complete text content of the rendered page
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                url_info['text_content'] = soup.get_text(strip=True)
            except Exception as e:
                # Best effort: one failing page is reported in its entry and does not
                # stop the remaining URLs from being processed.
                url_info['error'] = f"Error occurred while processing URL: {url}. Error message: {str(e)}"

            url_info_list.append(url_info)
    finally:
        # Always shut the browser down, even if something unexpected escapes the loop.
        driver.quit()

    return url_info_list


# Example usage
url_list = [
    'https://www.selenium.dev/',
    'https://www.youtube.com/watch?v=XGJNo8TpuVA',

]

# Specify a proxy server URL if needed (replace with your own proxy)
proxy_url = "http://proxy.example.com:8080"

# No proxy is used in this run. `proxy` expects a URL string or None, so None is
# passed rather than False; use `proxy=proxy_url` to route traffic through the proxy.
url_details = extract_url_info(url_list, proxy=None)

# Print the extracted information in JSON format
print(json.dumps(url_details, indent=2))

In [None]:
import os
import requests
from typing import List
from urllib.parse import urlparse

def download_files(url_list: List[str]) -> None:
    """
    Download every URL in ``url_list``, one after another, via ``download_file``.

    Args:
        url_list: URLs of the files to download.
    """
    for link in url_list:
        download_file(link)

def download_file(url: str) -> None:
    """
    Download one file into ``<category>/<extension>/`` below the current directory.

    The category ('images', 'videos' or 'text') is chosen from the file extension in
    the URL path (compared case-insensitively). URLs with any other extension are
    reported and skipped without making a request; responses with a status other than
    200 are reported and nothing is written.

    Args:
        url: URL of the file to download.
    """
    file_name = os.path.basename(urlparse(url).path)
    file_extension = os.path.splitext(file_name)[1]
    # Compare in lower case so 'PHOTO.JPG' is treated like 'photo.jpg'.
    normalized_extension = file_extension.lower()

    if normalized_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp']:
        folder = 'images'
    elif normalized_extension in ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm', '.m4v', '.mpeg', '.mpg', '.3gp', '.rm', '.rmvb', '.ts', '.asf', '.vob', '.ogv']:
        folder = 'videos'
    elif normalized_extension in ['.txt', '.doc', '.docx', '.rtf', '.pdf', '.html', '.htm', '.xml', '.json', '.csv', '.tsv', '.log', '.cfg', '.conf', '.ini', '.yaml', '.yml', '.md', '.markdown', '.tex']:
        folder = 'text'
    else:
        print(f'Unsupported file type: {file_extension}')
        return

    # Only hit the network once the file type is known to be supported.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        # Do not store an error page under the file's name.
        print(f'Failed to download: {url}')
        return

    subfolder = os.path.join(folder, normalized_extension.lstrip('.'))
    os.makedirs(subfolder, exist_ok=True)
    with open(os.path.join(subfolder, file_name), 'wb') as file:
        file.write(response.content)

# Example usage (generic form):
# download_files(['http://example.com/image.jpg', 'http://example.com/video.mp4', 'http://example.com/document.pdf'])


# Example usage: downloads into folders created below the current working directory.
# NOTE(review): the first URL is a Hugging Face `blob/` page link, which presumably
# serves an HTML viewer page rather than the raw image — confirm and switch to the
# raw-file link if so.
file_urls = [
    'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/01.png',
    'https://cdn-uploads.huggingface.co/production/uploads/1670928184033-62441d1d9fdefb55a0b7d12c.gif',
    # 'https://example.com/video1.mp4',
    # 'https://example.com/video2.avi',
    # 'https://example.com/otherfile.pdf'
]
download_files(file_urls)

In [None]:
import os
import requests
from typing import List

def download_files(url_list: List[str], download_path: str) -> None:
    """
    Download files into ``<download_path>/<type>/<extension>/``.

    The type ('image', 'video' or 'text') is chosen from the file extension found in
    the URL path, compared case-insensitively; query strings and fragments are
    ignored. URLs with an unsupported extension and responses with a status other
    than 200 are reported and skipped.

    Args:
        url_list: URLs of the files to download.
        download_path: Base directory below which the type/extension folders are created.
    """
    # Local import: this cell does not import urlparse itself.
    from urllib.parse import urlparse

    file_types = {
        'image': ['.jpg', '.jpeg', '.png', '.gif', '.bmp'],
        'video': ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm', '.m4v', '.mpeg', '.mpg', '.3gp', '.rm', '.rmvb', '.ts', '.asf', '.vob', '.ogv'],
        'text': ['.txt', '.doc', '.docx', '.rtf', '.pdf', '.html', '.htm', '.xml', '.json', '.csv', '.tsv', '.log', '.cfg', '.conf', '.ini', '.yaml', '.yml', '.md', '.markdown', '.tex']
    }
    # Reverse lookup: extension -> type, built once instead of scanning per URL.
    type_by_extension = {
        extension: file_type
        for file_type, extensions in file_types.items()
        for extension in extensions
    }

    for url in url_list:
        # Use only the path component so "pic.png?download=1" is still named "pic.png".
        file_name = os.path.basename(urlparse(url).path)
        file_extension = os.path.splitext(file_name)[-1].lower()

        file_type = type_by_extension.get(file_extension)
        if file_type is None:
            print(f"Unsupported file extension: {file_extension}")
            continue

        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            # Do not store an error page under the file's name.
            print(f"Failed to download: {url}")
            continue

        folder_path = os.path.join(download_path, file_type, file_extension.replace('.', ''))
        os.makedirs(folder_path, exist_ok=True)
        with open(os.path.join(folder_path, file_name), 'wb') as file:
            file.write(response.content)


# Example usage
# NOTE(review): the first URL is a Hugging Face `blob/` page link, which presumably
# serves an HTML viewer page rather than the raw image — confirm.
file_urls = [
    'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/01.png',
    'https://cdn-uploads.huggingface.co/production/uploads/1670928184033-62441d1d9fdefb55a0b7d12c.gif',
    # 'https://example.com/video1.mp4',
    # 'https://example.com/video2.avi',
    # 'https://example.com/otherfile.pdf'
]
# NOTE(review): download_path is an absolute path on one specific machine; replace it
# with a configurable or relative directory before sharing the notebook.
download_files(file_urls,download_path=r"C:/Users/heman/Desktop/Deep learning/file_operations-")

In [None]:
import os
import requests
from typing import List
from urllib.parse import urlparse

def download_file(url: str, folder: str) -> None:
    """
    Fetch ``url`` and store the response body in ``folder``.

    The file is named after the last component of the URL path. Nothing is written
    when the server answers with a status other than 200; the outcome is printed
    either way.

    Args:
        url (str): The URL of the file to download.
        folder (str): Existing folder that receives the downloaded file.
    """
    reply = requests.get(url)

    # Guard clause: report the failure and leave without touching the disk.
    if reply.status_code != 200:
        print(f"Failed to download: {url}")
        return

    target_name = os.path.basename(urlparse(url).path)
    with open(os.path.join(folder, target_name), 'wb') as sink:
        sink.write(reply.content)
    print(f"Downloaded: {target_name}")

def create_folder(folder: str) -> None:
    """
    Create a folder (including missing parent folders) if it doesn't exist.

    Args:
        folder (str): The folder to create.

    Raises:
        FileExistsError: If ``folder`` exists but is not a directory.
    """
    # exist_ok=True makes the call idempotent without a separate existence check,
    # which could race with another process creating the folder in between.
    os.makedirs(folder, exist_ok=True)

def download_files(urls: List[str]) -> None:
    """
    Sort downloads into ``<category>/<extension>/`` folders by file extension.

    The extension is read from the URL path and lower-cased. Supported categories are
    'images', 'videos' and 'texts'; any other extension is reported and skipped.

    Args:
        urls (List[str]): The list of URLs to download files from.
    """
    # (top-level folder, extensions that belong to it), checked in this order.
    categories = (
        ('images', ['.jpg', '.jpeg', '.png', '.gif', '.bmp']),
        ('videos', ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm', '.m4v', '.mpeg', '.mpg', '.3gp', '.rm', '.rmvb', '.ts', '.asf', '.vob', '.ogv']),
        ('texts', ['.txt', '.doc', '.docx', '.rtf', '.pdf', '.html', '.htm', '.xml', '.json', '.csv', '.tsv', '.log', '.cfg', '.conf', '.ini', '.yaml', '.yml', '.md', '.markdown', '.tex']),
    )

    for link in urls:
        suffix = os.path.splitext(urlparse(link).path)[1].lower()
        bucket = next((name for name, known in categories if suffix in known), None)

        if bucket is None:
            print(f"Unsupported file extension: {suffix}")
            continue

        # Sub-folder is the extension without its leading dot, e.g. images/png.
        target = os.path.join(bucket, suffix[1:])
        create_folder(target)
        download_file(link, target)

# Example usage: downloads into images/<ext>/ folders below the working directory.
# NOTE(review): the first URL is a Hugging Face `blob/` page link, which presumably
# serves an HTML viewer page rather than the raw image — confirm.
urls = [
    'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/01.png',
    'https://cdn-uploads.huggingface.co/production/uploads/1670928184033-62441d1d9fdefb55a0b7d12c.gif',
    # 'https://example.com/video1.mp4',
    # 'https://example.com/video2.avi',
    # 'https://example.com/otherfile.pdf'
]

download_files(urls)

In [None]:
import os
import requests
from typing import List
from pathlib import Path

def download_files(file_urls: List[str], save_dir: str = 'downloads'):
    """
    Stream each URL to ``<save_dir>/<category>/<file name>``.

    The category is 'images' for ``.png``, 'videos' for ``.mp4``/``.avi``/``.wmv``/
    ``.mov`` and 'other' for everything else. The file name is the last component of
    the URL path (``file<extension>`` when the path has none). Responses with a status
    other than 200 are reported and skipped.

    Args:
        file_urls: URLs of the files to download.
        save_dir: Base directory for the downloads; created if missing.
            Defaults to 'downloads'.
    """
    # Local import: this cell does not import urlparse itself.
    from urllib.parse import urlparse

    os.makedirs(save_dir, exist_ok=True)

    for url in file_urls:
        response = requests.get(url, stream=True, timeout=30)
        try:
            if response.status_code != 200:
                print(f'Failed to download: {url}')
                continue

            # Work on the URL path only so query strings do not end up in the
            # extension or the file name.
            url_path = urlparse(url).path
            file_extension = Path(url_path).suffix.lower()
            # Keep the real file name so two files of the same type do not
            # overwrite each other.
            file_name = os.path.basename(url_path) or f'file{file_extension}'

            if file_extension == '.png':
                file_folder = 'images'
            elif file_extension in ['.mp4', '.avi', '.wmv', '.mov']:
                file_folder = 'videos'
            else:
                file_folder = 'other'

            # save_dir is joined exactly once: <save_dir>/<category>/<file name>.
            target_dir = os.path.join(save_dir, file_folder)
            os.makedirs(target_dir, exist_ok=True)
            save_path = os.path.join(target_dir, file_name)

            with open(save_path, 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
            print(f'Successfully downloaded: {url} to {save_path}')
        finally:
            # A streamed response keeps its connection open until it is closed.
            response.close()

# Example usage: downloads into the default 'downloads' directory.
# NOTE(review): the first URL is a Hugging Face `blob/` page link, which presumably
# serves an HTML viewer page rather than the raw image — confirm.
file_urls = [
    'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/01.png',
    'https://cdn-uploads.huggingface.co/production/uploads/1670928184033-62441d1d9fdefb55a0b7d12c.gif',
    # 'https://example.com/video1.mp4',
    # 'https://example.com/video2.avi',
    # 'https://example.com/otherfile.pdf'
]

download_files(file_urls)

In [None]:
import os
import requests
from typing import List
from urllib.parse import urlparse

def download_file(url: str, folder: str) -> None:
    """
    Download a file from the given URL or local path and save it in the specified folder.

    Strings starting with ``http://`` or ``https://`` are fetched over the network;
    anything else is treated as a local path and copied. A local file that already
    lives in ``folder`` is left untouched.

    Args:
        url (str): The URL or local path of the file to download.
        folder (str): Existing folder that receives the file.
    """
    # Local import: this cell does not import shutil itself.
    import shutil

    if url.startswith(('http://', 'https://')):
        response = requests.get(url, timeout=30)
        if response.status_code == 200:
            file_name = os.path.basename(urlparse(url).path)
            file_path = os.path.join(folder, file_name)
            with open(file_path, 'wb') as file:
                file.write(response.content)
            print(f"Downloaded: {file_name}")
        else:
            print(f"Failed to download: {url}")
    else:
        file_name = os.path.basename(url)
        file_path = os.path.join(folder, file_name)
        try:
            # copyfile streams the data instead of loading the whole file into
            # memory, and refuses to copy a file onto itself (opening the same file
            # for writing would truncate it before it could be read).
            shutil.copyfile(url, file_path)
        except shutil.SameFileError:
            print(f"Skipped (already in place): {file_name}")
            return
        print(f"Copied: {file_name}")

def create_folder(folder: str) -> None:
    """
    Create a folder (including missing parent folders) if it doesn't exist.

    Args:
        folder (str): The folder to create.

    Raises:
        FileExistsError: If ``folder`` exists but is not a directory.
    """
    # A single call with exist_ok=True avoids the gap between "check" and "create"
    # in which another process could create the folder and make makedirs fail.
    os.makedirs(folder, exist_ok=True)

def download_files(urls: List[str]) -> None:
    """
    Download files from the given list of URLs or local paths into corresponding folders.

    Each file goes to ``<category>/<extension>/`` where the category is 'images',
    'videos', 'texts' or 'archives'. For remote URLs the extension is read from the
    URL path, so query strings and fragments do not affect it. Unsupported extensions
    are reported and skipped.

    Args:
        urls (List[str]): The list of URLs or local paths to download files from.
    """
    # (top-level folder, extensions that belong to it), checked in this order.
    folders_and_extensions = (
        ('images', ['.jpg', '.jpeg', '.png', '.gif', '.bmp']),
        ('videos', ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm', '.m4v', '.mpeg', '.mpg', '.3gp', '.rm', '.rmvb', '.ts', '.asf', '.vob', '.ogv']),
        ('texts', ['.txt', '.doc', '.docx', '.rtf', '.pdf', '.html', '.htm', '.xml', '.json', '.csv', '.tsv', '.log', '.cfg', '.conf', '.ini', '.yaml', '.yml', '.md', '.markdown', '.tex']),
        ('archives', ['.zip', '.tar', '.gz', '.bz2', '.xz', '.7z', '.rar', '.tgz', '.tbz2', '.txz', '.t7z', '.zipx', '.iso', '.z', '.arj']),
    )

    for url in urls:
        # Remote URLs may carry "?query" or "#fragment" after the file name; only the
        # path component names the file. Local paths are used as they are.
        if url.startswith(('http://', 'https://')):
            name_source = urlparse(url).path
        else:
            name_source = url
        file_extension = os.path.splitext(name_source)[1].lower()

        top_folder = next(
            (name for name, extensions in folders_and_extensions if file_extension in extensions),
            None,
        )
        if top_folder is None:
            print(f"Unsupported file extension: {file_extension}")
            continue

        folder = os.path.join(top_folder, file_extension[1:])
        create_folder(folder)
        download_file(url, folder)
# Example usage
# NOTE(review): the two local entries are absolute paths on one specific machine;
# replace them with configurable or relative paths before sharing the notebook.
file_urls = [
    'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/01.png',
    'https://cdn-uploads.huggingface.co/production/uploads/1670928184033-62441d1d9fdefb55a0b7d12c.gif',
    'C:/Users/heman/Desktop/Deep learning/downloads/test-other.tar.gz',
    'C:/Users/heman/Desktop/Deep learning/datasets/dvc.yaml',
    # 'https://example.com/otherfile.pdf'
]

# Use the list defined in this cell. The previous call passed `urls`, a variable that
# only exists if an earlier cell happened to run, and it ignored the local files above.
download_files(file_urls)

In [None]:
import os
import requests
import shutil
from typing import List
from urllib.parse import urlparse

def download_file(url: str, folder: str) -> None:
    """
    Download a file from the given URL or local path and save it in the specified folder.

    Strings starting with ``http://`` or ``https://`` are fetched over the network;
    anything else is treated as a local path and copied. A local file that already
    lives in ``folder`` is left untouched.

    Args:
        url (str): The URL or local path of the file to download.
        folder (str): Existing folder that receives the file.
    """
    is_remote = url.startswith(('http://', 'https://'))

    if not is_remote:
        file_name = os.path.basename(url)
        file_path = os.path.join(folder, file_name)
        try:
            # copyfile copies in chunks and raises SameFileError instead of opening
            # the source for writing, which would wipe it before it could be read.
            shutil.copyfile(url, file_path)
        except shutil.SameFileError:
            print(f"Skipped (already in place): {file_name}")
            return
        print(f"Copied: {file_name}")
        return

    # The timeout keeps an unresponsive server from blocking the whole batch.
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        file_name = os.path.basename(urlparse(url).path)
        file_path = os.path.join(folder, file_name)
        with open(file_path, 'wb') as file:
            file.write(response.content)
        print(f"Downloaded: {file_name}")
    else:
        print(f"Failed to download: {url}")

def create_folder(folder: str) -> None:
    """
    Create a folder (including missing parent folders) if it doesn't exist.

    Args:
        folder (str): The folder to create.

    Raises:
        FileExistsError: If ``folder`` exists but is not a directory.
    """
    # Idempotent creation in one step; no separate existence check that could go
    # stale before makedirs runs.
    os.makedirs(folder, exist_ok=True)

def extract_archive(file_path: str, folder: str) -> None:
    """
    Unpack an archive into ``folder`` and print a confirmation.

    The work is delegated to ``shutil.unpack_archive``; any exception it raises
    propagates to the caller.

    Args:
        file_path (str): The path to the archive file.
        folder (str): The folder to extract the contents to.
    """
    shutil.unpack_archive(filename=file_path, extract_dir=folder)
    archive_name = os.path.basename(file_path)
    print(f"Extracted: {archive_name}")

def download_files(urls: List[str]) -> None:
    """
    Download files from the given list of URLs or local paths into corresponding folders.

    Each file goes to ``<category>/<extension>/`` where the category is 'images',
    'videos', 'texts' or 'archives'. For remote URLs the extension and file name are
    read from the URL path, so query strings and fragments do not affect them.
    Archives are additionally unpacked into a sub-folder named after the archive
    (without its last extension); extraction is skipped when the archive file is not
    present after the download step or when its format cannot be unpacked.
    Unsupported extensions are reported and skipped.

    Args:
        urls (List[str]): The list of URLs or local paths to download files from.
    """
    # (top-level folder, extensions that belong to it), checked in this order.
    folders_and_extensions = (
        ('images', ['.jpg', '.jpeg', '.png', '.gif', '.bmp']),
        ('videos', ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv', '.webm', '.m4v', '.mpeg', '.mpg', '.3gp', '.rm', '.rmvb', '.ts', '.asf', '.vob', '.ogv']),
        ('texts', ['.txt', '.doc', '.docx', '.rtf', '.pdf', '.html', '.htm', '.xml', '.json', '.csv', '.tsv', '.log', '.cfg', '.conf', '.ini', '.yaml', '.yml', '.md', '.markdown', '.tex']),
        ('archives', ['.zip', '.tar', '.gz', '.bz2', '.xz', '.7z', '.rar', '.tgz', '.tbz2', '.txz', '.t7z', '.zipx', '.iso', '.z', '.arj']),
    )

    for url in urls:
        # Remote URLs may carry "?query" or "#fragment" after the file name; only the
        # path component names the file. Local paths are used as they are, which is
        # also how download_file derives the saved file name.
        if url.startswith(('http://', 'https://')):
            name_source = urlparse(url).path
        else:
            name_source = url
        file_extension = os.path.splitext(name_source)[1].lower()

        top_folder = next(
            (name for name, extensions in folders_and_extensions if file_extension in extensions),
            None,
        )
        if top_folder is None:
            print(f"Unsupported file extension: {file_extension}")
            continue

        folder = os.path.join(top_folder, file_extension[1:])
        create_folder(folder)
        download_file(url, folder)

        if top_folder != 'archives':
            continue

        # Extract the contents of the archive file
        file_name = os.path.basename(name_source)
        file_path = os.path.join(folder, file_name)
        if not os.path.isfile(file_path):
            # download_file only reports a failed download; without this check the
            # extraction would crash on the missing file.
            print(f"Skipping extraction, file not available: {file_name}")
            continue

        extract_folder = os.path.join(folder, os.path.splitext(file_name)[0])
        create_folder(extract_folder)
        try:
            extract_archive(file_path, extract_folder)
        except shutil.ReadError as error:
            # Several listed extensions (.7z, .rar, ...) are formats shutil cannot
            # unpack; keep the downloaded file and carry on with the next URL.
            print(f"Could not extract {file_name}: {error}")

# Example usage: two remote files plus two local files (one archive, one text file).
# NOTE(review): the first URL is a Hugging Face `blob/` page link, which presumably
# serves an HTML viewer page rather than the raw image — confirm.
# NOTE(review): the local entries are absolute paths on one specific machine; replace
# them with configurable or relative paths before sharing the notebook.
file_urls = [
    'https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/01.png',
    'https://cdn-uploads.huggingface.co/production/uploads/1670928184033-62441d1d9fdefb55a0b7d12c.gif',
    'C:/Users/heman/Desktop/Deep learning/downloads/test-other.tar.gz',
    'C:/Users/heman/Desktop/Deep learning/datasets/dvc.yaml',
    # 'https://example.com/otherfile.pdf'
]

download_files(file_urls)

In [None]:
# Reference note (not executable code) — a chromedriver path on this machine:
# C:/Users/heman/.cache/selenium/chromedriver/win32/113.0.5672.63/chromedriver.exe

In [None]:

import os
import json
from typing import List, Dict

def find_driver_paths(search_paths: List[str], driver_names: List[str]) -> Dict[str, List[str]]:
    """
    Walk the given directories and collect files whose names contain a driver name.

    A file matches a driver when the driver name is a substring of the lower-cased
    file name. Matching paths are normalised with ``os.path.normpath`` and written
    with forward slashes.

    Args:
        search_paths: Directories to walk recursively.
        driver_names: Driver names to look for inside file names.

    Returns:
        A dictionary mapping each driver name that matched at least once to the list
        of matching file paths. Drivers without a match have no key.
    """
    matches: Dict[str, List[str]] = {}

    for top in search_paths:
        for folder, _subfolders, names in os.walk(top):
            for name in names:
                lowered = name.lower()
                hits = [wanted for wanted in driver_names if wanted in lowered]
                if not hits:
                    continue

                # One portable spelling of the path, shared by every matching driver.
                portable = os.path.normpath(os.path.join(folder, name)).replace("\\", "/")
                for wanted in hits:
                    if wanted in matches:
                        matches[wanted].append(portable)
                    else:
                        matches[wanted] = [portable]

    return matches

def save_to_json(data: Dict[str, List[str]], file_path: str) -> None:
    """
    Serialise the driver information as JSON (two-space indent) into ``file_path``.

    Args:
        data: Dictionary of driver names to lists of paths.
        file_path: Destination file; it is created or overwritten.
    """
    with open(file_path, "w") as target:
        json.dump(obj=data, fp=target, indent=2)

def main() -> None:
    """
    Entry point: scans a fixed set of directories for files whose names look
    like browser drivers, stores the matches in ``driver_info.json`` and
    prints the same data to stdout.
    """
    # Linux / macOS locations, starting at the filesystem root.
    unix_locations = [
        "/",
        "/usr/local/bin",
        "/usr/bin",
        "/bin",
        "/opt",
        "/usr/local/share",
        "/usr/share",
        "/var/lib",
        "/home",
        "/Users",
    ]
    # Windows locations, starting at the root of drive C:.
    windows_locations = [
        "C:\\",
        "C:\\Program Files",
        "C:\\Program Files (x86)",
        "C:\\Windows",
        "C:\\Windows\\System32",
        "C:\\Users",
        "C:\\ProgramData",
        "C:\\AppData",
        "C:\\Drivers",
        "C:\\Tools",
        "C:\\Selenium",
        "C:\\WebDrivers",
    ]
    search_paths = unix_locations + windows_locations

    # Names of dedicated driver binaries.
    driver_binaries = [
        "chromedriver",
        "geckodriver",
        "edgedriver",
        "iedriver",
        "operadriver",
        "phantomjs",
        "safaridriver",
    ]
    # Broader keywords; these match any file name that contains them.
    generic_keywords = [
        "webdriver",
        "selenium",
        "driver",
        "chrome",
        "firefox",
        "edge",
        "ie",
        "opera",
        "safari",
        "gecko",
        "phantom",
    ]
    driver_names = driver_binaries + generic_keywords

    output_file = "driver_info.json"
    driver_paths = find_driver_paths(search_paths, driver_names)
    save_to_json(driver_paths, output_file)

    print(f"Driver information saved to {output_file}:")
    print(json.dumps(driver_paths, indent=2))

if __name__ == "__main__":
    main()

In [None]:
import os
import json
import requests
from bs4 import BeautifulSoup
from typing import List, Dict, Optional
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.edge.service import Service as EdgeService
from selenium.webdriver.ie.service import Service as IEService
# from selenium.webdriver.opera.webdriver import Service as OperaService
from selenium.webdriver.safari.webdriver import Service as SafariService

def find_driver_paths(search_paths: List[str], driver_names: List[str]) -> Dict[str, List[str]]:
    """
    Finds the paths of browser drivers on the user's operating system.

    Each directory in ``search_paths`` is walked recursively and every file
    whose lower-cased name contains one of ``driver_names`` is recorded under
    that name. One file can therefore appear under several names.

    Args:
        search_paths: A list of directories to search for browser drivers.
            Overlapping entries are allowed; a file reached through more than
            one entry is reported only once per driver name.
        driver_names: A list of browser driver names to search for (lower case,
            because they are matched against the lower-cased file name).

    Returns:
        A dictionary with driver names as keys and lists of driver paths as values.
        Names that matched nothing are not present.
    """
    driver_paths: Dict[str, List[str]] = {}
    # Remembers what has been stored so overlapping search paths do not
    # produce duplicate entries.
    recorded = set()

    for path in search_paths:
        for root, _dirs, files in os.walk(path):
            for file in files:
                lowered = file.lower()
                driver_path = os.path.join(root, file)
                for driver in driver_names:
                    if driver not in lowered or (driver, driver_path) in recorded:
                        continue
                    recorded.add((driver, driver_path))
                    driver_paths.setdefault(driver, []).append(driver_path)

    return driver_paths

def download_driver(driver_name: str, download_url: str, output_dir: str) -> Optional[str]:
    """
    Downloads a browser driver archive from the specified URL.

    The response body is written to ``<output_dir>/<driver_name>.zip``. The
    archive is not extracted.

    Args:
        driver_name: The name of the browser driver; used as the file stem.
        download_url: The URL to download the driver from.
        output_dir: The directory to save the downloaded driver. It is created
            if it does not exist yet.

    Returns:
        The path to the downloaded driver file, or None if the download or the
        write to disk fails.
    """
    try:
        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(download_url, timeout=30)
        response.raise_for_status()

        # An empty output_dir means "current directory"; makedirs("") would fail.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        driver_path = os.path.join(output_dir, f"{driver_name}.zip")
        with open(driver_path, "wb") as file:
            file.write(response.content)

        return driver_path
    except (requests.exceptions.RequestException, OSError):
        # Network failures and filesystem failures both count as "download failed".
        return None

def save_to_json(data: Dict[str, List[str]], file_path: str) -> None:
    """
    Serialises ``data`` as JSON (two-space indentation) into ``file_path``.

    Args:
        data: The information to store.
        file_path: Where the JSON document is written; an existing file is
            overwritten.
    """
    with open(file_path, mode="w") as handle:
        json.dump(obj=data, fp=handle, indent=2)

def get_user_input(prompt: str, default: Optional[str] = None) -> Optional[str]:
    """
    Prompts the user for input and returns the entered value.

    Args:
        prompt: The prompt message to display to the user.
        default: The value to use if the user enters nothing (or only
            whitespace).

    Returns:
        The user's input with surrounding whitespace removed, or ``default``
        when the input is empty. This is ``None`` when the input is empty and
        no default was given, so callers must be prepared for ``None``.
    """
    user_input = input(prompt).strip()
    return user_input if user_input else default

def perform_web_scraping(query: str, location: str, limit: int) -> List[Dict[str, str]]:
    """
    Performs web scraping based on the user's query.

    Sends one GET request to Google's search endpoint and parses the returned
    HTML with BeautifulSoup.

    Args:
        query: The search query.
        location: The search location.
        limit: The maximum number of results to retrieve (sent as ``num``).

    Returns:
        A list of dictionaries with the keys "title", "url" and "description".
        Result containers that lack a link or a description are skipped.
    """
    # Passing the values through ``params`` lets requests URL-encode them;
    # spaces, '&' or '#' in the query would otherwise corrupt the URL.
    response = requests.get(
        "https://www.google.com/search",
        params={"q": query, "location": location, "num": limit},
        timeout=10,  # do not hang forever on an unresponsive server
    )
    soup = BeautifulSoup(response.text, "html.parser")

    search_results = []
    for result in soup.select(".g"):
        link = result.select_one(".r a")
        snippet = result.select_one(".st")
        # select_one() returns None when nothing matches; skip incomplete
        # containers instead of failing on ``None.text``.
        if link is None or snippet is None:
            continue

        search_results.append({
            "title": link.text,
            "url": link.get("href", ""),
            "description": snippet.text
        })

    return search_results

def main() -> None:
    """
    Main function that finds the browser driver paths, performs web scraping, and saves the information to a JSON file.

    Steps:
        1. Scan the file system for driver-like files.
        2. Try to download an archive for every driver name without a match.
        3. Ask the user for a query, a location and a result limit.
        4. Scrape the search results over plain HTTP and write everything to
           ``search_results.json``.
        5. Repeat the search in every browser for which a driver path was
           found and write ``<driver_name>_results.json`` for each.
    """
    # Local imports: these names are not part of this cell's import block.
    from urllib.parse import urlencode
    from selenium.webdriver.common.by import By

    search_paths = [
        "/",  # Linux and macOS root directory
        "/usr/local/bin",  # Common location for user-installed binaries on Linux and macOS
        "/usr/bin",  # Common location for system-wide binaries on Linux
        "/bin",  # Common location for essential binaries on Linux
        "/opt",  # Common location for optional software packages on Linux
        "/usr/local/share",  # Common location for shared data on Linux and macOS
        "/usr/share",  # Common location for system-wide shared data on Linux
        "/var/lib",  # Common location for variable data on Linux
        "/home",  # Common location for user home directories on Linux
        "/Users",  # Common location for user home directories on macOS
        "C:\\",  # Windows root directory
        "C:\\Program Files",  # Common location for installed programs on Windows
        "C:\\Program Files (x86)",  # Common location for 32-bit programs on 64-bit Windows
        "C:\\Windows",  # Windows system directory
        "C:\\Windows\\System32",  # Common location for system binaries on Windows
        "C:\\Users",  # Common location for user directories on Windows
        "C:\\ProgramData",  # Common location for application data on Windows
        "C:\\AppData",  # Common location for application data on Windows
        "C:\\Drivers",  # Common location for driver files on Windows
        "C:\\Tools",  # Common location for tools and utilities on Windows
        "C:\\Selenium",  # Common location for Selenium-related files on Windows
        "C:\\WebDrivers",  # Common location for web driver files on Windows
    ]

    driver_names = [
        "chromedriver",
        "geckodriver",
        "edgedriver",
        "iedriver",
        "operadriver",
        "phantomjs",
        "safaridriver",
        "webdriver",
        "selenium",
        "driver",
        "chrome",
        "firefox",
        "edge",
        "ie",
        "opera",
        "safari",
        "gecko",
        "phantom",
    ]

    driver_paths = find_driver_paths(search_paths, driver_names)

    # Download missing drivers
    # NOTE(review): example.com is a placeholder host, and the stored path is
    # a .zip archive rather than an executable -- confirm the intended source.
    for driver_name in driver_names:
        if driver_name not in driver_paths:
            download_url = f"https://example.com/drivers/{driver_name}.zip"
            downloaded_path = download_driver(driver_name, download_url, "drivers")
            if downloaded_path:
                driver_paths[driver_name] = [downloaded_path]

    # Get user input for query, location, and limit
    query = get_user_input("Enter your query: ")
    location = get_user_input("Enter your location: ")
    limit = int(get_user_input("Enter the limit (default: 5): ", default="5"))

    # Perform web scraping based on the user's query
    scraped_data = perform_web_scraping(query, location, limit)

    # Filter out garbage from driver paths (plain substring match on the path)
    garbage_markers = ["tmp", "temp", "cache", "old", "backup"]
    filtered_driver_paths: Dict[str, List[str]] = {}
    for driver, paths in driver_paths.items():
        filtered_paths = [path for path in paths if not any(garbage in path for garbage in garbage_markers)]
        if filtered_paths:
            filtered_driver_paths[driver] = filtered_paths

    # Prepare the output data
    output_data = {
        "query": query,
        "location": location,
        "limit": limit,
        "driver_paths": filtered_driver_paths,
        "scraped_data": scraped_data
    }

    # Save the output data to a JSON file
    output_file = "search_results.json"
    save_to_json(output_data, output_file)

    print(f"Search results saved to {output_file}:")
    print(json.dumps(output_data, indent=2))

    # Perform web scraping using different browsers.
    # (keyword, Service class, name of the webdriver class); the first keyword
    # contained in the driver name wins, so the order matters. Opera is left
    # out because its Service import is commented out in this cell.
    browser_backends = [
        ("chrome", ChromeService, "Chrome"),
        ("firefox", FirefoxService, "Firefox"),
        ("edge", EdgeService, "Edge"),
        ("ie", IEService, "Ie"),
        ("safari", SafariService, "Safari"),
    ]

    # Encode the user-supplied values so spaces and '&' cannot break the URL.
    search_url = "https://www.google.com/search?" + urlencode(
        {"q": query, "location": location, "num": limit}
    )

    for driver_name, candidate_paths in filtered_driver_paths.items():
        if not candidate_paths:
            continue
        driver_path = candidate_paths[0]  # Use the first available driver path

        lowered_name = driver_name.lower()
        backend = next(
            ((service_cls, driver_cls_name)
             for keyword, service_cls, driver_cls_name in browser_backends
             if keyword in lowered_name),
            None,
        )
        if backend is None:
            continue
        service_cls, driver_cls_name = backend

        driver = None
        try:
            # Created inside the try block so that a driver which fails to
            # start is reported and the remaining browsers are still tried.
            service = service_cls(executable_path=driver_path)
            driver = getattr(webdriver, driver_cls_name)(service=service)

            driver.get(search_url)
            browser_scraped_data = []

            # Selenium 4 locator API (find_element(s) with a By strategy).
            search_results = driver.find_elements(By.CSS_SELECTOR, ".g")
            for result in search_results:
                link = result.find_element(By.CSS_SELECTOR, ".r a")
                title = link.text
                url = link.get_attribute("href")
                description = result.find_element(By.CSS_SELECTOR, ".st").text

                browser_scraped_data.append({
                    "title": title,
                    "url": url,
                    "description": description
                })

            browser_output_file = f"{driver_name}_results.json"
            save_to_json(browser_scraped_data, browser_output_file)
            print(f"Search results from {driver_name} saved to {browser_output_file}")

        except Exception as e:
            print(f"Error occurred while scraping with {driver_name}: {str(e)}")

        finally:
            # Only quit a browser that was actually started.
            if driver is not None:
                driver.quit()

if __name__ == "__main__":
    main()

In [None]:
import json
import os
from typing import Dict, List
import platform

# Define browser executable names for different platforms.
# The keys are compared against platform.system() in get_os_type(); each value
# lists the exact file names that find_browsers() looks for on that platform.
BROWSER_EXECUTABLES = {
    'Windows': ['chrome.exe', 'firefox.exe', 'iexplore.exe', 'MicrosoftEdge.exe', 'opera.exe'],
    'Linux': ['chrome', 'firefox', 'opera'],
    'Darwin': ['Google Chrome', 'Firefox', 'Safari', 'Opera']
}


def find_browsers(browser_executables: List[str], search_paths: List[str]) -> Dict[str, str]:
    """
    Locates browser executables below the given directories.

    Args:
        browser_executables: Exact file names to look for (case-sensitive).
        search_paths: Directories that are walked recursively.

    Returns:
        A dictionary mapping each executable name that was found to its full
        path. If a name occurs in several directories, the occurrence visited
        last wins. Keys follow the order of ``browser_executables``.
    """
    wanted = set(browser_executables)
    latest_match: Dict[str, str] = {}

    # Walk every tree once and check all names per directory, instead of
    # re-walking the whole tree for each executable name.
    for search_path in search_paths:
        for root, _dirs, files in os.walk(search_path):
            for exec_name in wanted.intersection(files):
                latest_match[exec_name] = os.path.join(root, exec_name)

    # Rebuild in the caller's order so the result does not depend on the order
    # in which the file system was traversed.
    return {
        exec_name: latest_match[exec_name]
        for exec_name in browser_executables
        if exec_name in latest_match
    }


def get_os_type() -> str:
    """
    Returns the name of the current operating system as reported by
    ``platform.system()``.

    Raises:
        ValueError: If the reported name is not a key of BROWSER_EXECUTABLES.
    """
    os_type = platform.system()
    if os_type in BROWSER_EXECUTABLES:
        return os_type
    raise ValueError(f"Unsupported operating system: {os_type}")


def main():
    """
    Finds the installed browsers for the current operating system, writes the
    name-to-path mapping to ``browsers_paths.json`` and prints a summary.
    """
    os_type = get_os_type()

    # Directories to scan, keyed by operating system.
    search_paths_by_os = {
        'Windows': ['C:\\Program Files', 'C:\\Program Files (x86)', 'C:\\Users'],
        'Linux': ['/usr/bin', '/opt'],
        'Darwin': ['/Applications', '/Users']
    }

    browsers = find_browsers(BROWSER_EXECUTABLES[os_type], search_paths_by_os[os_type])

    # Persist the mapping as JSON
    with open('browsers_paths.json', 'w') as json_file:
        json.dump(browsers, json_file, indent=4)

    print(f"Browsers found: {len(browsers)}")
    for name in browsers:
        print(f"{name}: {browsers[name]}")


if __name__ == "__main__":
    main()

In [None]:
from selenium import webdriver

# Start a Firefox session and open the Selenium project page.
# NOTE(review): this cell never calls browser.quit(), so the Firefox window
# stays open until it is closed in a later cell or by hand.
browser = webdriver.Firefox(keep_alive=True)
browser.get('http://selenium.dev/')

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

browser = webdriver.Firefox()

try:
    browser.get('http://www.yahoo.com')
    assert 'Yahoo' in browser.title

    elem = browser.find_element(By.NAME, 'p')  # Find the search box
    elem.send_keys('seleniumhq' + Keys.RETURN)
finally:
    # Close Firefox even when the title check or the element lookup fails;
    # otherwise the browser process would be left running.
    browser.quit()




In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Initialize the Firefox webdriver
browser = webdriver.Firefox()

try:
    # implicitly_wait() does not pause the script. It sets how long element
    # lookups keep retrying before giving up, so it is configured up front to
    # cover every lookup below.
    browser.implicitly_wait(10)  # Adjust the wait time as needed

    # Open Yahoo search engine
    browser.get('http://www.yahoo.com')
    assert 'Yahoo' in browser.title

    # Find the search box and enter the query
    search_box = browser.find_element(By.NAME, 'p')
    search_box.send_keys('seleniumhq' + Keys.RETURN)

    # Find all links on the page.
    # NOTE(review): nothing here waits for the results page itself, so the
    # links may be collected before the navigation has finished.
    search_results = browser.find_elements(By.XPATH, "//a[@href]")

    # Extract and print the URLs
    for result in search_results:
        url = result.get_attribute("href")
        if url:
            print(url)
finally:
    # Quit the browser even if one of the steps above raised
    browser.quit()


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

def tell_jokes(query):
    """
    Searches Yahoo for "<query> jokes" in Firefox and prints every link found
    on the page that is loaded afterwards.

    Args:
        query: Joke category or keyword; " jokes" is appended to it.

    Raises:
        AssertionError: If the Yahoo start page title does not contain "Yahoo".
    """
    # Initialize the Firefox webdriver
    browser = webdriver.Firefox()

    try:
        # implicitly_wait() does not pause; it sets how long element lookups
        # keep retrying, so it is configured before the first lookup.
        browser.implicitly_wait(10)  # Adjust the wait time as needed

        # Open a search engine (Yahoo in this case)
        browser.get('http://www.yahoo.com')
        assert 'Yahoo' in browser.title

        # Find the search box and enter the user's query
        search_box = browser.find_element(By.NAME, 'p')
        search_box.send_keys(query + ' jokes' + Keys.RETURN)

        # Find all links on the page.
        # NOTE(review): there is no explicit wait for the results page, so the
        # links may be collected before the navigation has finished.
        search_results = browser.find_elements(By.XPATH, "//a[@href]")

        # Extract and print the URLs
        for result in search_results:
            url = result.get_attribute("href")
            if url:
                print(url)
    finally:
        # Quit the browser even if the assertion or a lookup failed
        browser.quit()

# Joke category or keyword.
# NOTE: the value is hard-coded here; the user is not actually prompted.
user_input = 'women'

# Search for "<user_input> jokes" and print the links that were found
tell_jokes(user_input)


In [None]:
import os

def count_driver_paths(browser_name):
    """
    Counts the directories on PATH that contain the driver for a browser.

    Args:
        browser_name: One of 'chrome', 'firefox' or 'edge'.

    Returns:
        The number of PATH entries that contain the driver executable. 0 is
        returned for an unknown browser name or when PATH is unset or empty.
    """
    # Define the driver executable names for different browsers
    driver_executables = {
        'chrome': 'chromedriver',
        'firefox': 'geckodriver',
        'edge': 'msedgedriver',
        # Add more browser-driver mappings as needed
    }

    # The lookup does not depend on the directory, so do it once up front.
    driver_executable = driver_executables.get(browser_name)
    if not driver_executable:
        return 0

    # On Windows the file on disk carries an .exe suffix.
    candidates = [driver_executable]
    if os.name == 'nt':
        candidates.append(driver_executable + '.exe')

    # PATH may be missing from the environment; treat that as "nothing found".
    path_value = os.environ.get('PATH', '')
    if not path_value:
        return 0

    # Count each PATH directory at most once, whichever candidate name it holds.
    return sum(
        1
        for directory in path_value.split(os.pathsep)
        if any(os.path.isfile(os.path.join(directory, name)) for name in candidates)
    )

# Example usage: count the PATH directories that hold the driver for one browser.
browser_name = 'chrome'  # Change this to 'firefox', 'edge', etc., as needed
driver_count = count_driver_paths(browser_name)
print(f"Number of {browser_name} driver paths found: {driver_count}")


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

browser = webdriver.Firefox()

try:
    browser.get('http://www.yahoo.com')
    assert 'Yahoo' in browser.title

    elem = browser.find_element(By.NAME, 'p')  # Find the search box
    elem.send_keys('seleniumhq' + Keys.RETURN)
finally:
    # Always release the browser, including when the assertion or the
    # element lookup above raises.
    browser.quit()

In [None]:
import json
import time
from typing import List, Dict

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def search_locations(query: str, driver_path: str) -> List[Dict[str, str]]:
    """
    Searches for locations using the provided query and returns the response as a list of dictionaries.

    The query is typed into the search box of the Yahoo start page in a
    headless Firefox session.

    :param query: The search query for locations.
    :param driver_path: The path to the Firefox WebDriver executable.
    :return: A list of dictionaries with the keys 'title' and 'url'. Empty if
        no result element shows up within 10 seconds.
    :raises AssertionError: If the Yahoo start page title does not contain 'Yahoo'.
    """
    # Local import: these names are not part of this cell's import block.
    from selenium.common.exceptions import NoSuchElementException, TimeoutException

    # Set up Firefox WebDriver options
    options = Options()
    options.add_argument("--headless")  # Run Firefox in headless mode

    # Set up Firefox WebDriver service
    service = Service(executable_path=driver_path)

    # Create a new instance of the Firefox driver
    browser = webdriver.Firefox(service=service, options=options)

    try:
        # Navigate to Yahoo
        browser.get('https://www.yahoo.com')
        assert 'Yahoo' in browser.title

        # Find the search box and enter the query
        search_box = WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.NAME, 'p'))
        )
        search_box.send_keys(query + Keys.RETURN)

        # Wait until result elements are present instead of sleeping for a
        # fixed time: returns as soon as they appear, gives up after 10 s.
        try:
            search_results = WebDriverWait(browser, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.result'))
            )
        except TimeoutException:
            return []

        # Process the search results
        results = []
        for result in search_results:
            try:
                title = result.find_element(By.CSS_SELECTOR, 'h3').text
                url = result.find_element(By.CSS_SELECTOR, 'a').get_attribute('href')
            except NoSuchElementException:
                # A container without heading or link must not discard the
                # results that are well-formed.
                continue
            results.append({'title': title, 'url': url})

        return results

    finally:
        # Quit the browser
        browser.quit()


def save_to_json(data: List[Dict[str, str]], filename: str) -> None:
    """
    Writes ``data`` to ``filename`` as JSON indented by four spaces.

    :param data: The records to serialise.
    :param filename: Target file; it is opened in write mode, so existing
        content is replaced.
    """
    with open(filename, mode='w') as handle:
        json.dump(obj=data, fp=handle, indent=4)


# Example usage: run one search, store the results and echo them
if __name__ == '__main__':
    query = 'restaurants in New York'
    driver_path = '/path/to/geckodriver'  # Replace with the actual path to geckodriver

    search_results = search_locations(query, driver_path)
    save_to_json(search_results, 'search_results.json')

    # Print title, URL and a separator line for every result
    for result in search_results:
        print(f"Title: {result['title']}", f"URL: {result['url']}", '---', sep='\n')

In [None]:
!pip install -q webdriver_manager

In [None]:
# Selenium 4 style, matching the other cells of this notebook: the driver
# executable is handed over through a Service object instead of the
# Selenium 3 `executable_path` keyword.
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from webdriver_manager.firefox import GeckoDriverManager

# GeckoDriverManager().install() returns the path of the geckodriver it manages.
driver = webdriver.Firefox(service=Service(executable_path=GeckoDriverManager().install()))

In [None]:
import json
import time
import os
import sys
from typing import List, Dict
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.firefox import GeckoDriverManager

def search_locations(query: str, driver_path: str) -> List[Dict[str, str]]:
    """
    Searches for locations using the provided query and returns the response as a list of dictionaries.

    The query is typed into the search box of the Yahoo start page in a
    headless Firefox session.

    :param query: The search query for locations.
    :param driver_path: The path to the Firefox WebDriver executable.
    :return: A list of dictionaries with the keys 'title' and 'url'. An empty
        list is returned when any error occurs; the error is printed.
    """
    # Local import: this name is not part of this cell's import block.
    from selenium.common.exceptions import NoSuchElementException

    # Set up Firefox WebDriver options
    options = Options()
    options.add_argument("--headless")  # Run Firefox in headless mode

    # Set up Firefox WebDriver service
    service = Service(executable_path=driver_path)

    # Create a new instance of the Firefox driver
    browser = webdriver.Firefox(service=service, options=options)

    try:
        # Navigate to Yahoo
        browser.get('https://www.yahoo.com')
        assert 'Yahoo' in browser.title

        # Find the search box and enter the query
        search_box = WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.NAME, 'p'))
        )
        search_box.send_keys(query + Keys.RETURN)

        # Wait for the search results to load
        WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'div#web'))
        )

        # Extract the search results
        search_results = browser.find_elements(By.CSS_SELECTOR, 'div#web > ol > li')

        # Process the search results
        results = []
        for result in search_results:
            try:
                title = result.find_element(By.CSS_SELECTOR, 'h3').text
                url = result.find_element(By.CSS_SELECTOR, 'a').get_attribute('href')
            except NoSuchElementException:
                # Skip list items without heading or link; otherwise a single
                # odd item would make the handler below discard every result.
                continue
            results.append({'title': title, 'url': url})

        return results

    except Exception as e:
        print(f"An error occurred: {e}")
        return []
    finally:
        # Quit the browser
        browser.quit()


def save_to_json(data: List[Dict[str, str]], filename: str) -> None:
    """
    Stores ``data`` in ``filename`` as a JSON document with four-space indentation.

    :param data: The records to write.
    :param filename: Name of the JSON file; an existing file is overwritten.
    """
    with open(filename, mode='w') as handle:
        json.dump(obj=data, fp=handle, indent=4)


# Example usage
if __name__ == '__main__':
    # Inside a Jupyter kernel sys.argv carries the kernel's own launch
    # arguments, not a user query, so command-line arguments are only used
    # when this code runs as a plain script.
    running_in_kernel = 'ipykernel' in sys.modules
    if len(sys.argv) > 1 and not running_in_kernel:
        query = ' '.join(sys.argv[1:])
    else:
        query = 'restaurants in New York'
    driver_path = os.getenv('GECKO_DRIVER_PATH', '/path/to/geckodriver')  # Get the path from an environment variable

    search_results = search_locations(query, driver_path)
    save_to_json(search_results, 'search_results.json')

    # Print the search results
    for result in search_results:
        print(f"Title: {result['title']}")
        print(f"URL: {result['url']}")
        print('---')

In [None]:
import unittest
from selenium import webdriver

class GoogleTestCase(unittest.TestCase):
    """Smoke test: opens Google in Firefox and checks the page title."""

    def setUp(self):
        # One Firefox session per test. addCleanup() makes sure quit() runs
        # after the test, whether it passes or fails.
        self.browser = webdriver.Firefox()
        self.addCleanup(self.browser.quit)

    def test_page_title(self):
        self.browser.get('http://www.google.com')
        self.assertIn('Google', self.browser.title)

if __name__ == '__main__':
    # In a notebook sys.argv holds the kernel's launch arguments, which
    # unittest would try to interpret as test names, and the default
    # exit=True would call sys.exit() when the run finishes. A dummy argv and
    # exit=False avoid both. The trade-off is that a failing run no longer
    # sets a non-zero exit code when this is executed as a script.
    unittest.main(argv=['first-arg-is-ignored'], exit=False, verbosity=2)

In [None]:
import json
import os
import time
from typing import List
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.common.exceptions import WebDriverException


def search_locations(query: str, driver_path: str, firefox_binary_path: str) -> List[str]:
    """
    Performs a search for locations based on the query provided using Selenium.

    :param query: The search query for locations.
    :param driver_path: The file path to the geckodriver executable.
    :param firefox_binary_path: The file path to the Firefox binary.
    :return: A list with the text of every element of class "rllt__details";
        an empty list if a WebDriverException occurs (the error is printed).
    """
    # Set up Firefox options
    firefox_options = Options()
    firefox_options.add_argument("--headless")  # Run in headless mode
    # binary_location takes the path as a plain string, so the FirefoxBinary
    # wrapper object is not needed.
    firefox_options.binary_location = firefox_binary_path

    # Set up Firefox service
    firefox_service = Service(executable_path=driver_path)

    # Create a new Firefox driver instance
    driver = webdriver.Firefox(service=firefox_service, options=firefox_options)

    try:
        # Navigate to the Google search page
        driver.get("https://www.google.com/search")

        # Find the search input field and enter the query
        search_input = driver.find_element(By.NAME, "q")
        search_input.send_keys(query)
        search_input.submit()

        # Wait for the search results to load
        time.sleep(5)

        # Find all the location elements
        location_elements = driver.find_elements(By.CLASS_NAME, "rllt__details")

        # Extract the location details
        locations = [element.text for element in location_elements]

        return locations
    except WebDriverException as e:
        print(f"An error occurred while searching for locations: {e}")
        return []
    finally:
        # Close the browser
        driver.quit()


def main():
    """
    Runs a location search for a fixed query, stores the results in
    ``location_results.json`` and prints them.

    Raises:
        SystemExit: With status 1 if geckodriver.exe is not found in the
            download directory.
    """
    # Define the search query and paths
    query ='jokes'
    download_path = "C:/Users/heman/Desktop/Deep learning/file_operations-/"
    # This must point at the Firefox browser itself, not at geckodriver.
    # NOTE(review): the fallback assumes the default Windows install location;
    # set FIREFOX_BINARY if Firefox lives elsewhere.
    firefox_binary_path = os.environ.get(
        "FIREFOX_BINARY", "C:/Program Files/Mozilla Firefox/firefox.exe"
    )

    # Check if geckodriver is already downloaded
    driver_path = os.path.join(download_path, "geckodriver.exe")
    if not os.path.exists(driver_path):
        print("geckodriver is not found. Please download it manually and place it in the specified download path.")
        # exit() is a helper intended for the interactive shell; raise
        # SystemExit directly so the behaviour is the same everywhere.
        raise SystemExit(1)

    # Perform the search and retrieve the locations
    results = search_locations(query, driver_path, firefox_binary_path)

    # Save the results to a JSON file
    with open("location_results.json", "w") as file:
        json.dump(results, file, indent=2)

    # Print the location details
    print("Location Details:")
    for location in results:
        print(location)


if __name__ == "__main__":
    main()

In [None]:
import json
import os
import time
from typing import List
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.common.exceptions import WebDriverException
# selenium 3
# NOTE(review): `executable_path=` is the Selenium 3 calling convention, while
# the other cells of this notebook use the Selenium 4 `service=` keyword.
from selenium import webdriver
from webdriver_manager.firefox import GeckoDriverManager

# NOTE(review): this starts a Firefox session as soon as the cell runs. Nothing
# in this cell quits it, and search_locations() below creates its own local
# `driver`, so this module-level browser is left open.
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())

def search_locations(query: str, driver_path: str) -> List[str]:
    """
    Performs a search for locations based on the query provided using Selenium.

    :param query: The search query for locations.
    :param driver_path: The file path to the geckodriver executable. If it is
        empty or None, the path is obtained from GeckoDriverManager instead.
    :return: A list with the text of every element of class "rllt__details";
        an empty list if a WebDriverException occurs (the error is printed).
    """
    # Set up Firefox options
    firefox_options = Options()
    firefox_options.add_argument("--headless")  # Run in headless mode

    # Set up Firefox service; fall back to webdriver_manager when no explicit
    # driver path is supplied.
    firefox_service = Service(executable_path=driver_path or GeckoDriverManager().install())

    # Create a new Firefox driver instance. The options have to be passed
    # along, otherwise the headless flag above has no effect.
    driver = webdriver.Firefox(service=firefox_service, options=firefox_options)

    try:
        # Navigate to the Google search page
        driver.get("https://www.google.com/search")

        # Find the search input field and enter the query
        search_input = driver.find_element(By.NAME, "q")
        search_input.send_keys(query)
        search_input.submit()

        # Wait for the search results to load
        time.sleep(5)

        # Find all the location elements
        location_elements = driver.find_elements(By.CLASS_NAME, "rllt__details")

        # Extract the location details
        locations = [element.text for element in location_elements]

        return locations
    except WebDriverException as e:
        print(f"An error occurred while searching for locations: {e}")
        return []
    finally:
        # Close the browser
        driver.quit()


def main():
    """
    Runs a location search for a fixed query, stores the results in
    ``location_results.json`` and prints them.
    """
    # Define the search query and paths
    query = 'jokes'

    # Let webdriver_manager supply the geckodriver path. Without this
    # assignment `driver_path` would be undefined inside this function.
    driver_path = GeckoDriverManager().install()

    # Perform the search and retrieve the locations
    results = search_locations(query, driver_path)

    # Save the results to a JSON file
    with open("location_results.json", "w") as file:
        json.dump(results, file, indent=2)

    # Print the location details
    print("Location Details:")
    for location in results:
        print(location)


if __name__ == "__main__":
    main()

In [None]:

import json
import os
import stat
import time
from typing import List
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
import requests
import zipfile
from io import BytesIO



def search_locations(query: str, driver_path: str, timeout: float = 30) -> List[str]:
    """
    Search Google for ``query`` in headless Firefox and collect location details.

    Args:
        query (str): The search query for locations.
        driver_path (str): The path to the Firefox driver (geckodriver) executable.
        timeout (float, optional): Maximum number of seconds to wait for location
            entries to appear after the query is submitted. Defaults to 30.

    Returns:
        List[str]: The text of every element with class ``rllt__details`` found on
        the results page, or an empty list if none appeared within ``timeout``.
    """
    # Imported locally so this cell does not rely on imports made by other cells.
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    # Set up Firefox options
    firefox_options = Options()
    firefox_options.add_argument("--headless")  # Run in headless mode

    # Attach the profile through Options: the ``firefox_profile`` keyword of
    # ``webdriver.Firefox`` is deprecated in Selenium 4 and removed in later
    # 4.x releases, where passing it raises TypeError.
    firefox_options.profile = FirefoxProfile()

    # Set up Firefox service
    firefox_service = Service(executable_path=driver_path)

    # Create a new Firefox driver instance
    driver = webdriver.Firefox(service=firefox_service, options=firefox_options)

    try:
        # Navigate to the search page
        driver.get("https://www.google.com/search")

        # Find the search input field and enter the query
        search_input = driver.find_element(By.NAME, "q")
        search_input.send_keys(query)
        search_input.submit()

        # Wait explicitly for the location entries instead of sleeping for a
        # fixed time: returns as soon as they are present and gives up after
        # ``timeout`` seconds.
        try:
            location_elements = WebDriverWait(driver, timeout).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "rllt__details"))
            )
        except TimeoutException:
            # No location entries showed up; same outcome as finding none.
            return []

        # Extract the location details
        return [element.text for element in location_elements]

    finally:
        # Close the browser
        driver.quit()

# Example usage: run one location search end to end and store the results.
# NOTE(review): the names assigned below (query, download_path, driver_path,
# results) are module-level and stay in the kernel after this cell runs.
query = "Restaurants in New York"
# NOTE(review): hard-coded absolute Windows path; it will not exist on other machines.
download_path = "C:/Users/heman/Desktop/Deep learning/file_operations-/"  # Replace with the desired download path

# Download geckodriver if it doesn't exist
# NOTE(review): download_geckodriver is not defined in this cell; presumably it
# comes from another cell that must be run first -- confirm.
driver_path = os.path.join(download_path, "geckodriver.exe")
if not os.path.exists(driver_path):
    # The helper's return value replaces driver_path; None is treated as failure.
    driver_path = download_geckodriver(download_path)
    if driver_path is None:
        print("Failed to download geckodriver. Exiting.")
        exit(1)
else:
    print("geckodriver is already downloaded.")

# Run the search with the resolved driver executable.
results = search_locations(query, driver_path)

# Save the results to a JSON file
with open("location_results.json", "w") as file:
    json.dump(results, file, indent=2)

# Print the location details
print("Location Details:")
for location in results:
    print(location)

In [None]:
import json
import os
import stat
import time
from typing import List
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
import requests
import zipfile
from io import BytesIO

def download_geckodriver(download_path: str):
    """
    Download the latest Windows (win64) geckodriver release into ``download_path``.

    The latest release is looked up through the GitHub REST API for the
    ``mozilla/geckodriver`` repository, the ``win64.zip`` asset is downloaded,
    and only ``geckodriver.exe`` is extracted from it.

    Args:
        download_path (str): Directory in which to place ``geckodriver.exe``.
            It is created if it does not exist.

    Returns:
        The full path to the extracted ``geckodriver.exe``, or ``None`` if the
        release lookup, download, or extraction failed.
    """
    latest_release_url = "https://api.github.com/repos/mozilla/geckodriver/releases/latest"
    executable_name = "geckodriver.exe"

    try:
        # Look up the latest release and pick the 64-bit Windows archive.
        release_response = requests.get(latest_release_url, timeout=30)
        release_response.raise_for_status()
        assets = release_response.json().get("assets", [])
        archive_url = next(
            (
                asset["browser_download_url"]
                for asset in assets
                if asset.get("name", "").endswith("win64.zip")
            ),
            None,
        )
        if archive_url is None:
            print("No win64 geckodriver archive found in the latest release.")
            return None

        # Fetch the archive into memory; it only holds the driver executable.
        archive_response = requests.get(archive_url, timeout=120)
        archive_response.raise_for_status()

        os.makedirs(download_path, exist_ok=True)
        with zipfile.ZipFile(BytesIO(archive_response.content)) as archive:
            # Extract only the expected member rather than the whole archive,
            # so unexpected entries cannot be written outside download_path.
            archive.extract(executable_name, download_path)

        driver_path = os.path.join(download_path, executable_name)
        # Make sure the driver is executable (a no-op on most Windows setups).
        current_mode = os.stat(driver_path).st_mode
        os.chmod(driver_path, current_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
    except (requests.RequestException, zipfile.BadZipFile, KeyError, ValueError, OSError) as error:
        # Callers treat None as "download failed", so report and return it.
        print(f"Failed to download geckodriver: {error}")
        return None

    return driver_path

def search_locations(query: str, driver_path: str, firefox_binary_path: str, timeout: float = 5) -> List[str]:
    """
    Search Google for ``query`` in headless Firefox and collect location details.

    Args:
        query (str): The search query for locations.
        driver_path (str): The path to the Firefox driver (geckodriver) executable.
        firefox_binary_path (str): The path to the Firefox binary executable.
        timeout (float, optional): Maximum number of seconds to wait for location
            entries to appear after the query is submitted. Defaults to 5.

    Returns:
        List[str]: The text of every element with class ``rllt__details`` found on
        the results page, or an empty list if none appeared within ``timeout``.
    """
    # Imported locally so this cell does not rely on imports made by other cells.
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    # Set up Firefox options
    firefox_options = Options()
    firefox_options.binary_location = firefox_binary_path
    firefox_options.add_argument("--headless")  # Run in headless mode

    # Set up Firefox service
    firefox_service = Service(executable_path=driver_path)

    # Create a new Firefox driver instance
    driver = webdriver.Firefox(service=firefox_service, options=firefox_options)

    try:
        # Navigate to the search page
        driver.get("https://www.google.com/search")

        # Find the search input field and enter the query
        search_input = driver.find_element(By.NAME, "q")
        search_input.send_keys(query)
        search_input.submit()

        # Wait explicitly for the location entries instead of sleeping for a
        # fixed time: returns as soon as they are present and gives up after
        # ``timeout`` seconds.
        try:
            location_elements = WebDriverWait(driver, timeout).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "rllt__details"))
            )
        except TimeoutException:
            # No location entries showed up; same outcome as finding none.
            return []

        # Extract the location details
        return [element.text for element in location_elements]

    finally:
        # Close the browser
        driver.quit()

# Example usage: run one location search end to end and store the results.
# NOTE(review): the names assigned below (query, download_path,
# firefox_binary_path, driver_path, results) are module-level and stay in the
# kernel after this cell runs.
query = "Restaurants in New York"
# NOTE(review): both paths below are hard-coded absolute Windows paths; they
# will not exist on other machines.
download_path = "C:/Users/heman/Desktop/Deep learning/file_operations-/"  # Replace with the desired download path
firefox_binary_path = "C:/Program Files/Mozilla Firefox/firefox.exe"  # Replace with the path to your Firefox binary

# Download geckodriver if it doesn't exist
driver_path = os.path.join(download_path, "geckodriver.exe")
if not os.path.exists(driver_path):
    # The helper's return value replaces driver_path; None is treated as failure.
    driver_path = download_geckodriver(download_path)
    if driver_path is None:
        print("Failed to download geckodriver. Exiting.")
        exit(1)
else:
    print("geckodriver is already downloaded.")

# Run the search with the resolved driver and the explicit Firefox binary.
results = search_locations(query, driver_path, firefox_binary_path)

# Save the results to a JSON file
with open("location_results.json", "w") as file:
    json.dump(results, file, indent=2)

# Print the location details
print("Location Details:")
for location in results:
    print(location)