<a href="https://colab.research.google.com/github/Vathsav56/AI-AGENT-Project/blob/main/code/Web_finder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install googlesearch-python requests beautifulsoup4

Collecting googlesearch-python
  Downloading googlesearch_python-1.3.0-py3-none-any.whl.metadata (3.4 kB)
Downloading googlesearch_python-1.3.0-py3-none-any.whl (5.6 kB)
Installing collected packages: googlesearch-python
Successfully installed googlesearch-python-1.3.0


In [5]:
import os
from googlesearch import search
import requests
from bs4 import BeautifulSoup
import webbrowser
def get_topic_info(topic):
    """Collect short text snippets about ``topic`` from web search results.

    Runs a web search for ``topic`` (up to 5 results), downloads each result
    page and keeps the text of the first three ``<p>`` elements that are longer
    than 50 characters. Collection stops once more than 10 snippets are held.

    Args:
        topic: Search query entered by the user.

    Returns:
        A list of unique paragraph strings in the order they were found. The
        list is empty when the topic is blank, the search fails, or no page
        yields a usable paragraph.

    Errors from the search or from individual pages are printed, not raised,
    so one unreachable site does not abort the whole run.
    """
    important_points = []

    # A blank query has nothing meaningful to search for; avoid the network calls.
    if not str(topic or "").strip():
        print("No topic provided; skipping web search.")
        return important_points

    # The recorded run of this notebook shows 403/429 responses for plain
    # requests. Sending a browser-like User-Agent is a common mitigation
    # (it does not guarantee access).
    request_headers = {
        "User-Agent": (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
        )
    }
    seen_texts = set()  # avoids storing the same paragraph twice

    print(f"Searching the web for: {topic}...")

    try:
        for url in search(topic, num_results=5, lang='en'):
            print(f"Checking general info URL: {url}")
            try:
                response = requests.get(url, headers=request_headers, timeout=5)
                response.raise_for_status()
                soup = BeautifulSoup(response.text, 'html.parser')

                # Only the leading paragraphs are sampled from each page.
                for p in soup.find_all('p')[:3]:
                    text = p.get_text(strip=True)
                    if len(text) > 50 and text not in seen_texts:
                        seen_texts.add(text)
                        important_points.append(text)

            except requests.exceptions.RequestException as e:
                print(f"Could not access {url}: {e}")
            except Exception as e:
                # Best-effort scraping: report the page and move on.
                print(f"Error parsing {url}: {e}")

            if len(important_points) > 10:
                break

    except Exception as e:
        print(f"An error occurred during web search for general info: {e}")

    return important_points

def find_and_display_image(topic):
    """Look for a direct image URL for ``topic`` and open it in a browser tab.

    Searches for ``"<topic> image"`` (up to 5 results, safe search on). For
    each result URL that contains a Google Images marker, the page is
    downloaded and inspected: first the ``og:image`` meta tags, then ``<img>``
    tags whose ``src`` is an absolute http(s) URL, is not a ``q=tbn``
    thumbnail and contains a known image extension. The first match wins.

    Args:
        topic: Search topic entered by the user.

    Returns:
        The image URL that was handed to ``webbrowser.open_new_tab``, or
        ``None`` when no image URL was found or opening the browser raised.

    Errors from the search or from individual pages are printed, not raised.
    """
    print(f"\nSearching Google Images for: {topic}...")
    image_search_query = f"{topic} image"
    direct_image_url = None
    url_schemes = ('http://', 'https://')
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif')

    try:
        for url in search(image_search_query, num_results=5, lang='en', safe='on'):
            # NOTE(review): only result URLs carrying these Google Images markers
            # are inspected; confirm the search helper actually yields such URLs.
            if "images.google.com" not in url and "google.com/imgres" not in url:
                continue

            print(f"Checking image search result URL: {url}")
            try:
                response = requests.get(url, timeout=5)
                response.raise_for_status()
                soup = BeautifulSoup(response.text, 'html.parser')

                # Validate each og:image value BEFORE accepting it. Assigning it
                # to direct_image_url first would let an unusable value (for
                # example a relative path) survive the loop, block the <img>
                # fallback below and end up being opened in the browser.
                for meta in soup.find_all('meta', property="og:image"):
                    candidate = meta.get('content')
                    if candidate and candidate.startswith(url_schemes):
                        direct_image_url = candidate
                        print(f"Found potential direct image URL from og:image: {direct_image_url}")
                        break

                if not direct_image_url:
                    for img_tag in soup.find_all('img', src=True):
                        src = img_tag.get('src')
                        # Skip relative/data URLs and thumbnail links.
                        if not src or not src.startswith(url_schemes) or 'q=tbn' in src:
                            continue
                        if any(ext in src for ext in image_extensions):
                            direct_image_url = src
                            print(f"Found potential direct image URL from img src: {direct_image_url}")
                            break

            except requests.exceptions.RequestException as e:
                print(f"Could not access {url} for image: {e}")
            except Exception as e:
                # Best-effort scraping: report the page and try the next result.
                print(f"Error parsing {url} for image: {e}")

            if direct_image_url:
                break

    except Exception as e:
        print(f"An error occurred during Google Images search: {e}")

    if not direct_image_url:
        print("\nCould not find a direct image URL to display.")
        return None

    print(f"\nOpening image in your default web browser: {direct_image_url}")
    try:
        webbrowser.open_new_tab(direct_image_url)
        return direct_image_url
    except Exception as e:
        print(f"Could not open browser: {e}")
        return None

def create_info_file(topic, points, image_url=None):
    """Write the collected points and the image URL (if any) to a text file.

    The file is created in the current working directory and named
    ``<topic>_info.txt``, where the topic is lower-cased, spaces become
    underscores, and characters that act as path separators or are not
    allowed in file names on common platforms are replaced by underscores.

    Args:
        topic: Topic the information is about; used for the heading and the
            file name.
        points: Iterable of text snippets, written as a numbered list. When
            empty, a "No significant points found." line is written instead.
        image_url: URL of the image that was opened in the browser, or
            ``None`` if there is none.

    Returns:
        The name of the file that was written, or ``None`` if writing failed
        (the error is printed, not raised).
    """
    import re  # local import keeps this function self-contained

    stem = topic.replace(' ', '_').lower()
    # The topic is raw user input: neutralise separators and reserved characters
    # so e.g. "AC/DC" does not point into a non-existent directory.
    stem = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', stem)
    filename = f"{stem}_info.txt"

    try:
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(f"--- Information about: {topic.upper()} ---\n\n")
            f.write("Important Points:\n")
            if points:
                for number, point in enumerate(points, start=1):
                    f.write(f"{number}. {point}\n\n")
            else:
                f.write("No significant points found.\n\n")

            if image_url:
                f.write("\n--- Image Displayed (Opened in Browser) ---\n")
                f.write(f"The image was opened in your default web browser. You can also view it at:\n{image_url}\n")
            else:
                f.write("No direct image was found or opened in browser.\n")

        print(f"\nInformation saved to: {filename}")
        return filename
    except OSError as e:  # IOError is an alias of OSError in Python 3
        print(f"Error writing to file {filename}: {e}")
        return None

if __name__ == "__main__":
    # Driver: ask for a topic, gather text snippets, look for an image and
    # save everything to a text file.
    # Strip surrounding whitespace so it does not leak into the search query
    # or the generated file name.
    topic_input = input("Enter the topic you want to search for: ").strip()

    if not topic_input:
        # Without a topic the pipeline would search for " image" and write "_info.txt".
        print("No topic entered; nothing to search for.")
    else:
        points = get_topic_info(topic_input)

        found_image_url = find_and_display_image(topic_input)

        create_info_file(topic_input, points, found_image_url)

        print("\n--- Process Complete ---")
        print("Please review the generated text file for information.")
        if found_image_url:
            print("An image related to your topic should have opened in your browser.")

Enter the topic you want to search for: nvidia stock
Searching the web for: nvidia stock...
Checking general info URL: https://finance.yahoo.com/news/nvidia-earnings-trump-tariff-updates-and-the-feds-preferred-inflation-gauge-what-to-know-this-week-113221140.html
Could not access https://finance.yahoo.com/news/nvidia-earnings-trump-tariff-updates-and-the-feds-preferred-inflation-gauge-what-to-know-this-week-113221140.html: 429 Client Error: Too Many Requests for url: https://finance.yahoo.com/news/nvidia-earnings-trump-tariff-updates-and-the-feds-preferred-inflation-gauge-what-to-know-this-week-113221140.html
Checking general info URL: https://www.nvidia.com/en-us/about-nvidia/legal-info/logo-brand-usage/
Checking general info URL: https://investor.nvidia.com/stock-info/stock-quote-and-chart/default.aspx
Could not access https://investor.nvidia.com/stock-info/stock-quote-and-chart/default.aspx: 403 Client Error: Forbidden for url: https://investor.nvidia.com/stock-info/stock-quote-and-