In [7]:
import requests
from bs4 import BeautifulSoup
from typing import Optional, Dict
from groq import Groq

def get_company_info(company_name: str, api_key: str) -> Optional[Dict[str, str]]:
    try:
        # Web scraping part remains the same
        search_url = f"https://www.google.com/search?q={company_name.replace(' ', '+')}+official+website?"
        headers = {"User-Agent": "Mozilla/5.0"}
        search_response = requests.get(search_url, headers=headers)
        search_soup = BeautifulSoup(search_response.text, "html.parser")
        
        first_link = search_soup.find("a")
        if not first_link:
            return None
        
        company_url = first_link.get("href")
        response = requests.get(company_url, headers=headers)
        response.raise_for_status()
        
        soup = BeautifulSoup(response.text, "html.parser")
        
        name = soup.find("title").text.strip() if soup.find("title") else "Unknown"
        description_meta = soup.find("meta", attrs={"name": "description"})
        description = description_meta["content"].strip() if description_meta else "No description available"
        
        # Initialize Groq client
        client = Groq(api_key=api_key)
        
        # Create the prompt for analysis
        prompt = f"Extract the company name, description, and location from the following text. Format the response as a clear, concise summary: {description}"
        
        # Make the API call to Groq
        completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that extracts and summarizes company information."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            model="llama2-70b-4096",  # You can also use "llama2-70b-4096"
            temperature=0.3,
            max_tokens=500
        )
        
        # Extract the analysis from Groq's response
        ai_analysis = completion.choices[0].message.content
        
        return {
            "name": name,
            "description": ai_analysis,
            "location": company_url
        }
    except Exception as e:
        print("Error fetching company data:", e)
        return None

if __name__ == "__main__":
    company_name = "Avadh Group Surat"
    api_key = "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"  # Replace with your actual Groq API key
    company_info = get_company_info(company_name, api_key)
    print(company_info)

Error fetching company data: Invalid URL '/httpservice/retry/enablejs?sei=78qdZ-fuOu7R1e8Pg5SB6AU': No scheme supplied. Perhaps you meant https:///httpservice/retry/enablejs?sei=78qdZ-fuOu7R1e8Pg5SB6AU?
None


In [8]:
import requests
from bs4 import BeautifulSoup
from typing import Optional, Dict
from groq import Groq
from urllib.parse import urljoin, urlparse

def get_company_info(company_name: str, api_key: str) -> Optional[Dict[str, str]]:
    try:
        # Modified web scraping part with better URL handling
        search_url = f"https://www.google.com/search?q={company_name.replace(' ', '+')}+official+website"
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate, br",
            "DNT": "1",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1"
        }
        
        search_response = requests.get(search_url, headers=headers)
        search_response.raise_for_status()
        search_soup = BeautifulSoup(search_response.text, "html.parser")
        
        # Look specifically for search result links
        search_results = search_soup.select(".yuRUbf a")  # Google's search result class
        if not search_results:
            search_results = search_soup.select("div.g div.r a")  # Alternative class names
        if not search_results:
            search_results = search_soup.find_all("a", href=True)  # Fallback to all links
            
        # Find first valid URL
        company_url = None
        for result in search_results:
            href = result.get("href", "")
            if href.startswith(("http://", "https://")):
                # Validate URL
                parsed = urlparse(href)
                if parsed.scheme and parsed.netloc:
                    company_url = href
                    break
        
        if not company_url:
            raise ValueError("No valid company URL found")
            
        # Fetch company website
        response = requests.get(company_url, headers=headers, timeout=10)
        response.raise_for_status()
        
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Extract title and description with better error handling
        name = soup.find("title")
        name = name.text.strip() if name else company_name
        
        description = ""
        # Try multiple meta tag variations
        for meta in soup.find_all("meta"):
            if meta.get("name", "").lower() == "description" or meta.get("property", "").lower() == "og:description":
                description = meta.get("content", "").strip()
                break
        
        if not description:
            # Fallback to first paragraph or div text
            first_p = soup.find("p")
            if first_p:
                description = first_p.text.strip()
            else:
                first_div = soup.find("div")
                description = first_div.text.strip() if first_div else "No description available"
        
        # Initialize Groq client
        client = Groq(api_key=api_key)
        
        # Create the prompt for analysis
        prompt = f"""Analyze and extract key information about this company:
        Name: {name}
        Website: {company_url}
        Description: {description}
        
        Please provide a concise summary including:
        1. Company name
        2. Main business/industry
        3. Location (if available)
        4. Key products/services"""
        
        # Make the API call to Groq
        completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that extracts and summarizes company information in a clear, structured format."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            model="mixtral-8x7b-32768",
            temperature=0.3,
            max_tokens=500
        )
        
        # Extract the analysis from Groq's response
        ai_analysis = completion.choices[0].message.content
        
        return {
            "name": name,
            "website": company_url,
            "description": ai_analysis
        }
    except Exception as e:
        print(f"Error fetching company data: {str(e)}")
        return None

if __name__ == "__main__":
    company_name = "Avadh Group Surat"
    api_key = "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"  # Replace with your actual Groq API key
    company_info = get_company_info(company_name, api_key)
    print(company_info)

{'name': 'Google Search Help', 'website': 'https://support.google.com/websearch', 'description': '1. Company Name: Google Search Help\n2. Main Business/Industry: Online Search Engine Services\n3. Location: Information not explicitly provided, but Google is headquartered in Mountain View, California, USA.\n4. Key Products/Services: Offical Google Search Help Center, providing tips, tutorials, and answers to frequently asked questions about using Google Search.'}


In [18]:
%pip install wikipedia
import wikipedia

def get_wikipedia_summary(company_name):
    try:
        summary = wikipedia.summary(company_name, sentences=2)
        return summary
    except wikipedia.exceptions.DisambiguationError as e:
        return f"Multiple results found: {e.options}"
    except wikipedia.exceptions.PageError:
        return "No Wikipedia page found."

print(get_wikipedia_summary("Amazon"))


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.
No Wikipedia page found.


In [24]:
import requests 
from bs4 import BeautifulSoup 
from groq import Groq
from urllib.parse import urlencode

def scrape_google(company_name):
    """Scrapes first relevant Google Search result."""
    search_query = {
        'q': f"{company_name} real estate developer Surat",
        'num': '10'  # We only need first few results now
    }
    search_url = f"https://www.google.com/search?{urlencode(search_query)}"
    
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://www.google.com/",
    }
    
    try:
        print(f"Searching Google for: {search_url}")
        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()
        
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Get all links
        all_links = soup.find_all("a", href=True)
        print(f"\nFound {len(all_links)} links")
        
        # Return first valid external link
        for link in all_links:
            href = link.get("href", "")
            if href.startswith("http") and "google" not in href:
                print(f"Selected URL: {href}")
                return href
                
        return None
        
    except Exception as e:
        print(f"Error in scrape_google: {str(e)}")
        return None

def scrape_website(url):
    """Scrapes the website for basic details."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
    }
    
    try:
        print(f"\nScraping: {url}")
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Get title
        title_tag = soup.find("title")
        name = title_tag.text.strip() if title_tag else "No Name"
        print(f"Found title: {name}")
        
        # Get description
        description = ""
        
        # Try meta descriptions first
        meta_desc = soup.find("meta", {"name": "description"}) or \
                   soup.find("meta", {"property": "og:description"})
        if meta_desc and meta_desc.get("content"):
            description = meta_desc["content"].strip()
        
        # If no meta description, get first few paragraphs
        if not description:
            paragraphs = soup.find_all("p")
            description = " ".join(p.text.strip() for p in paragraphs[:3])
        
        if not description:
            description = "No description available"
            
        return {
            "name": name,
            "website": url,
            "description": description[:1000]  # Limit description length
        }
    except Exception as e:
        print(f"Error in scrape_website: {str(e)}")
        return None

def summarize_with_groq(company_info, api_key):
    """Uses Groq API to summarize company data."""
    if not company_info:
        return "No company information available to summarize."
        
    try:
        client = Groq(api_key=api_key)
        
        prompt = f"""Analyze this real estate developer company in Surat:
        Name: {company_info['name']}
        Website: {company_info['website']}
        Description: {company_info['description']}
        
        Please provide a detailed summary including:
        1. Company overview and main business focus
        2. Types of projects and developments
        3. Notable features or specializations
        4. Market presence in Surat
        """
        
        completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are a real estate industry analyst focusing on the Surat market. Provide concise but informative analysis."
                },
                {"role": "user", "content": prompt}
            ],
            model="mixtral-8x7b-32768",
            temperature=0.3,
            max_tokens=500
        )
        
        return completion.choices[0].message.content
    except Exception as e:
        print(f"Error in summarize_with_groq: {str(e)}")
        return f"Error generating summary: {str(e)}"

if __name__ == "__main__":
    company_name = "Avadh Projects Surat"
    groq_api_key = "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"
    
    print(f"Starting search for: {company_name}")
    website_url = scrape_google(company_name)
    
    if website_url:
        print(f"\nFound website: {website_url}")
        company_data = scrape_website(website_url)
        if company_data:
            print("\nGenerating summary...")
            summary = summarize_with_groq(company_data, groq_api_key)
            print("\nCompany Analysis:")
            print(summary)
        else:
            print("Failed to scrape website data")
    else:
        print("Could not find any relevant websites.")

Starting search for: Avadh Projects Surat
Searching Google for: https://www.google.com/search?q=Avadh+Projects+Surat+real+estate+developer+Surat&num=10

Found 3 links
Could not find any relevant websites.


In [30]:
import requests
from bs4 import BeautifulSoup
from groq import Groq
from urllib.parse import urlencode, urlparse

def scrape_google(company_name):
    """Scrapes Google Search results for a company website."""
    search_query = {
        'q': f"{company_name} real estate developer Surat",
        'num': '10'
    }
    search_url = f"https://www.google.com/search?{urlencode(search_query)}"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://www.google.com/",
    }

    try:
        print(f"Searching Google for: {search_url}")
        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        all_links = soup.find_all("a", href=True)
        print(f"\nFound {len(all_links)} links")

        for link in all_links:
            href = link.get("href", "")

            if href.startswith("http"):
                parsed_url = urlparse(href)
                hostname = parsed_url.netloc

                if not any(domain in hostname for domain in ["google.com", "bing.com", "yahoo.com"]):
                    print(f"Selected URL: {href}")
                    return href

        return None

    except requests.exceptions.RequestException as e:
        print(f"Error in scrape_google: {str(e)}")
        return None
    except Exception as e:
        print(f"Unexpected error in scrape_google: {str(e)}")
        return None


def scrape_website(url):
    """Scrapes the website for basic details."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
    }

    try:
        print(f"\nScraping: {url}")
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        title_tag = soup.find("title")
        name = title_tag.text.strip() if title_tag else "No Name"
        print(f"Found title: {name}")

        description = ""

        meta_desc = soup.find("meta", {"name": "description"}) or \
                    soup.find("meta", {"property": "og:description"})
        if meta_desc and meta_desc.get("content"):
            description = meta_desc["content"].strip()

        if not description:
            paragraphs = soup.find_all("p")
            description = " ".join(p.text.strip() for p in paragraphs[:3])

        if not description:
            description = "No description available"

        return {
            "name": name,
            "website": url,
            "description": description[:1000]
        }
    except requests.exceptions.RequestException as e:
        print(f"Error scraping {url}: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error scraping {url}: {e}")
        return None



def summarize_with_groq(company_info, api_key):
    """Uses Groq API to summarize company data."""
    if not company_info:
        return "No company information available to summarize."

    try:
        client = Groq(api_key=api_key)

        prompt = f"""Analyze this real estate developer company in Surat:
        Name: {company_info['name']}
        Website: {company_info['website']}
        Description: {company_info['description']}

        Please provide a detailed summary including:
        1. Company overview and main business focus
        2. Types of projects and developments
        3. Notable features or specializations
        4. Market presence in Surat
        """

        completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are a real estate industry analyst focusing on the Surat market. Provide concise but informative analysis."
                },
                {"role": "user", "content": prompt}
            ],
            model="mixtral-8x7b-32768",  # Or another suitable model
            temperature=0.3,
            max_tokens=500
        )

        return completion.choices[0].message.content
    except Exception as e:
        print(f"Error in summarize_with_groq: {str(e)}")
        return f"Error generating summary: {str(e)}"


if __name__ == "__main__":
    company_name = "Avadh Projects Surat"  # Example
    groq_api_key = "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"  # Replace with your actual key

    print(f"Starting search for: {company_name}")
    website_url = scrape_google(company_name)

    if website_url:
        print(f"\nFound website: {website_url}")
        company_data = scrape_website(website_url)
        if company_data:
            print("\nGenerating summary...")
            summary = summarize_with_groq(company_data, groq_api_key)
            print("\nCompany Analysis:")
            print(summary)
        else:
            print("Failed to scrape website data")


IndentationError: expected an indented block after function definition on line 53 (1041811738.py, line 58)

In [36]:
import requests
from bs4 import BeautifulSoup
from groq import Groq
from urllib.parse import urlencode, urlparse
import time
import logging
import os

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

def scrape_google(company_name):
    """Scrapes Google Search results for a company website."""
    search_query = {
        'q': f"{company_name} real estate developer Surat",
        'num': '10'  # Adjust the number of results
    }
    search_url = f"https://www.google.com/search?{urlencode(search_query)}"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://www.google.com/",
    }

    try:
        logging.info(f"Searching Google for: {search_url}")
        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()  # Raise an exception for bad status codes

        soup = BeautifulSoup(response.text, "html.parser")
        all_links = soup.find_all("a", href=True)
        logging.info(f"Found {len(all_links)} links")

        for link in all_links:
            href = link.get("href", "")
            if href.startswith("http"):
                parsed_url = urlparse(href)
                hostname = parsed_url.netloc

                # Filter out search engine domains and check for company name in URL
                if (not any(domain in hostname for domain in ["google.com", "bing.com", "yahoo.com"]) and
                    company_name.lower() in hostname.lower()):
                    logging.info(f"Selected URL: {href}")
                    return href  # Return the first valid link

        return None  # No valid link found

    except requests.exceptions.RequestException as e:
        logging.error(f"Error in scrape_google: {str(e)}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error in scrape_google: {str(e)}")
        return None


def scrape_website(url):
    """Scrape company website for relevant information."""
    try:
        logging.info(f"Scraping website: {url}")
        response = requests.get(url, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        # Extract relevant data (e.g., company name, description, projects)
        company_name = soup.title.string if soup.title else "Unknown"
        description = soup.find("meta", attrs={"name": "description"})
        description = description["content"] if description else "No description available."

        return {
            "name": company_name,
            "website": url,
            "description": description,
        }

    except requests.exceptions.RequestException as e:
        logging.error(f"Error in scrape_website: {str(e)}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error in scrape_website: {str(e)}")
        return None


def summarize_with_groq(company_info, api_key):
    """Summarize company information using Groq API."""
    if not company_info:
        return "No company information available to summarize."

    client = Groq(api_key=api_key)

    prompt = f"""Analyze this real estate developer company in Surat:
    Name: {company_info['name']}
    Website: {company_info['website']}
    Description: {company_info['description']}

    Please provide a detailed summary including:
    1. Company overview and main business focus
    2. Types of projects and developments
    3. Notable features or specializations
    4. Market presence in Surat
    """

    completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are a real estate industry analyst focusing on the Surat market. Provide concise but informative analysis."
            },
            {"role": "user", "content": prompt}
        ],
        model="mixtral-8x7b-32768",  # Or another suitable model
        temperature=0.3,
        max_tokens=500
    )

    return completion.choices[0].message.content



if __name__ == "__main__":
    company_name = "Avadh Projects Surat"  # Replace with the desired company name
    groq_api_key = os.getenv("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")  # Use environment variable for API key

    if not groq_api_key:
        logging.error("GROQ_API_KEY environment variable not set.")
        exit(1)

    logging.info(f"Starting search for: {company_name}")
    website_url = scrape_google(company_name)

    if website_url:
        logging.info(f"Found website: {website_url}")
        company_data = scrape_website(website_url)

        if company_data:
            logging.info("Generating summary...")
            summary = summarize_with_groq(company_data, groq_api_key)
            logging.info("\nCompany Analysis:")
            print(summary)
        else:
            logging.error("Failed to scrape website data")
    else:
        logging.error("Could not find any relevant websites.")

    time.sleep(2)  # Delay for the next search (if doing multiple searches)

2025-02-01 14:07:50,991 - ERROR - GROQ_API_KEY environment variable not set.
2025-02-01 14:07:50,993 - INFO - Starting search for: Avadh Projects Surat
2025-02-01 14:07:50,995 - INFO - Searching Google for: https://www.google.com/search?q=Avadh+Projects+Surat+real+estate+developer+Surat&num=10
2025-02-01 14:07:52,855 - INFO - Found 3 links
2025-02-01 14:07:52,868 - ERROR - Could not find any relevant websites.


In [39]:
import requests
from bs4 import BeautifulSoup
from groq import Groq
from urllib.parse import urlencode, urlparse
import logging
import os

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

def scrape_bing(company_name):
    """Scrapes Bing Search results to find the first relevant company website."""
    search_query = {
        'q': f"{company_name} real estate developer Surat",
        'count': '10'  # Number of results to retrieve
    }
    search_url = f"https://www.bing.com/search?{urlencode(search_query)}"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://www.bing.com/",
    }

    try:
        logging.info(f"Searching Bing for: {search_url}")
        response = requests.get(search_url, headers=headers, timeout=15)
        response.raise_for_status()  # Raise an exception for bad status codes

        soup = BeautifulSoup(response.text, "html.parser")
        all_links = soup.find_all("a", href=True)
        logging.info(f"Found {len(all_links)} links")

        for link in all_links:
            href = link.get("href", "")
            if href.startswith("http"):
                parsed_url = urlparse(href)
                hostname = parsed_url.netloc

                # Filter out Bing-related domains and check for company name in URL
                if (not any(domain in hostname for domain in ["bing.com", "microsoft.com"]) and
                    company_name.lower() in hostname.lower()):
                    logging.info(f"Selected URL: {href}")
                    return href  # Return the first valid link

        return None  # No valid link found

    except requests.exceptions.RequestException as e:
        logging.error(f"Error in scrape_bing: {str(e)}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error in scrape_bing: {str(e)}")
        return None


def scrape_website(url):
    """Scrape the first website for basic information."""
    try:
        logging.info(f"Scraping website: {url}")
        response = requests.get(url, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        # Extract basic information
        company_name = soup.title.string if soup.title else "Unknown"
        description = soup.find("meta", attrs={"name": "description"})
        description = description["content"] if description else "No description available."

        return {
            "name": company_name,
            "website": url,
            "description": description,
        }

    except requests.exceptions.RequestException as e:
        logging.error(f"Error in scrape_website: {str(e)}")
        return None
    except Exception as e:
        logging.error(f"Unexpected error in scrape_website: {str(e)}")
        return None


def summarize_with_groq(company_info, api_key):
    """Summarize company information using Groq API."""
    if not company_info:
        return "No company information available to summarize."

    client = Groq(api_key=api_key)

    prompt = f"""Analyze this real estate developer company in Surat:
    Name: {company_info['name']}
    Website: {company_info['website']}
    Description: {company_info['description']}

    Please provide a brief summary including:
    1. Company overview
    2. Main business focus
    3. Notable features or specializations
    """

    completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "You are a real estate industry analyst focusing on the Surat market. Provide concise but informative analysis."
            },
            {"role": "user", "content": prompt}
        ],
        model="mixtral-8x7b-32768",  # Or another suitable model
        temperature=0.3,
        max_tokens=300
    )

    return completion.choices[0].message.content


if __name__ == "__main__":
    company_name = "Avadh Projects Surat"  # Replace with the desired company name
    groq_api_key = os.getenv("GROQ_API_KEY")  # Use environment variable for API key

    if not groq_api_key:
        logging.error("GROQ_API_KEY environment variable not set.")
        exit(1)

    logging.info(f"Starting search for: {company_name}")
    website_url = scrape_bing(company_name)

    if website_url:
        logging.info(f"Found website: {website_url}")
        company_data = scrape_website(website_url)

        if company_data:
            logging.info("Generating summary...")
            summary = summarize_with_groq(company_data, groq_api_key)
            logging.info("\nCompany Analysis:")
            print(summary)
        else:
            logging.error("Failed to scrape website data")
    else:
        logging.error("Could not find any relevant websites.")

2025-02-01 14:10:53,741 - ERROR - GROQ_API_KEY environment variable not set.
2025-02-01 14:10:53,743 - INFO - Starting search for: Avadh Projects Surat
2025-02-01 14:10:53,745 - INFO - Searching Bing for: https://www.bing.com/search?q=Avadh+Projects+Surat+real+estate+developer+Surat&count=10
2025-02-01 14:10:54,963 - INFO - Found 104 links
2025-02-01 14:10:54,969 - ERROR - Could not find any relevant websites.


In [40]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def bing_search(company_name):
    """Scrapes Bing Search to find the official website."""
    search_url = f"https://www.bing.com/search?q={company_name.replace(' ', '+')}+real+estate+Surat"
    headers = {
        "User-Agent": "Mozilla/5.0"
    }

    response = requests.get(search_url, headers=headers)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    
    # Extract the first valid website link
    for link in soup.select("li.b_algo a"):
        href = link["href"]
        if "avadhprojects.com" in href:  # Official website check
            return href

    return None

def scrape_company_website(url):
    """Scrapes the company's official website for details."""
    headers = {"User-Agent": "Mozilla/5.0"}
    
    response = requests.get(url, headers=headers)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    name = soup.find("title").text.strip() if soup.find("title") else "No Name"
    
    description = ""
    meta_desc = soup.find("meta", {"name": "description"})
    if meta_desc:
        description = meta_desc["content"]
    else:
        first_p = soup.find("p")
        description = first_p.text.strip() if first_p else "No description available"

    return {"name": name, "website": url, "description": description}

def scrape_competitors():
    """Scrapes real estate competitors from a listing website."""
    competitors_url = "https://houssed.com/blog/guides/top-builders-in-surat"
    headers = {"User-Agent": "Mozilla/5.0"}

    response = requests.get(competitors_url, headers=headers)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    competitors = []
    for h3 in soup.find_all("h3"):
        name = h3.text.strip()
        competitors.append(name)

    return competitors

if __name__ == "__main__":
    company_name = "Avadh Group Surat"

    website_url = bing_search(company_name)
    
    if website_url:
        company_data = scrape_company_website(website_url)
        competitors = scrape_competitors()
        
        print("🏢 **Company Info:**")
        print(company_data)
        print("\n🏗️ **Top Competitors:**")
        print("\n".join(competitors))
    else:
        print("Could not find the official website.")


Could not find the official website.


In [78]:
import requests
from groq import Groq



def get_company_info(company_name: str, location: str, api_key: str):
    """Fetches company details using Groq AI."""
    client = Groq(api_key=api_key)

    # Create a structured prompt for the AI model
    prompt = f"""
    Extract key details about the company based on the given prompt:
    
    Company: {company_name}
    Location: {location}
    
    Provide the following details:
    1. Official Name
    2. Industry Type
    3. Headquarters / Main Office Location
    4. Key Products / Services
    5. Website (if available)

    Write the competitiors based on the same location, sector and services

    Strictly always return response in JSON format as shown in example
    """

    prompt += """
    Example: 
    Input :
    {
        company_name = "Avadh Group",
        location = "Surat, India"
    }

    Output : 
    {
    "company": {
        "official_name": "Avadh Group",
        "industry_type": "Real Estate and Construction",
        "headquarters": "Surat, India",
        "key_products_services": [
        "Development of residential and commercial projects (apartments, villas, office spaces)",
        "Property management",
        "Interior design",
        "Construction"
        ],
        "website": "Not Available"
    },
    "competitors": [
        {
        "name": "Sangini Group",
        "industry_type": "Real Estate and Construction",
        "headquarters": "Surat, India",
        "key_products_services": [
            "Residential and commercial real estate development"
        ]
        },
        {
        "name": "Rajhans Group",
        "industry_type": "Real Estate and Construction",
        "headquarters": "Surat, India",
        "key_products_services": [
            "Property development",
            "Interior design",
            "Construction"
        ]
        },
        {
        "name": "JRD Group",
        "industry_type": "Real Estate and Construction",
        "headquarters": "Surat, India",
        "key_products_services": [
            "Development of residential and commercial projects (apartments, villas, office spaces)"
        ]
        },
        {
        "name": "BK Jewels",
        "industry_type": "Real Estate and Construction",
        "headquarters": "Surat, India",
        "key_products_services": [
            "Residential and commercial real estate development (apartments, villas, office spaces)"
        ]
        },
        {
        "name": "SPC Group",
        "industry_type": "Real Estate and Construction",
        "headquarters": "Surat, India",
        "key_products_services": [
            "Property development",
            "Interior design",
            "Construction"
        ]
        }
    ]
    }
    """

    # Call Groq API
    completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant that extracts structured company information."},
            {"role": "user", "content": prompt}
        ],
        model="llama-3.3-70b-versatile",
        temperature=0.3,
        max_tokens=500
    )

    # Extract response
    return completion.choices[0].message.content

if __name__ == "__main__":
    company_name = "Avadh Group"
    location = "Surat, India"
    api_key = "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"  # Replace with your actual Groq API key

    company_info = get_company_info(company_name, location, api_key)
    print("🏢 Company Information:\n")
    print(company_info)


2025-02-01 15:21:41,149 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


🏢 Company Information:

```
{
    "company": {
        "official_name": "Reliance Industries Limited",
        "industry_type": "Conglomerate",
        "headquarters": "Mumbai, India",
        "key_products_services": [
            "Petroleum refining and marketing",
            "Petrochemicals",
            "Retail",
            "Digital services",
            "Telecommunications"
        ],
        "website": "https://www.ril.com/"
    },
    "competitors": [
        {
            "name": "Tata Group",
            "industry_type": "Conglomerate",
            "headquarters": "Mumbai, India",
            "key_products_services": [
                "Automotive manufacturing",
                "Steel production",
                "Power generation",
                "Telecommunications"
            ]
        },
        {
            "name": "Adani Group",
            "industry_type": "Conglomerate",
            "headquarters": "Ahmedabad, India",
            "key_products_services": [
      