In [14]:
import requests
from bs4 import BeautifulSoup
import csv
import time
import random
import re
import pandas as pd


In [None]:
# Function to extract company name
def extract_company_name(soup):
    company_name = soup.find('h1')  # Assuming company name is in <h1>
    return company_name.text.strip() if company_name else "NULL"

# Function to extract website URL
def extract_website_url(soup):
    website = soup.find('a', href=True)  # Assuming website URL is in <a> tag with href
    return website['href'] if website else "NULL"

# Function to extract contact number
def extract_contact_number(soup):
    contact_number = soup.find('a', href=lambda href: href and 'tel:' in href)  # Check for 'tel:' in <a>
    return contact_number.text.strip() if contact_number else "NULL"

# Function to extract location/address
def extract_location(soup):
    location = soup.find('address')  # Adjust according to the HTML structure
    return location.text.strip() if location else "NULL"

# Function to extract industry/category
def extract_industry(soup):
    # Search for the <ul> with class 'ourServicesList' inside each <div>
    industry = soup.find('ul', class_='ourServicesList')  # Correct usage of class_

    # If the industry list is found inside the <div>, extract the list items
    if industry:
        # Get all <li> elements (list items) within the <ul> tag
        industry = industry.find_all('li')

        # Return the text of all list items, joined by commas, or 'NULL' if no items
        return ', '.join([item.text.strip() for item in industry]) if industry else "NULL"

    # If no <ul> with class 'ourServicesList' is found in any <div>, return 'NULL'
    return "NULL"

# Function to extract company description
def extract_company_description(soup):
    description = soup.find('div', class_='company-description')  # Adjust according to the HTML structure
    return description.text.strip() if description else "NULL"

# Function to extract email address
def extract_email(soup):
    email = soup.find('a', href=lambda href: href and 'mailto:' in href)  # Check for 'mailto:' in <a>
    return email['href'][7:] if email else "NULL"  # Remove 'mailto:' from href

# Main function to extract all information and save to CSV
def extract_company_details(urls):
    all_company_data = []  # List to store company data for all URLs

    for url in urls:
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to retrieve the webpage for {url}")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract company details
        company_name = extract_company_name(soup)
        website_url = extract_website_url(soup)
        contact_number = extract_contact_number(soup)
        location = extract_location(soup)
        industry = extract_industry(soup)
        company_description = extract_company_description(soup)
        email_address = extract_email(soup)

        # Display extracted details in terminal
        print(f"Company Name: {company_name}")
        print(f"Website URL: {website_url}")
        print(f"Contact Number: {contact_number}")
        print(f"Location/Address: {location}")
        print(f"Industry/Category: {industry}")
        print(f"Company Description: {company_description}")
        print(f"Email Address: {email_address}")

        # Add company details to list
        all_company_data.append({
            'Company Name': company_name,
            'Website URL': website_url,
            'Contact Number': contact_number,
            'Location/Address': location,
            'Industry/Category': industry,
            'Company Description': company_description,
            'Email Address': email_address
        })

    # Create a DataFrame to save to CSV
    df = pd.DataFrame(all_company_data)

    # Specify the file path to save CSV
    file_path = '/content/drive/MyDrive/Web_Scraping/company_details.csv'  # Replace with your desired path
    df.to_csv(file_path, index=False)
    print(f"Company details saved to '{file_path}'")

# List of URLs (up to 10)
urls = [
    'http://yellowpages.in/b/arvind-packers-and-movers-yousufguda-hyderabad/751762183',
'http://yellowpages.in/b/leo-international-packers-and-movers-secunderabad-hyderabad/900447582',
'http://yellowpages.in/b/om-sai-packers-and-movers-kondapur-hyderabad/526603044',
'http://yellowpages.in/b/anitha-packers-and-movers-yousufguda-hyderabad/822594492',
'http://yellowpages.in/b/agarwal-worldwide-packing-and-shipping-co-secunderabad-hyderabad/591274933',
'http://yellowpages.in/b/rupana-packers-and-movers-kondapur-hyderabad/220392267',
'http://yellowpages.in/b/every1-packers-and-movers-kphb-colony-hyderabad/920329757',
'http://yellowpages.in/b/sk-packers-and-movers-saroor-nagar-hyderabad/602683016',
'http://yellowpages.in/b/priyanka-packers-and-movers-jeedimetla-hyderabad/841604105',
'http://yellowpages.in/b/svr-packers-and-movers-nagole-hyderabad/336263847',
'http://yellowpages.in/b/sahara-movers-and-packers-domestic-and-international-madhapur-hyderabad/217940362',
'http://yellowpages.in/b/deepak-packers-and-movers-jubilee-hills-hyderabad/541328042',
'http://yellowpages.in/b/vinit-packers-and-movers-old-bowenpally-hyderabad/889316062',
'http://yellowpages.in/b/ramesh-packers-and-movers-yousufguda-hyderabad/801041321',
'http://yellowpages.in/b/ashirwad-packers-and-movers-miyapur-hyderabad/977852235',
'http://yellowpages.in/b/clc-packers-and-logistics-old-bowenpally-hyderabad/171803697',
'http://yellowpages.in/b/vijaydurga-packers-and-movers-kothapet-hyderabad/617322102',
'http://yellowpages.in/b/world-star-packers-and-movers-kukatpally-hyderabad/703412400',
'http://yellowpages.in/b/ayyappa-packers-and-movers-khairatabad-hyderabad/956987237',
'http://yellowpages.in/b/prakash-packers-and-movers-yousufguda-hyderabad/817671580',
'http://yellowpages.in/b/a1-packers-and-movers-moosapet-hyderabad/871436479',
'http://yellowpages.in/b/top-packers-and-movers-hafeezpet-hyderabad/617388351',
'http://yellowpages.in/b/supreme-india-packers-and-movers-hafeezpet-hyderabad/992152305',
'http://yellowpages.in/b/ashoka-packers-ranigunj-hyderabad/572155785',
'http://yellowpages.in/b/raju-packers-and-movers-sanath-nagar-hyderabad/396780134',
'http://yellowpages.in/b/sunanda-veg-caterers-safilguda-hyderabad/101525829',
'http://yellowpages.in/b/chawla-essence-mart-secunderabad-hyderabad/573081513',
'http://yellowpages.in/b/ss-flour-mill-toli-chowki-hyderabad/830766677',
'http://yellowpages.in/b/kikaboni-organic-store-manikonda-hyderabad/667478928',
'http://yellowpages.in/b/sampoorna-super-market-shaikpet-hyderabad/874433979',
'http://yellowpages.in/b/balaji-grand-bazar-kondapur-hyderabad/593456806',
'http://yellowpages.in/b/more-super-market-barkatpura-hyderabad/258449111',
'http://yellowpages.in/b/m-hospitality-ameerpet-hyderabad/992820987',
'http://yellowpages.in/b/naheed-fruitsjuice-centre-mehdipatnam-hyderabad/826199561',
'http://yellowpages.in/b/balaji-grand-bazar-banjara-hills-hyderabad/496025036',
'http://yellowpages.in/b/makarand-sweets-kondapur-hyderabad/740686558',
'http://yellowpages.in/b/sansar-kirana-general-store-toli-chowki-hyderabad/363360792',
'http://yellowpages.in/b/spoorthi-super-market-kukatpally-hyderabad/677042337',
'http://yellowpages.in/b/udayashree-megharaj-sweet-shop-narayanguda-hyderabad/315572672',
'http://yellowpages.in/b/vijay-milk-house-attapur-hyderabad/302029518',
'http://yellowpages.in/b/value-mart-super-market-kondapur-hyderabad/338417179',
'http://yellowpages.in/b/anusha-super-market-gudi-malkapur-hyderabad/402067031',
'http://yellowpages.in/b/almonds-for-youalmond-house-himayat-nagar-hyderabad/662356554',
'http://yellowpages.in/b/amrutha-swagruha-foods-kukatpally-hyderabad/327482404',
'http://yellowpages.in/b/almonds-for-youalmond-house-banjara-hills-hyderabad/730757669',
'http://yellowpages.in/b/kirana-ghar-punjagutta-hyderabad/493290284',
'http://yellowpages.in/b/more-super-market-shaikpet-hyderabad/235944075',
'http://yellowpages.in/b/mexican-fried-chicken-kukatpally-hyderabad/789577083',
'http://yellowpages.in/b/venkat-sai-dry-fruits-spices-kukatpally-hyderabad/163946638',
'http://yellowpages.in/b/srinidhi-dry-fruits-spices-kukatpally-hyderabad/345756835'
    # Add more URLs if needed (max 10)
]

# Call function to extract company details
extract_company_details(urls)


Company Name: Arvind Packers And Movers
Website URL: /
Contact Number: +91 9948271742
Location/Address: Yousufguda Hyderabad - 500045
Industry/Category: Packing and Moving Services, Loading and Unloading Services, House Shifting Services, Transportation
Company Description: NULL
Email Address: NULL
Company Name: Leo International Packers and Movers
Website URL: /
Contact Number: +91 9642675555
Location/Address: Secunderabad Hyderabad - 500003
Industry/Category: Packing and Moving Services, Loading and Unloading Services, House Shifting Services, Transportation
Company Description: NULL
Email Address: sales@leointernationalpacker.com
Company Name: Om Sai Packers And Movers
Website URL: /
Contact Number: +91 9246187255
Location/Address: Kondapur Hyderabad - 500081
Industry/Category: Packers And Movers
Company Description: NULL
Email Address: omsaipackersandmovers@gmail.com
Company Name: Anitha Packers & Movers
Website URL: /
Contact Number: +91 9618714331
Location/Address: Yousufguda Hyd

Extract Links


In [11]:
import requests
import csv
from bs4 import BeautifulSoup

http://yellowpages.in/hyderabad/logistics-services/195433585

http://yellowpages.in/hyderabad/food-and-beverages/606286653

In [12]:

# Function to extract all links from all divs with class='popularTitleTextBlock'
def extract_all_links_from_divs(url):
    # Send a GET request to the website
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Failed to retrieve webpage. Status code: {response.status_code}")
        return []

    # Parse the HTML content of the webpage
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all divs with the class 'popularTitleTextBlock'
    divs = soup.find_all('div', class_='popularTitleTextBlock')

    if not divs:
        print("No divs with class 'popularTitleTextBlock' found.")
        return []

    # Extract all <a> tags within each div
    links = []
    for div in divs:
        for anchor in div.find_all('a', href=True):
            links.append(anchor['href'])

    return links

# Function to save URLs to a CSV file
def save_to_csv(urls, filename='all_div_links.csv'):
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['URL'])
        for url in urls:
            writer.writerow([url])

# Main code
if __name__ == '__main__':
    # URL of the webpage you want to scrape
    webpage_url = input("Enter the URL of the webpage to scrape: ")

    # Extract all links from all the specific divs
    links = extract_all_links_from_divs(webpage_url)

    # Print links to terminal with a comma after each
    if links:
        print("Extracted Links from all 'popularTitleTextBlock' divs:")
        for link in links:
            print(f"'http://yellowpages.in{link}',")

        # Save the links to a CSV file
        save_to_csv(links)
        print(f"\nSaved links to 'all_div_links.csv'.")
    else:
        print("No links found.")


Enter the URL of the webpage to scrape: http://yellowpages.in/hyderabad/logistics-services/195433585
Extracted Links from all 'popularTitleTextBlock' divs:
'http://yellowpages.in/b/priyanka-packers-and-movers-jeedimetla-hyderabad/841604105',
'http://yellowpages.in/b/anitha-packers-and-movers-yousufguda-hyderabad/822594492',
'http://yellowpages.in/b/every1-packers-and-movers-kphb-colony-hyderabad/920329757',
'http://yellowpages.in/b/arvind-packers-and-movers-yousufguda-hyderabad/751762183',
'http://yellowpages.in/b/rupana-packers-and-movers-kondapur-hyderabad/220392267',
'http://yellowpages.in/b/sk-packers-and-movers-saroor-nagar-hyderabad/602683016',
'http://yellowpages.in/b/om-sai-packers-and-movers-kondapur-hyderabad/526603044',
'http://yellowpages.in/b/leo-international-packers-and-movers-secunderabad-hyderabad/900447582',
'http://yellowpages.in/b/agarwal-worldwide-packing-and-shipping-co-secunderabad-hyderabad/591274933',
'http://yellowpages.in/b/svr-packers-and-movers-nagole-hyder

In [13]:

# Function to extract all links from all divs with class='popularTitleTextBlock'
def extract_all_links_from_divs(url):
    # Send a GET request to the website
    response = requests.get(url)

    if response.status_code != 200:
        print(f"Failed to retrieve webpage. Status code: {response.status_code}")
        return []

    # Parse the HTML content of the webpage
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all divs with the class 'popularTitleTextBlock'
    divs = soup.find_all('div', class_='popularTitleTextBlock')

    if not divs:
        print("No divs with class 'popularTitleTextBlock' found.")
        return []

    # Extract all <a> tags within each div
    links = []
    for div in divs:
        for anchor in div.find_all('a', href=True):
            links.append(anchor['href'])

    return links

# Function to save URLs to a CSV file
def save_to_csv(urls, filename='all_div_links.csv'):
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['URL'])
        for url in urls:
            writer.writerow([url])

# Main code
if __name__ == '__main__':
    # URL of the webpage you want to scrape
    webpage_url = input("Enter the URL of the webpage to scrape: ")

    # Extract all links from all the specific divs
    links = extract_all_links_from_divs(webpage_url)

    # Print links to terminal with a comma after each
    if links:
        print("Extracted Links from all 'popularTitleTextBlock' divs:")
        for link in links:
            print(f"'http://yellowpages.in{link}',")

        # Save the links to a CSV file
        save_to_csv(links)
        print(f"\nSaved links to 'all_div_links.csv'.")
    else:
        print("No links found.")


Enter the URL of the webpage to scrape: http://yellowpages.in/hyderabad/food-and-beverages/606286653
Extracted Links from all 'popularTitleTextBlock' divs:
'http://yellowpages.in/b/sunanda-veg-caterers-safilguda-hyderabad/101525829',
'http://yellowpages.in/b/chawla-essence-mart-secunderabad-hyderabad/573081513',
'http://yellowpages.in/b/ss-flour-mill-toli-chowki-hyderabad/830766677',
'http://yellowpages.in/b/sampoorna-super-market-shaikpet-hyderabad/874433979',
'http://yellowpages.in/b/balaji-grand-bazar-kondapur-hyderabad/593456806',
'http://yellowpages.in/b/kikaboni-organic-store-manikonda-hyderabad/667478928',
'http://yellowpages.in/b/spoorthi-super-market-kukatpally-hyderabad/677042337',
'http://yellowpages.in/b/amrutha-swagruha-foods-kukatpally-hyderabad/327482404',
'http://yellowpages.in/b/more-super-market-barkatpura-hyderabad/258449111',
'http://yellowpages.in/b/kirana-ghar-punjagutta-hyderabad/493290284',
'http://yellowpages.in/b/venkat-sai-dry-fruits-spices-kukatpally-hyderab