In [10]:
import requests
from bs4 import BeautifulSoup
import csv
import re

# Set the URL of the directory page
BASE_URL = "https://www.yellowpages.com/search?search_terms=restaurants&geo_location_terms=New+York%2C+NY"

# Headers to mimic a browser visit
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

# Function to scrape data from a single page
def scrape_page(url):
    response = requests.get(url, headers=HEADERS)
    soup = BeautifulSoup(response.text, "html.parser")

    # Container for storing business data
    business_data = []

    # Find business listings on the page
    listings = soup.find_all("div", class_="result")
    for listing in listings:
        try:
            # Extract company name
            name = listing.find("a", class_="business-name").text.strip()

            # Extract website URL (if available)
            website_link = listing.find("a", class_="track-visit-website")
            website_url = website_link["href"] if website_link else "N/A"

            # Extract contact number
            phone = listing.find("div", class_="phones phone primary").text.strip() if listing.find("div", class_="phones phone primary") else "N/A"

            # Extract address
            address = listing.find("p", class_="adr").text.strip() if listing.find("p", class_="adr") else "N/A"

            # Extract industry/category
            category = listing.find("div", class_="categories").text.strip() if listing.find("div", class_="categories") else "N/A"

            # Extract company description (if available)
            description = listing.find("div", class_="snippet").text.strip() if listing.find("div", class_="snippet") else "N/A"

            # Extract email address (if available)
            # Note: Emails are often not directly listed; this is for example purposes.
            email_match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", str(listing))
            email = email_match.group(0) if email_match else "N/A"

            # Append data to list
            business_data.append({
                "Company Name": name,
                "Website URL": website_url,
                "Contact Number": phone,
                "Location/Address": address,
                "Industry/Category": category,
                "Company Description": description,
                "Email Address": email,
            })
        except AttributeError:
            # Skip listings with missing data
            continue

    return business_data

# Function to save data to a CSV file
def save_to_csv(data, filename="business_data.csv"):
    keys = data[0].keys()
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=keys)
        writer.writeheader()
        writer.writerows(data)

# Main script to scrape multiple pages
def main():
    all_businesses = []
    for page in range(1, 6):  # Adjust the range for the number of pages to scrape
        print(f"Scraping page {page}...")
        url = f"{BASE_URL}&page={page}"
        businesses = scrape_page(url)
        all_businesses.extend(businesses)

    # Save scraped data to CSV
    if all_businesses:
        save_to_csv(all_businesses)
        print(f"Scraping complete. Data saved to 'business_data.csv'.")
    else:
        print("No data scraped.")

if __name__ == "__main__":
    main()

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping complete. Data saved to 'business_data.csv'.


In [11]:
import pandas as pd

In [12]:
path="business_data.csv"
data=pd.read_csv(path)

In [13]:
data.head(50
          )

Unnamed: 0,Company Name,Website URL,Contact Number,Location/Address,Industry/Category,Company Description,Email Address
0,Casa Limone,https://casalimonerestaurant.com,(646) 347-2865,,"RestaurantsCrop Dusting, Seeding & SprayingIta...",From Business: Southern Italy in the heart of ...,
1,Dave & Buster's New York City - Times Square,http://www.daveandbusters.com,(646) 495-2015,,RestaurantsAmerican RestaurantsAmusement Place...,"The games are nice for kids, but be aware it d...",
2,Paul & Jimmy's Restaurant,https://www.paulandjimmys.com,(212) 475-9540,,RestaurantsItalian RestaurantsFamily Style Res...,Paul & Jimmy's Restaurant is a hidden gem nest...,
3,Festival Restaurant,,(212) 995-0154,,RestaurantsFamily Style Restaurants,hello i wanted make a website for your beautif...,charrierfreddy@orange.fr
4,Bobby Van's Steakhouse - 54th Street,http://www.bobbyvans.com/54th-street.html,(212) 207-8050,,RestaurantsSteak HousesAmerican Restaurants,I would recommend trying this place to anyone....,
5,Pig & Whistle Pub Restaurant,,(212) 302-0112,,RestaurantsFamily Style Restaurants,,
6,Johnny Rock,,(212) 813-0003,,RestaurantsAmerican RestaurantsHamburgers & Ho...,"the menu is typical burgers and fries, a few s...",
7,S'mac,https://www.eatsmac.com/?y_source=1_MTA1NjA2OD...,(212) 358-7912,,RestaurantsAmerican RestaurantsVegetarian Rest...,Really disappointing. I was put on on hold on ...,
8,Pita Grill Restaurant,,(212) 363-2333,,RestaurantsFamily Style Restaurants,,
9,Sparks Steak House,http://www.sparkssteakhouse.com,(212) 687-4855,,RestaurantsSteak HousesAmerican Restaurants,I've been there and It's Awesome! The Staff we...,
