In [47]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Helper: decide whether a title is written with English-only characters
def is_english(text):
    """Report whether ``text`` uses only the characters treated as English here.

    Accepted characters are ASCII letters, digits, whitespace, commas,
    periods, apostrophes and hyphens. The pattern allows zero characters, so
    an empty string is accepted as well.
    """
    allowed_pattern = r'^[a-zA-Z0-9\s,.\'-]*$'
    return re.match(allowed_pattern, text) is not None

# Function to scrape data from one page
def scrape_page(page_url, timeout=10):
    """Fetch one search-results page and collect its English-titled listings.

    Args:
        page_url: URL of the results page to download.
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get`` so a stalled connection cannot hang the run.

    Returns:
        A list of ``{'Title': ..., 'Price': ...}`` dicts, one per listing
        whose title passes ``is_english``. The list is empty when the page
        cannot be fetched (request error or non-200 status).
    """
    try:
        response = requests.get(page_url, timeout=timeout)
    except requests.RequestException as exc:
        # One unreachable page should not abort the whole scrape, so it is
        # reported and skipped just like a non-200 response.
        print(f"Failed to fetch page {page_url}: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to fetch page {page_url}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    listings = soup.find_all('div', class_='content--3JNQz')

    data = []
    for listing in listings:
        title_element = listing.find('h2')
        # A missing <h2> yields 'N/A'; its '/' is rejected by is_english, so
        # listings without a title are skipped below.
        title = title_element.get_text(strip=True) if title_element else 'N/A'
        if not is_english(title):
            continue

        price_element = listing.find('div', class_='price--3SnqI')
        price = price_element.get_text(strip=True) if price_element else 'N/A'

        data.append({
            'Title': title,
            'Price': price
        })
    return data

# Function to scrape multiple pages
def scrape_multiple_pages(base_url, num_pages):
    """Scrape result pages 1..num_pages and return their listings combined.

    Args:
        base_url: Search URL to paginate. The ``page`` parameter is appended
            with ``&`` when the URL already contains a query string and with
            ``?`` otherwise, so both forms produce a well-formed URL.
        num_pages: Number of pages to fetch, starting at page 1. A value
            below 1 yields an empty list.

    Returns:
        One list holding the records returned by ``scrape_page`` for every
        page, in page order.
    """
    # A URL without an existing query string needs '?' to start one.
    separator = '&' if '?' in base_url else '?'
    all_data = []
    for page in range(1, num_pages + 1):
        print(f"Scraping page {page}")
        page_url = f"{base_url}{separator}page={page}"
        all_data.extend(scrape_page(page_url))
    return all_data

# Main function
def main(
    base_url="https://bikroy.com/en/ads/dhaka/apartments-for-sale?sort=date&order=desc&buy_now=0&urgent=0",
    num_pages=5,
    output_path='bikroy_apartments.xlsx',
):
    """Scrape apartment listings and write them to an Excel workbook.

    Args:
        base_url: Search URL handed to ``scrape_multiple_pages``.
        num_pages: Number of result pages to scrape.
        output_path: Path of the Excel file to write.

    Calling ``main()`` with no arguments uses the same URL, page count and
    file name that were previously hard-coded.
    """
    data = scrape_multiple_pages(base_url, num_pages)

    # Save data to Excel. Naming the columns keeps the header row in the
    # sheet even when no listing was scraped.
    df = pd.DataFrame(data, columns=['Title', 'Price'])
    df.to_excel(output_path, index=False)
    print(f"Data saved to {output_path}")

# Entry point: call main() when this code runs as the top-level module.
if __name__ == "__main__":
    main()


Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5
Data saved to bikroy_apartments.xlsx


In [51]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Helper: decide whether a title is written with English-only characters
def is_english(text):
    """Return True when ``text`` contains only the characters treated as English.

    The accepted set is ASCII letters, digits, whitespace, commas, periods,
    apostrophes and hyphens; an empty string also qualifies because the
    pattern permits zero characters.
    """
    english_only = re.compile(r'^[a-zA-Z0-9\s,.\'-]*$')
    if english_only.match(text):
        return True
    return False

# Function to scrape the address from a listing page
def get_address(listing_url, timeout=10):
    """Return the address text shown on one listing's detail page.

    Args:
        listing_url: URL of the listing detail page to download.
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get`` so a stalled connection cannot hang the run.

    Returns:
        The stripped text of the address element, or ``"N/A"`` when the page
        cannot be fetched (request error or non-200 status) or the element
        is not found.
    """
    try:
        response = requests.get(listing_url, timeout=timeout)
    except requests.RequestException as exc:
        # A single unreachable listing should only cost its address, not
        # the whole scrape.
        print(f"Failed to fetch listing page: {listing_url} ({exc})")
        return "N/A"
    if response.status_code != 200:
        print(f"Failed to fetch listing page: {listing_url}")
        return "N/A"

    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): a space-separated class_ value is compared against the
    # exact class attribute string - confirm the site always emits these two
    # classes in this order.
    address_element = soup.find('div', class_='word-break--2nyVq value--1lKHt')
    return address_element.get_text(strip=True) if address_element else "N/A"

# Function to scrape data from one page
def scrape_page(page_url, timeout=10):
    """Fetch one search-results page and collect its English-titled listings.

    For every kept listing the detail page is fetched through
    ``get_address`` to obtain the address.

    Args:
        page_url: URL of the results page to download.
        timeout: Seconds to wait for the results page before giving up;
            passed to ``requests.get`` so a stalled connection cannot hang
            the run.

    Returns:
        A list of ``{'Title': ..., 'Price': ..., 'Address': ...}`` dicts, one
        per listing whose title passes ``is_english``. The list is empty when
        the page cannot be fetched (request error or non-200 status).
    """
    try:
        response = requests.get(page_url, timeout=timeout)
    except requests.RequestException as exc:
        # One unreachable page should not abort the whole scrape, so it is
        # reported and skipped just like a non-200 response.
        print(f"Failed to fetch page {page_url}: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to fetch page {page_url}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): a space-separated class_ value is compared against the
    # exact class attribute string - confirm the site always emits these two
    # classes in this order.
    listings = soup.find_all('a', class_='card-link--3ssYv gtm-ad-item')

    data = []
    for listing in listings:
        title_element = listing.find('h2')
        # A missing <h2> yields 'N/A'; its '/' is rejected by is_english, so
        # listings without a title are skipped below.
        title = title_element.get_text(strip=True) if title_element else 'N/A'
        if not is_english(title):
            continue

        price_element = listing.find('div', class_='price--3SnqI')
        price = price_element.get_text(strip=True) if price_element else 'N/A'

        # An anchor without an href would raise KeyError on subscript access;
        # fall back to 'N/A' instead of losing the whole page.
        href = listing.get('href')
        address = get_address(f"https://bikroy.com{href}") if href else 'N/A'

        data.append({
            'Title': title,
            'Price': price,
            'Address': address
        })
    return data

# Function to scrape multiple pages
def scrape_multiple_pages(base_url, num_pages):
    """Scrape result pages 1..num_pages and return their listings combined.

    Args:
        base_url: Search URL to paginate. The ``page`` parameter is appended
            with ``&`` when the URL already contains a query string and with
            ``?`` otherwise, so both forms produce a well-formed URL.
        num_pages: Number of pages to fetch, starting at page 1. A value
            below 1 yields an empty list.

    Returns:
        One list holding the records returned by ``scrape_page`` for every
        page, in page order.
    """
    # A URL without an existing query string needs '?' to start one.
    separator = '&' if '?' in base_url else '?'
    all_data = []
    for page in range(1, num_pages + 1):
        print(f"Scraping page {page}")
        page_url = f"{base_url}{separator}page={page}"
        all_data.extend(scrape_page(page_url))
    return all_data

# Main function
def main(
    base_url="https://bikroy.com/en/ads/dhaka/apartments-for-sale?sort=date&order=desc&buy_now=0&urgent=0",
    num_pages=5,
    output_path='bikroy_apartments_with_addresses.xlsx',
):
    """Scrape apartment listings with addresses and write them to Excel.

    Args:
        base_url: Search URL handed to ``scrape_multiple_pages``.
        num_pages: Number of result pages to scrape.
        output_path: Path of the Excel file to write.

    Calling ``main()`` with no arguments uses the same URL, page count and
    file name that were previously hard-coded.
    """
    data = scrape_multiple_pages(base_url, num_pages)

    # Save data to Excel. Naming the columns keeps the header row in the
    # sheet even when no listing was scraped.
    df = pd.DataFrame(data, columns=['Title', 'Price', 'Address'])
    df.to_excel(output_path, index=False)
    print(f"Data saved to {output_path}")

# Entry point: call main() when this code runs as the top-level module.
if __name__ == "__main__":
    main()


Scraping page 1
Scraping page 2
Scraping page 3
Scraping page 4
Scraping page 5
Data saved to bikroy_apartments_with_addresses.xlsx


In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from tqdm import tqdm  # Progress bar library

# Helper: decide whether a title is written with English-only characters
def is_english(text):
    """Tell whether ``text`` is made up solely of characters treated as English.

    Only ASCII letters, digits, whitespace, commas, periods, apostrophes and
    hyphens are accepted. Because the pattern permits zero characters, an
    empty string is accepted too.
    """
    match = re.match(r'^[a-zA-Z0-9\s,.\'-]*$', text)
    return True if match else False

# Function to scrape the address from a listing page
def get_address(listing_url, timeout=10):
    """Return the address text shown on one listing's detail page.

    Args:
        listing_url: URL of the listing detail page to download.
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get`` so a stalled connection cannot hang the run.

    Returns:
        The stripped text of the address element, or ``"N/A"`` when the page
        cannot be fetched (request error or non-200 status) or the element
        is not found.
    """
    try:
        response = requests.get(listing_url, timeout=timeout)
    except requests.RequestException as exc:
        # A single unreachable listing should only cost its address, not
        # the whole scrape.
        print(f"Failed to fetch listing page: {listing_url} ({exc})")
        return "N/A"
    if response.status_code != 200:
        print(f"Failed to fetch listing page: {listing_url}")
        return "N/A"

    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): a space-separated class_ value is compared against the
    # exact class attribute string - confirm the site always emits these two
    # classes in this order.
    address_element = soup.find('div', class_='word-break--2nyVq value--1lKHt')
    return address_element.get_text(strip=True) if address_element else "N/A"

# Function to scrape data from one page
def scrape_page(page_url, timeout=10):
    """Fetch one search-results page and collect its English-titled listings.

    For every kept listing the detail page is fetched through
    ``get_address`` to obtain the address. Progress over the listings is
    shown with a ``tqdm`` bar.

    Args:
        page_url: URL of the results page to download.
        timeout: Seconds to wait for the results page before giving up;
            passed to ``requests.get`` so a stalled connection cannot hang
            the run.

    Returns:
        A list of ``{'Title': ..., 'Price': ..., 'Address': ...}`` dicts, one
        per listing whose title passes ``is_english``. The list is empty when
        the page cannot be fetched (request error or non-200 status).
    """
    try:
        response = requests.get(page_url, timeout=timeout)
    except requests.RequestException as exc:
        # One unreachable page should not abort the whole scrape, so it is
        # reported and skipped just like a non-200 response.
        print(f"Failed to fetch page {page_url}: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to fetch page {page_url}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): a space-separated class_ value is compared against the
    # exact class attribute string - confirm the site always emits these two
    # classes in this order.
    listings = soup.find_all('a', class_='card-link--3ssYv gtm-ad-item')

    data = []
    for listing in tqdm(listings, desc="Processing listings", unit="listing"):
        title_element = listing.find('h2')
        # A missing <h2> yields 'N/A'; its '/' is rejected by is_english, so
        # listings without a title are skipped below.
        title = title_element.get_text(strip=True) if title_element else 'N/A'
        if not is_english(title):
            continue

        price_element = listing.find('div', class_='price--3SnqI')
        price = price_element.get_text(strip=True) if price_element else 'N/A'

        # An anchor without an href would raise KeyError on subscript access;
        # fall back to 'N/A' instead of losing the whole page.
        href = listing.get('href')
        address = get_address(f"https://bikroy.com{href}") if href else 'N/A'

        data.append({
            'Title': title,
            'Price': price,
            'Address': address
        })
    return data

# Function to scrape multiple pages
def scrape_multiple_pages(base_url, num_pages):
    """Scrape result pages 1..num_pages and return their listings combined.

    Progress over the pages is shown with a ``tqdm`` bar.

    Args:
        base_url: Search URL to paginate. The ``page`` parameter is appended
            with ``&`` when the URL already contains a query string and with
            ``?`` otherwise, so both forms produce a well-formed URL.
        num_pages: Number of pages to fetch, starting at page 1. A value
            below 1 yields an empty list.

    Returns:
        One list holding the records returned by ``scrape_page`` for every
        page, in page order.
    """
    # A URL without an existing query string needs '?' to start one.
    separator = '&' if '?' in base_url else '?'
    all_data = []
    for page in tqdm(range(1, num_pages + 1), desc="Scraping pages", unit="page"):
        page_url = f"{base_url}{separator}page={page}"
        page_data = scrape_page(page_url)
        all_data.extend(page_data)
    return all_data

# Main function
def main(
    base_url="https://bikroy.com/en/ads/dhaka/apartments-for-sale?sort=date&order=desc&buy_now=0&urgent=0",
    num_pages=100,
    output_path='bikroy_apartments_with_addresses.xlsx',
):
    """Scrape apartment listings with addresses and write them to Excel.

    Args:
        base_url: Search URL handed to ``scrape_multiple_pages``.
        num_pages: Number of result pages to scrape.
        output_path: Path of the Excel file to write.

    Calling ``main()`` with no arguments uses the same URL, page count and
    file name that were previously hard-coded.
    """
    data = scrape_multiple_pages(base_url, num_pages)

    # Save data to Excel. Naming the columns keeps the header row in the
    # sheet even when no listing was scraped.
    df = pd.DataFrame(data, columns=['Title', 'Price', 'Address'])
    df.to_excel(output_path, index=False)
    print(f"Data saved to {output_path}")

# Entry point: call main() when this code runs as the top-level module.
if __name__ == "__main__":
    main()


Scraping pages:   0%|                                                                        | 0/100 [00:00<?, ?page/s]
Processing listings:   0%|                                                                 | 0/25 [00:00<?, ?listing/s][A
Processing listings:  12%|██████▊                                                  | 3/25 [00:00<00:07,  3.08listing/s][A
Processing listings:  20%|███████████▍                                             | 5/25 [00:01<00:07,  2.61listing/s][A
Processing listings:  32%|██████████████████▏                                      | 8/25 [00:03<00:07,  2.30listing/s][A
Processing listings:  36%|████████████████████▌                                    | 9/25 [00:04<00:09,  1.61listing/s][A
Processing listings:  40%|██████████████████████▍                                 | 10/25 [00:05<00:10,  1.46listing/s][A
Processing listings:  44%|████████████████████████▋                               | 11/25 [00:07<00:14,  1.04s/listing][A
Processing listings

Data saved to bikroy_apartments_with_addresses.xlsx



