In [22]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

# Scrape job links from a single page
def scrape_job_links(url, base_url):
    """Collect the job-detail URLs listed on one search-results page.

    Args:
        url: Address of the listing page to download.
        base_url: Site root used to resolve relative ``href`` values.

    Returns:
        A list of job URLs, one for every ``sout-jobs-wrapper`` card that
        contains a non-empty link. Cards without a link are skipped. An empty
        list is returned when the page cannot be fetched (request error or a
        non-200 status code).
    """
    # Function-scope import so this definition does not depend on other cells.
    from urllib.parse import urljoin

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'}
    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch {url}: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to fetch {url}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    job_links = []

    for job_card in soup.find_all('div', class_='sout-jobs-wrapper'):
        anchor = job_card.find('a', href=True)
        href = anchor['href'].strip() if anchor is not None else ""
        if not href:
            # A placeholder such as "N/A" is not a fetchable URL, so a card
            # without a usable link is dropped instead of being recorded.
            continue
        # urljoin leaves absolute links untouched and resolves relative ones
        # (e.g. "jobdetails.asp?id=...") against the site root.
        job_links.append(urljoin(base_url, href))

    return job_links

# Scrape detailed information from a single job page
def scrape_job_details(job_url):
    """Download one job posting and extract its main fields.

    Args:
        job_url: URL of the job-detail page.

    Returns:
        A dict with the keys 'Title', 'Company', 'Description', 'Education',
        'Experience', 'Location', 'Deadline' and 'Apply Link'. A field whose
        element is not found on the page is set to "N/A". Returns None when
        the page cannot be fetched (request error or non-200 status) or when
        extraction raises.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'}
    try:
        # A timeout plus the except clause keeps one bad URL or stalled
        # connection from aborting the whole scraping run.
        response = requests.get(job_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch {job_url}: {exc}")
        return None
    if response.status_code != 200:
        print(f"Failed to fetch {job_url}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_of(tag_name, css_class):
        """Return the stripped text of the first matching element, or "N/A"."""
        element = soup.find(tag_name, class_=css_class)
        return element.text.strip() if element is not None else "N/A"

    try:
        apply_button = soup.find('a', {'class': 'btn applynow'})
        # NOTE(review): the multi-class selectors below ('col-sm-12 mb-3',
        # 'row mb-3', 'btn applynow') are matched by Beautiful Soup against the
        # exact class string - confirm they match the live page markup.
        details = {
            'Title': text_of('h2', 'jtitle'),
            'Company': text_of('h2', 'cname'),
            'Description': text_of('div', 'jobcontent'),
            'Education': text_of('div', 'col-sm-12 mb-3'),
            'Experience': text_of('div', 'skills'),
            'Location': text_of('div', 'row mb-3'),
            'Deadline': text_of('span', 'deadlinetxt'),
            # An apply button without an href yields "N/A", like every other
            # missing field, instead of discarding the whole record.
            'Apply Link': apply_button.get('href', "N/A") if apply_button is not None else "N/A",
        }
    except Exception as e:
        print(f"Error scraping job details from {job_url}: {e}")
        return None

    return details

def scrape_all_jobs(base_url, num_pages, delay_range=(2, 5)):
    """Scrape every job found on the first ``num_pages`` listing pages.

    All listing pages are visited first to gather the job links; the detail
    page of each link is scraped afterwards.

    Args:
        base_url: Site root; the listing URL is built as
            ``{base_url}JobSearch.asp?page={page}``.
        num_pages: Number of listing pages to visit, starting at page 1.
        delay_range: ``(low, high)`` bounds in seconds for the random pause
            taken after each request. Defaults to the original 2-5 seconds.

    Returns:
        A list of the dicts returned by ``scrape_job_details``; jobs whose
        details could not be scraped are left out.
    """
    job_links = []
    for page in range(1, num_pages + 1):
        url = f"{base_url}JobSearch.asp?page={page}"
        print(f"Scraping page {page}: {url}")
        job_links.extend(scrape_job_links(url, base_url))
        time.sleep(random.uniform(*delay_range))

    all_jobs = []
    for job_link in job_links:
        # Anything that is not an absolute http(s) URL (for example a
        # placeholder for a card without a link) cannot be requested, so it
        # is skipped rather than allowed to abort the run.
        if not job_link.startswith(('http://', 'https://')):
            print(f"Skipping invalid job link: {job_link}")
            continue
        print(f"Scraping details for {job_link}")
        job_details = scrape_job_details(job_link)
        if job_details:
            all_jobs.append(job_details)
        time.sleep(random.uniform(*delay_range))

    return all_jobs

def save_to_csv(data, filename):
    """Write the scraped records to a CSV file and report where it went.

    Args:
        data: Records accepted by ``pd.DataFrame`` (here, a list of dicts).
        filename: Destination path of the CSV file.
    """
    # Build the frame and serialise it in one step; the index column is omitted.
    pd.DataFrame(data).to_csv(filename, index=False)
    print(f"Data saved to {filename}")

if __name__ == "__main__":
    # Run settings: site root and how many listing pages to walk.
    base_url = "https://jobs.bdjobs.com/"
    num_pages = 1

    # Scrape first, then persist everything that was collected.
    job_data = scrape_all_jobs(base_url=base_url, num_pages=num_pages)
    save_to_csv(data=job_data, filename="bdjobs_details.csv")


Scraping page 1: https://jobs.bdjobs.com/JobSearch.asp?page=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1314010&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1314004&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313999&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313997&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313991&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313979&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313982&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313987&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313985&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313419&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313632&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313966&ln=1
Scraping details for https

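V2: selects the `norm-jobs-wrapper` cards from the paginated `jobsearch.asp` results and scrapes each page's job details before moving on to the next page.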

In [23]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

# Scrape job links from a single page
def scrape_job_links(url, base_url):
    """Collect the job-detail URLs listed on one search-results page.

    Args:
        url: Address of the listing page to download.
        base_url: Site root used to resolve relative ``href`` values.

    Returns:
        A list of job URLs, one for every ``norm-jobs-wrapper`` card that
        contains a non-empty link. Cards without a link are skipped. An empty
        list is returned when the page cannot be fetched (request error or a
        non-200 status code).
    """
    # Function-scope import so this definition does not depend on other cells.
    from urllib.parse import urljoin

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'}
    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch {url}: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to fetch {url}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    job_links = []

    for job_card in soup.find_all('div', class_='norm-jobs-wrapper'):
        anchor = job_card.find('a', href=True)
        href = anchor['href'].strip() if anchor is not None else ""
        if not href:
            # A placeholder such as "N/A" is not a fetchable URL, so a card
            # without a usable link is dropped instead of being recorded.
            continue
        # urljoin leaves absolute links untouched and resolves relative ones
        # (e.g. "jobdetails.asp?id=...") against the site root.
        job_links.append(urljoin(base_url, href))

    return job_links

# Scrape detailed information from a single job page
def scrape_job_details(job_url):
    """Download one job posting and extract its main fields.

    Args:
        job_url: URL of the job-detail page.

    Returns:
        A dict with the keys 'Title', 'Company', 'Description', 'Education',
        'Experience', 'Location', 'Deadline' and 'Apply Link'. A field whose
        element is not found on the page is set to "N/A". Returns None when
        the page cannot be fetched (request error or non-200 status) or when
        extraction raises.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'}
    try:
        # A timeout plus the except clause keeps one bad URL or stalled
        # connection from aborting the whole scraping run.
        response = requests.get(job_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch {job_url}: {exc}")
        return None
    if response.status_code != 200:
        print(f"Failed to fetch {job_url}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    def text_of(tag_name, css_class):
        """Return the stripped text of the first matching element, or "N/A"."""
        element = soup.find(tag_name, class_=css_class)
        return element.text.strip() if element is not None else "N/A"

    try:
        apply_button = soup.find('a', {'class': 'btn applynow'})
        # NOTE(review): the multi-class selectors below ('col-sm-12 mb-3',
        # 'row mb-3', 'btn applynow') are matched by Beautiful Soup against the
        # exact class string - confirm they match the live page markup.
        details = {
            'Title': text_of('h2', 'jtitle'),
            'Company': text_of('h2', 'cname'),
            'Description': text_of('div', 'jobcontent'),
            'Education': text_of('div', 'col-sm-12 mb-3'),
            'Experience': text_of('div', 'skills'),
            'Location': text_of('div', 'row mb-3'),
            'Deadline': text_of('span', 'deadlinetxt'),
            # An apply button without an href yields "N/A", like every other
            # missing field, instead of discarding the whole record.
            'Apply Link': apply_button.get('href', "N/A") if apply_button is not None else "N/A",
        }
    except Exception as e:
        print(f"Error scraping job details from {job_url}: {e}")
        return None

    return details

# Scrape all jobs across multiple pages
def scrape_all_jobs(base_url, num_pages, delay_range=(2, 5)):
    """Scrape every job found on the first ``num_pages`` listing pages.

    Each listing page is fetched in turn and the detail pages of its jobs are
    scraped before the next listing page is requested.

    Args:
        base_url: Site root; the listing URL is built as
            ``{base_url}jobsearch.asp?pg={page}&rpp=100&hidJobSearch=JobSearch``.
        num_pages: Number of listing pages to visit, starting at page 1.
        delay_range: ``(low, high)`` bounds in seconds for the random pause
            taken after each request. Defaults to the original 2-5 seconds.

    Returns:
        A list of the dicts returned by ``scrape_job_details``; jobs whose
        details could not be scraped are left out.
    """
    all_jobs = []

    for page in range(1, num_pages + 1):
        url = f"{base_url}jobsearch.asp?pg={page}&rpp=100&hidJobSearch=JobSearch"
        print(f"Scraping page {page}: {url}")
        job_links = scrape_job_links(url, base_url)
        time.sleep(random.uniform(*delay_range))

        for job_link in job_links:
            # Anything that is not an absolute http(s) URL (for example a
            # placeholder for a card without a link) cannot be requested, so
            # it is skipped rather than allowed to abort the run.
            if not job_link.startswith(('http://', 'https://')):
                print(f"Skipping invalid job link: {job_link}")
                continue
            print(f"Scraping details for {job_link}")
            job_details = scrape_job_details(job_link)
            if job_details:
                all_jobs.append(job_details)
            time.sleep(random.uniform(*delay_range))

    return all_jobs

def save_to_csv(data, filename):
    """Dump the collected job records into a CSV file.

    Args:
        data: Records accepted by ``pd.DataFrame`` (here, a list of dicts).
        filename: Path of the CSV file to write.
    """
    records_frame = pd.DataFrame(data=data)
    # index=False keeps the row index out of the file.
    records_frame.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

if __name__ == "__main__":
    # Run settings: site root and how many listing pages to walk.
    base_url = "https://jobs.bdjobs.com/"
    num_pages = 5

    # Scrape first, then persist everything that was collected.
    job_data = scrape_all_jobs(base_url=base_url, num_pages=num_pages)
    save_to_csv(data=job_data, filename="bdjobs_details.csv")


Scraping page 1: https://jobs.bdjobs.com/jobsearch.asp?pg=1&rpp=100&hidJobSearch=JobSearch
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1314016&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313978&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313974&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1314006&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313861&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1314003&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313996&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313383&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313989&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313980&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313986&ln=1
Scraping details for https://jobs.bdjobs.com/jobdetails.asp?id=1313972&ln