In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

def _text_or_none(tag):
    """Return the stripped text of *tag*, or None when the tag was not found."""
    return tag.text.strip() if tag is not None else None


def scrape_page(url):
    """Fetch one listing page and extract a record for every doctor card on it.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list of dicts with the keys 'Name', 'Specialization', 'Experience',
        'Clinic Name', 'Fees', 'Address' and 'Timing'. Specialization,
        experience and timing are None when their element is absent; a card
        missing any of the other fields is reported and skipped.

    Raises:
        requests.HTTPError: If the response status indicates a failure.
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout a single stalled connection blocks the crawl forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    doctors = []

    doctor_divs = soup.find_all('div', class_='listing-doctor-card', attrs={'data-qa-id': 'doctor_card'})
    print(f"Found {len(doctor_divs)} doctor divs on this page")  # Debug line

    for doctor_div in doctor_divs:
        # Optional fields: a missing element yields None instead of dropping
        # the whole record. Both live inside the same grey-text container, so
        # that container itself has to be checked before searching in it.
        specialization_div = doctor_div.find('div', class_='u-grey_3-text')
        if specialization_div is not None:
            specialization = _text_or_none(specialization_div.find('span'))
            experience = _text_or_none(
                specialization_div.find('div', attrs={'data-qa-id': 'doctor_experience'})
            )
        else:
            specialization = None
            experience = None
        timing = _text_or_none(doctor_div.find('span', attrs={'data-qa-id': 'availability_text'}))

        try:
            # Required fields: find() returns None when the element is absent,
            # so `.text` raises AttributeError and the card is skipped below.
            name = doctor_div.find('h2', attrs={'data-qa-id': 'doctor_name'}).text.strip()
            clinic_name = doctor_div.find('span', attrs={'data-qa-id': 'doctor_clinic_name'}).text.strip()
            fees = doctor_div.find('span', attrs={'data-qa-id': 'consultation_fee'}).text.strip()
            locality = doctor_div.find('span', attrs={'data-qa-id': 'practice_locality'}).text.strip()
            city = doctor_div.find('span', attrs={'data-qa-id': 'practice_city'}).text.strip()
        except AttributeError as e:
            print(f"Error extracting data: {e}")
            continue

        doctors.append({
            'Name': name,
            'Specialization': specialization,
            'Experience': experience,
            'Clinic Name': clinic_name,
            'Fees': fees,
            'Address': f"{clinic_name}, {locality}, {city}",
            'Timing': timing,
        })

    return doctors

def get_next_page(soup):
    """Return the absolute URL of the next listing page, or None if there is none.

    Args:
        soup: Parsed page (anything with a BeautifulSoup-style ``find``) that
            is searched for the ``<a data-qa-id="pagination_next">`` link.

    Returns:
        The link target resolved against ``https://www.practo.com``, or None
        when the link is missing or has no ``href`` attribute.
    """
    # Imported locally so the function does not depend on file-level imports.
    from urllib.parse import urljoin

    next_button = soup.find('a', attrs={'data-qa-id': 'pagination_next'})
    if not next_button or 'href' not in next_button.attrs:
        return None
    # urljoin gives the same result as concatenation for root-relative hrefs
    # ("/coimbatore/...") but also copes with hrefs that are already absolute
    # or that lack the leading slash, where plain "+" produces a broken URL.
    return urljoin('https://www.practo.com', next_button['href'])

# Listing URL without a query string. The page number is added per request
# below; keeping "?page=1" here made every request carry two conflicting
# `page` parameters ("?page=1&page=N").
base_url = 'https://www.practo.com/coimbatore/doctors'
all_doctors_data = []
page_count = 1

while page_count <= 10:  # Assuming there are 10 pages
    # Exactly one `page` parameter per request.
    current_url = f"{base_url}?page={page_count}"
    print(f"Scraping page {page_count}")
    doctors_data = scrape_page(current_url)
    all_doctors_data.extend(doctors_data)

    page_count += 1
    time.sleep(2)  # Pause between requests to avoid hammering the site

# Collect every page's records into one table and write it out as CSV.
df = pd.DataFrame(all_doctors_data)
df.to_csv('doctors_in_coimbatore.csv', index=False)

print('Scraping complete. Data saved to doctors_in_coimbatore.csv')

Scraping page 1
Found 10 doctor divs on this page
Scraping page 2
Found 10 doctor divs on this page
Scraping page 3
Found 10 doctor divs on this page
Scraping page 4
Found 10 doctor divs on this page
Scraping page 5
Found 10 doctor divs on this page
Scraping page 6
Found 10 doctor divs on this page
Scraping page 7
Found 10 doctor divs on this page
Scraping page 8
Found 10 doctor divs on this page
Scraping page 9
Found 10 doctor divs on this page
Scraping page 10
Found 10 doctor divs on this page
Scraping complete. Data saved to doctors_in_coimbatore.csv
