In [1]:
!pip install selenium



In [4]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By


# Global list to hold all medicine entries.
# scrape_single_brand() appends one dict per medicine to this list, and the
# run block at the end of this cell turns it into a DataFrame.
all_medicine_data = []


def get_total_pages(driver):
    """Work out how many result pages the current listing has.

    The pagination bar is read as a list of ``<li>`` items. The item just
    before the final one is taken to carry the highest page number, and the
    text of its link is parsed as an integer. When there is no pagination
    bar, or the number cannot be read, a single page is assumed.

    Args:
        driver: Selenium WebDriver positioned on a listing page.

    Returns:
        A ``(page_range, page_count)`` tuple where ``page_range`` is
        ``range(1, page_count + 1)``.
    """
    li_xpath = '/html/body/main/div/section/div/nav/ul/li'
    items = driver.find_elements(By.XPATH, li_xpath)

    # Default used both when the bar is absent and when parsing fails.
    page_count = 1

    if items:
        try:
            # XPath positions are 1-based, so len - 1 addresses the
            # second-to-last <li>.
            penultimate = len(items) - 1
            anchor = driver.find_element(By.XPATH, f'{li_xpath}[{penultimate}]/a')
            page_count = int(anchor.text.strip())
        except Exception as e:
            print(f"⚠️ Failed to detect last page: {e}")

    return range(1, page_count + 1), page_count


def scrape_single_brand(driver):
    """Scrape every medicine card for one company, following its pagination.

    For each page reported by ``get_total_pages`` the brand heading is read
    once, then a fixed grid of card slots (``div[1..4]`` x ``a[1..10]``) is
    probed. Every card found is appended to the module-level
    ``all_medicine_data`` list as a dict with the keys ``Medicine Name``,
    ``Type``, ``Brand``, ``Strength``, ``Generic`` and ``Price``.

    The function is best-effort: an empty grid slot is skipped silently, any
    other per-card failure is reported in one line and skipped, and a page
    whose heading cannot be read contributes no rows.

    Args:
        driver: Selenium WebDriver already showing the first page of the
            company's brand listing.

    Returns:
        None. Results are accumulated in ``all_medicine_data``.
    """
    header_xpath = '/html/body/main/div/section/div/div[1]/h1'
    xpath_base = '/html/body/main/div/section/div/div[2]/div[{j}]/a[{i}]/div/div[{n}]'

    k_range, total_page = get_total_pages(driver)

    for k in k_range:
        # The heading is shared by every card on the page, so it is read once
        # per page instead of once per grid slot.
        try:
            brand_header = driver.find_element(By.XPATH, header_xpath)
            brand_type = brand_header.find_element(By.TAG_NAME, 'small').text
            brand_name = brand_header.text.replace(brand_type, '').strip()
        except Exception as e:
            reason = str(e).splitlines()[0] if str(e) else type(e).__name__
            print(f"⚠️ Brand heading unavailable on page {k}: {reason}")
            brand_name = None

        # Without a heading no row can be built, so the grid is not probed.
        if brand_name is not None:
            for j in range(1, 5):
                for i in range(1, 11):
                    try:
                        # The first inner div holds the name plus a <span>
                        # with the dosage form; strip the span text to keep
                        # only the name.
                        name_div = driver.find_element(By.XPATH, xpath_base.format(j=j, i=i, n=1))
                        medicine_type = name_div.find_element(By.TAG_NAME, 'span').text
                        medicine_name = name_div.text.replace(medicine_type, '').strip()

                        medicine_weight = driver.find_element(By.XPATH, xpath_base.format(j=j, i=i, n=2)).text
                        medicine_generic = driver.find_element(By.XPATH, xpath_base.format(j=j, i=i, n=3)).text
                        medicine_price = driver.find_element(By.XPATH, xpath_base.format(j=j, i=i, n=4)).text
                    except NoSuchElementException:
                        # Expected: the grid slot is empty or incomplete.
                        continue
                    except Exception as e:
                        # Unexpected failure: report it instead of hiding it,
                        # but keep going with the remaining slots.
                        reason = str(e).splitlines()[0] if str(e) else type(e).__name__
                        print(f"⚠️ Skipping card [{j}][{i}] on page {k}: {reason}")
                        continue

                    # Append to global list
                    all_medicine_data.append({
                        'Medicine Name': medicine_name,
                        'Type': medicine_type,
                        'Brand': brand_name,
                        'Strength': medicine_weight,
                        'Generic': medicine_generic,
                        'Price': medicine_price
                    })

        # Move to next brand page if available
        if k < total_page:
            try:
                driver.find_element(By.XPATH, '//a[text()="›"]').click()
                time.sleep(2)
            except Exception:
                # ``Exception`` (not a bare except) so Ctrl-C still interrupts.
                print("⚠️ No next page, ending brand early.")
                break


def navigate_to_brand_page(company_url):
    """Open a company page in a new Chrome session and expand its brand list.

    After loading ``company_url`` the function tries to click the link that
    leads to the full brand listing. If that link cannot be found or clicked,
    the browser is left on the company page and scraping proceeds from there.

    Args:
        company_url: URL of the company page to load.

    Returns:
        The Selenium WebDriver for the new session. The caller owns it and
        must call ``quit()`` when finished.

    Raises:
        Any exception raised while starting the page load is re-raised after
        the browser has been closed, so no Chrome process is left behind.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(company_url)
        time.sleep(2)

        try:
            show_all_xpath = '/html/body/main/div/section/div/div/div[2]/div[3]/a'
            driver.find_element(By.XPATH, show_all_xpath).click()
            time.sleep(2)
        except Exception:
            # ``Exception`` (not a bare except) so Ctrl-C still interrupts.
            print("ℹ️ 'Show All Brands' link not found — scraping directly.")
    except BaseException:
        # The caller never receives the driver on this path, so it has to be
        # released here; this also covers KeyboardInterrupt during the sleeps.
        driver.quit()
        raise

    return driver


def scrape_all_herbal_companies(max_pages=8):
    """Main scraper: visit every herbal company and scrape its medicines.

    The company index is walked page by page. On each page a fixed grid of
    slots (``div[1..4]`` x ``div[1..10]``) is probed for a company link; the
    link's ``href`` is handed to ``navigate_to_brand_page`` and the resulting
    browser session to ``scrape_single_brand``. The index browser never
    leaves the listing, so a failure on one company cannot derail the
    remaining ones.

    Args:
        max_pages: Number of index pages to process (defaults to 8).

    Returns:
        None. Rows are collected by ``scrape_single_brand``.
    """
    main_driver = webdriver.Chrome()
    try:
        main_driver.get('https://medex.com.bd/companies?herbal=1')
        time.sleep(2)

        for page_no in range(1, max_pages + 1):
            for j in range(1, 5):
                for i in range(1, 11):
                    try:
                        xpath = f'/html/body/main/div/section/div/div[2]/div[{j}]/div[{i}]/div[1]/a'
                        # Reading href avoids opening a throw-away tab just
                        # to learn the URL.
                        company_url = main_driver.find_element(By.XPATH, xpath).get_attribute('href')
                        if not company_url:
                            raise ValueError(f"company link [{j}][{i}] has no href")

                        print(f"\n🔍 Scraping company: {company_url}")

                        brand_driver = navigate_to_brand_page(company_url)
                        try:
                            scrape_single_brand(brand_driver)
                        finally:
                            # Always release the per-company browser, even
                            # when scraping fails.
                            brand_driver.quit()

                    except Exception as e:
                        # Selenium messages carry a long native stack trace;
                        # the first line is enough to identify the problem.
                        reason = str(e).splitlines()[0] if str(e) else type(e).__name__
                        print(f"⚠️ Skipping item due to error: {reason}")
                        continue

            # Move to next page, but not after the last one we intend to read.
            if page_no < max_pages:
                try:
                    next_btn = main_driver.find_element(By.XPATH, '//a[text()="›"]')
                    next_btn.click()
                    time.sleep(2)
                except Exception:
                    # ``Exception`` (not a bare except) so Ctrl-C still works.
                    print("⚠️ No more company pages.")
                    break
    finally:
        # Runs on normal completion, errors and interrupts alike.
        main_driver.quit()


# ---------- RUN ----------
if __name__ == "__main__":
    scrape_all_herbal_companies()

    # Build the result table from the records accumulated by the scraper.
    df = pd.DataFrame(all_medicine_data)

    # Nothing is written to disk at this point (export is an explicit
    # df.to_csv call), so report what was collected rather than claiming a
    # file was saved.
    print(f"\n✅ Collected {len(df)} medicine records")


⚠️ Skipping item due to error: Message: no such element: Unable to locate element: {"method":"xpath","selector":"/html/body/main/div/section/div/div[2]/div[1]/div[1]/div[1]/a"}
  (Session info: chrome=138.0.7204.51); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#nosuchelementexception
Stacktrace:
	GetHandleVerifier [0x0x7ff77b846f95+76917]
	GetHandleVerifier [0x0x7ff77b846ff0+77008]
	(No symbol) [0x0x7ff77b5f9dea]
	(No symbol) [0x0x7ff77b650256]
	(No symbol) [0x0x7ff77b65050c]
	(No symbol) [0x0x7ff77b6a3887]
	(No symbol) [0x0x7ff77b6784af]
	(No symbol) [0x0x7ff77b6a065c]
	(No symbol) [0x0x7ff77b678243]
	(No symbol) [0x0x7ff77b641431]
	(No symbol) [0x0x7ff77b6421c3]
	GetHandleVerifier [0x0x7ff77bb1d2cd+3051437]
	GetHandleVerifier [0x0x7ff77bb17923+3028483]
	GetHandleVerifier [0x0x7ff77bb358bd+3151261]
	GetHandleVerifier [0x0x7ff77b86185e+185662]
	GetHandleVerifier [0x0x7ff77b86971f+218111]
	GetHandleVerifier [0x0x7

In [5]:
# Show the collected records; a bare expression on the last line of a cell
# is rendered with the DataFrame's rich display.
df


Unnamed: 0,Medicine Name,Type,Brand,Strength,Generic,Price
0,Abaclor,Capsule,ACI Limited,500 mg,Cefaclor Monohydrate,Unit Price : ৳ 40.27
1,Abaclor,Suspension,ACI Limited,125 mg/5 ml,Cefaclor Monohydrate,100 ml bottle : ৳ 280.00
2,Abaclor,Pediatric Drop,ACI Limited,125 mg/1.25 ml,Cefaclor Monohydrate,15 ml bottle : ৳ 135.00
3,Abaclor,Capsule,ACI Limited,250 mg,Cefaclor Monohydrate,Unit Price : ৳ 21.14
4,Abecab,Tablet,ACI Limited,5 mg+20 mg,Amlodipine Besilate + Olmesartan Medoxomil,Unit Price : ৳ 12.00
...,...,...,...,...,...,...
23499,Zincoral DT,Tablet,Ziska Pharmaceuticals Ltd.,20 mg,Zinc Sulfate Monohydrate,Unit Price : ৳ 2.00
23500,Ziska Oral Saline,Powder,Ziska Pharmaceuticals Ltd.,10.25 gm,Oral rehydration salt [glucose based],10.25 gm sachet : ৳ 6.00
23501,Ziskavit,Tablet,Ziska Pharmaceuticals Ltd.,5 mg+2 mg+2 mg+20 mg,Vitamin B complex,Unit Price : ৳ 0.55
23502,Ziskavit-M,Tablet,Ziska Pharmaceuticals Ltd.,,Multivitamin [Adult preparation],Unit Price : ৳ 2.00


In [6]:
# Write the table to medicines.csv; index=False leaves the row index out of
# the file.
df.to_csv('medicines.csv', index=False)