In [11]:
pip install pandas requests beautifulsoup4

Note: you may need to restart the kernel to use updated packages.




In [26]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

def scrape_prices(product_name, retail_outlets):
    data = []  # List to store scraped data

    for outlet in retail_outlets:
        outlet_data = {"Retail Outlet": outlet['name']}
        print(f"Retail Outlet: {outlet['name']}")

        # Fetch the webpage
        url = outlet['url']
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')

            # Search box for Walmart and CVS Pharmacy
            if outlet['name'] in ['Walmart', 'CVS Pharmacy']:
                search_box = soup.find('input', {'type': 'search'})
            elif outlet['name'] == 'Amazon':
                # For Amazon, use a different search box ID
                search_box = soup.find('input', {'id': 'twotabsearchtextbox'})
                # Find the form and submit
                form = search_box.find_parent('form')
                form_action = form['action']
                if not form_action.startswith('http'):
                    # Handle relative URLs
                    form_action = url + form_action
                form_data = {input_.get('name', ''): input_.get('value', '') for input_ in form.find_all('input')}
                response = requests.post(form_action, data=form_data)
                soup = BeautifulSoup(response.content, 'html.parser')

            if search_box:
                search_box.clear()
                search_box.insert(0, product_name)  # Insert product name into search box

                # Delay for a short time to avoid being blocked
                time.sleep(1)

                # Find all product links on the search result page
                product_links = soup.find_all('a', {'class': 'a-link-normal s-no-outline'})
                for link in product_links:
                    if 'href' in link.attrs:
                        product_url = 'https://www.amazon.com' + link['href'] if outlet['name'] == 'Amazon' else link['href']
                        product_response = requests.get(product_url)
                        if product_response.status_code == 200:
                            product_soup = BeautifulSoup(product_response.content, 'html.parser')

                            # Find the product price
                            product_price = product_soup.find('span', {'id': 'priceblock_ourprice'})
                            if product_price:
                                product_price = product_price.get_text().strip()
                                print(product_price)
                                outlet_data["Price"] = product_price
                                break

            else:
                print("Search box not found on the page.")

        else:
            print(f"Error fetching webpage for {outlet['name']}")

        data.append(outlet_data.copy())

    # Create a DataFrame from the scraped data
    df = pd.DataFrame(data)
    if not df.empty:
        print("\nScraped Data:")
        print(df)

        # Save the DataFrame to a CSV file on the desktop
        desktop_path = os.path.join(os.path.expanduser('~'), 'Desktop')
        csv_file_path = os.path.join(desktop_path, f"{product_name}_prices.csv")
        df.to_csv(csv_file_path, index=False)
        print(f"\nData saved to {csv_file_path}")

def main():
    products = {
        "Lecithin-Softgels": "lecithin",
        "Coenzyme Q10": "ubiquinol",
        "Aminomine": "tryptophan",
        "Alpha-Lipoic-Acid-300": "alpha-lipoic acid",
        "Advil": "Ibuprofen"
    }

    # List of selected retail outlets
    retail_outlets = [
        {"name": "Walmart", "url": "https://www.walmart.com"},
        {"name": "Publix Pharmacy", "url": "https://www.publix.com/pharmacy"},
        {"name": "Walgreens Pharmacy", "url": "https://www.walgreens.com/"}
    ]

    for product_name, generic_name in products.items():
        print(f"\nScraping prices for {product_name} ({generic_name})...\n")
        scrape_prices(generic_name, retail_outlets)

if __name__ == "__main__":
    main()



Scraping prices for Lecithin-Softgels (lecithin)...

Retail Outlet: Walmart
Retail Outlet: Publix Pharmacy
Retail Outlet: Walgreens Pharmacy

Scraped Data:
        Retail Outlet
0             Walmart
1     Publix Pharmacy
2  Walgreens Pharmacy

Data saved to C:\Users\jonat\Desktop\lecithin_prices.csv

Scraping prices for Coenzyme Q10 (ubiquinol)...

Retail Outlet: Walmart
Retail Outlet: Publix Pharmacy
Retail Outlet: Walgreens Pharmacy

Scraped Data:
        Retail Outlet
0             Walmart
1     Publix Pharmacy
2  Walgreens Pharmacy

Data saved to C:\Users\jonat\Desktop\ubiquinol_prices.csv

Scraping prices for Aminomine (tryptophan)...

Retail Outlet: Walmart
Retail Outlet: Publix Pharmacy
Retail Outlet: Walgreens Pharmacy

Scraped Data:
        Retail Outlet
0             Walmart
1     Publix Pharmacy
2  Walgreens Pharmacy

Data saved to C:\Users\jonat\Desktop\tryptophan_prices.csv

Scraping prices for Alpha-Lipoic-Acid-300 (alpha-lipoic acid)...

Retail Outlet: Walmart
Retail O