In [21]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.firefox import GeckoDriverManager

# Path to the local Firefox executable that Selenium should launch
firefox_binary_path = 'C:\\Program Files\\Mozilla Firefox\\firefox.exe'  # Update this path as needed

# Base URL of the website; relative recipe links are resolved against it
base_url = 'https://tasty.co/'

# URL of the tag listing page to crawl (breakfast recipes, despite the
# variable name)
snacks_url = 'https://tasty.co/tag/breakfast'

# Set up Selenium with headless Firefox
firefox_options = FirefoxOptions()
firefox_options.binary_location = firefox_binary_path
firefox_options.add_argument("--headless")
# NOTE(review): the next two switches look Chromium-specific; confirm that
# Firefox actually needs them.
firefox_options.add_argument("--no-sandbox")
firefox_options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Firefox(service=FirefoxService(GeckoDriverManager().install()), options=firefox_options)
try:
    driver.get(snacks_url)

    # Accept the cookie policy if it appears (best effort: any failure is
    # reported and the crawl continues)
    try:
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))
        ).click()
        time.sleep(2)  # Wait for the cookie banner to disappear
    except Exception as e:
        print("No cookie banner found or error:", e)

    # Keep clicking "Show more" to load further recipes. The loop ends as
    # soon as waiting for or clicking the button raises, e.g. because no
    # clickable button shows up within the 10 second wait.
    while True:
        try:
            show_more_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, 'show-more-button'))
            )
            show_more_button.click()
            time.sleep(2)  # Adjust sleep time if needed
        except Exception as e:
            print("No more 'Show more' button or error:", e)
            break

    # Parse the fully expanded listing page
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always shut the browser down, even if loading or parsing failed, so no
    # headless Firefox process is left running.
    driver.quit()

# Function to get all recipe links on the snacks page
def get_recipe_links(soup):
    """Collect the absolute URLs of all recipe pages linked from a parsed page.

    Args:
        soup: Parsed listing page. It must provide
            ``find_all('a', href=True)`` yielding elements that support
            ``element['href']``.

    Returns:
        A list of unique absolute recipe URLs, in the order in which they
        first appear on the page.
    """
    # Only site-relative hrefs under /recipe/ are treated as recipe pages;
    # each one is resolved against the module-level base_url.
    links = (
        urljoin(base_url, a['href'])
        for a in soup.find_all('a', href=True)
        if a['href'].startswith('/recipe/')
    )
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # crawl order is reproducible from run to run (a set would not keep it).
    return list(dict.fromkeys(links))

# Pull every recipe URL out of the parsed listing page and report how many
# were found
recipe_links = get_recipe_links(soup)
print("Found recipe links:", len(recipe_links))  # Debug statement

# Start with an empty list that will hold the scraped recipe records
recipes = list()

# Function to scrape details from a single recipe page
def scrape_recipe(url):
    """Fetch one recipe page and append its parsed details to ``recipes``.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    Scalar fields whose element is not found are stored as ``None``;
    list-valued fields are empty lists when nothing matches.

    Args:
        url: Absolute URL of the recipe page.

    Returns:
        None. On success one dict is appended to the module-level
        ``recipes`` list; if the request fails the error is printed and
        nothing is appended.
    """
    try:
        # Bound the request so a single stalled connection cannot hang the crawl.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred for {url}: {http_err}")
        return
    except Exception as err:
        # Deliberately broad: any failure on one page is reported and skipped.
        print(f"Other error occurred for {url}: {err}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    def get_text_or_none(element):
        """Return the stripped text of ``element``, or None if it is falsy."""
        return element.text.strip() if element else None

    # Extract recipe details
    recipe_title = get_text_or_none(soup.find('h1', class_='recipe-name extra-bold xs-mb05 md-mb1'))
    author = get_text_or_none(soup.find('div', class_='byline extra-bold xs-text-4 md-text-2'))
    role = get_text_or_none(soup.find('div', class_='role xs-col-12 xs-text-4 md-text-2'))
    score = get_text_or_none(soup.find('span', class_='tips-score-heading extra-bold caps xs-text-5'))

    time_container = soup.find('div', class_='recipe-time-container xs-flex xs-mt2 md-mt0 xs-flex-order-2 xs-mx2 xs-mb3')
    time_blocks = time_container.find_all('div', class_='recipe-time') if time_container else []

    def time_entry(index):
        """Return the text of the index-th time entry, or None if it is absent."""
        # Indexing blindly would raise IndexError on a page with fewer than
        # three time entries and abort the whole crawl.
        if index >= len(time_blocks):
            return None
        return get_text_or_none(time_blocks[index].find('p'))

    # Entries are read positionally: 0 = total, 1 = prep, 2 = cook.
    total_time = time_entry(0)
    prep_time = time_entry(1)
    cook_time = time_entry(2)

    ingredients = [li.text.strip() for li in soup.find_all('li', class_='ingredient')]
    preparation_steps = [li.text.strip() for li in soup.find_all('li', class_='xs-mb2')]

    # Extract nutritional information and save in separate columns
    nutrition_elements = {
        'Calories': None, 'Fat': None, 'Carbs': None, 'Fiber': None,
        'Sugar': None, 'Protein': None,
    }
    for li in soup.find_all('li', class_='list-unstyled xs-mb1'):
        text = li.get_text(separator=' ')
        for key in nutrition_elements:
            if key in text:
                # Keep what remains of the line once the label is removed
                nutrition_elements[key] = text.replace(key, '').strip()

    # Extract tags
    tags = [tag.text.strip() for tag in soup.find_all('a', class_='breadcrumb_item')]

    # Append the extracted details to the recipes list
    recipes.append({
        'Recipe Title': recipe_title,
        'Author': author,
        'Role': role,
        'Score': score,
        'Total Time': total_time,
        'Prep Time': prep_time,
        'Cook Time': cook_time,
        'Ingredients': ingredients,
        'Preparation Steps': preparation_steps,
        'Calories': nutrition_elements['Calories'],
        'Fat': nutrition_elements['Fat'],
        'Carbs': nutrition_elements['Carbs'],
        'Fiber': nutrition_elements['Fiber'],
        'Sugar': nutrition_elements['Sugar'],
        'Protein': nutrition_elements['Protein'],
        'Tags': tags,
        'Recipe URL': url,
    })

# Visit each collected recipe page in turn; scrape_recipe() is called once
# per link
for link in recipe_links:
    scrape_recipe(link)
    time.sleep(1)  # Pause between requests so the server is not flooded

# Turn the collected recipe records into a table
df_breakfast = pd.DataFrame(data=recipes)

# Write the table to disk as CSV, leaving out the row index
df_breakfast.to_csv(path_or_buf='tasty_recipes_breakfast.csv', index=False)

No more 'Show more' button or error: Message: 
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:511:5
dom.find/</<@chrome://remote/content/shared/DOM.sys.mjs:136:16

Found recipe links: 1406
HTTP error occurred for https://tasty.co/recipe/yukon-gold-cinnamon-rolls: 404 Client Error: Not Found for url: https://tasty.co/recipe/yukon-gold-cinnamon-rolls
HTTP error occurred for https://tasty.co/recipe/recipe-title-crispy-twisted-bacon-sticks: 500 Server Error: Internal Server Error for url: https://tasty.co/recipe/recipe-title-crispy-twisted-bacon-sticks


In [19]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.firefox import GeckoDriverManager

# Path to the local Firefox executable that Selenium should launch
firefox_binary_path = 'C:\\Program Files\\Mozilla Firefox\\firefox.exe'  # Update this path as needed

# Base URL of the website; relative recipe links are resolved against it
base_url = 'https://tasty.co/'

# URL of the tag listing page to crawl (dinner recipes, despite the
# variable name)
snacks_url = 'https://tasty.co/tag/dinner'

# Set up Selenium with headless Firefox
firefox_options = FirefoxOptions()
firefox_options.binary_location = firefox_binary_path
firefox_options.add_argument("--headless")
# NOTE(review): the next two switches look Chromium-specific; confirm that
# Firefox actually needs them.
firefox_options.add_argument("--no-sandbox")
firefox_options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Firefox(service=FirefoxService(GeckoDriverManager().install()), options=firefox_options)
try:
    driver.get(snacks_url)

    # Accept the cookie policy if it appears (best effort: any failure is
    # reported and the crawl continues)
    try:
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))
        ).click()
        time.sleep(2)  # Wait for the cookie banner to disappear
    except Exception as e:
        print("No cookie banner found or error:", e)

    # Keep clicking "Show more" to load further recipes. The loop ends as
    # soon as waiting for or clicking the button raises, e.g. because no
    # clickable button shows up within the 10 second wait.
    while True:
        try:
            show_more_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, 'show-more-button'))
            )
            show_more_button.click()
            time.sleep(2)  # Adjust sleep time if needed
        except Exception as e:
            print("No more 'Show more' button or error:", e)
            break

    # Parse the fully expanded listing page
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always shut the browser down, even if loading or parsing failed, so no
    # headless Firefox process is left running.
    driver.quit()

# Function to get all recipe links on the snacks page
def get_recipe_links(soup):
    """Collect the absolute URLs of all recipe pages linked from a parsed page.

    Args:
        soup: Parsed listing page. It must provide
            ``find_all('a', href=True)`` yielding elements that support
            ``element['href']``.

    Returns:
        A list of unique absolute recipe URLs, in the order in which they
        first appear on the page.
    """
    # Only site-relative hrefs under /recipe/ are treated as recipe pages;
    # each one is resolved against the module-level base_url.
    links = (
        urljoin(base_url, a['href'])
        for a in soup.find_all('a', href=True)
        if a['href'].startswith('/recipe/')
    )
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # crawl order is reproducible from run to run (a set would not keep it).
    return list(dict.fromkeys(links))

# Pull every recipe URL out of the parsed listing page and report how many
# were found
recipe_links = get_recipe_links(soup)
print("Found recipe links:", len(recipe_links))  # Debug statement

# Start with an empty list that will hold the scraped recipe records
recipes = list()

# Function to scrape details from a single recipe page
def scrape_recipe(url):
    """Fetch one recipe page and append its parsed details to ``recipes``.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    Scalar fields whose element is not found are stored as ``None``;
    list-valued fields are empty lists when nothing matches.

    Args:
        url: Absolute URL of the recipe page.

    Returns:
        None. On success one dict is appended to the module-level
        ``recipes`` list; if the request fails the error is printed and
        nothing is appended.
    """
    try:
        # Bound the request so a single stalled connection cannot hang the crawl.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred for {url}: {http_err}")
        return
    except Exception as err:
        # Deliberately broad: any failure on one page is reported and skipped.
        print(f"Other error occurred for {url}: {err}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    def get_text_or_none(element):
        """Return the stripped text of ``element``, or None if it is falsy."""
        return element.text.strip() if element else None

    # Extract recipe details
    recipe_title = get_text_or_none(soup.find('h1', class_='recipe-name extra-bold xs-mb05 md-mb1'))
    author = get_text_or_none(soup.find('div', class_='byline extra-bold xs-text-4 md-text-2'))
    role = get_text_or_none(soup.find('div', class_='role xs-col-12 xs-text-4 md-text-2'))
    score = get_text_or_none(soup.find('span', class_='tips-score-heading extra-bold caps xs-text-5'))

    time_container = soup.find('div', class_='recipe-time-container xs-flex xs-mt2 md-mt0 xs-flex-order-2 xs-mx2 xs-mb3')
    time_blocks = time_container.find_all('div', class_='recipe-time') if time_container else []

    def time_entry(index):
        """Return the text of the index-th time entry, or None if it is absent."""
        # Indexing blindly would raise IndexError on a page with fewer than
        # three time entries and abort the whole crawl.
        if index >= len(time_blocks):
            return None
        return get_text_or_none(time_blocks[index].find('p'))

    # Entries are read positionally: 0 = total, 1 = prep, 2 = cook.
    total_time = time_entry(0)
    prep_time = time_entry(1)
    cook_time = time_entry(2)

    ingredients = [li.text.strip() for li in soup.find_all('li', class_='ingredient')]
    preparation_steps = [li.text.strip() for li in soup.find_all('li', class_='xs-mb2')]

    # Extract nutritional information and save in separate columns
    nutrition_elements = {
        'Calories': None, 'Fat': None, 'Carbs': None, 'Fiber': None,
        'Sugar': None, 'Protein': None,
    }
    for li in soup.find_all('li', class_='list-unstyled xs-mb1'):
        text = li.get_text(separator=' ')
        for key in nutrition_elements:
            if key in text:
                # Keep what remains of the line once the label is removed
                nutrition_elements[key] = text.replace(key, '').strip()

    # Extract tags
    tags = [tag.text.strip() for tag in soup.find_all('a', class_='breadcrumb_item')]

    # Append the extracted details to the recipes list
    recipes.append({
        'Recipe Title': recipe_title,
        'Author': author,
        'Role': role,
        'Score': score,
        'Total Time': total_time,
        'Prep Time': prep_time,
        'Cook Time': cook_time,
        'Ingredients': ingredients,
        'Preparation Steps': preparation_steps,
        'Calories': nutrition_elements['Calories'],
        'Fat': nutrition_elements['Fat'],
        'Carbs': nutrition_elements['Carbs'],
        'Fiber': nutrition_elements['Fiber'],
        'Sugar': nutrition_elements['Sugar'],
        'Protein': nutrition_elements['Protein'],
        'Tags': tags,
        'Recipe URL': url,
    })

# Visit each collected recipe page in turn; scrape_recipe() is called once
# per link
for link in recipe_links:
    scrape_recipe(link)
    time.sleep(1)  # Pause between requests so the server is not flooded

# Turn the collected recipe records into a table
df_dinner = pd.DataFrame(data=recipes)

# Write the table to disk as CSV, leaving out the row index
df_dinner.to_csv(path_or_buf='tasty_recipes_dinner.csv', index=False)

No more 'Show more' button or error: Message: 
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:511:5
dom.find/</<@chrome://remote/content/shared/DOM.sys.mjs:136:16

Found recipe links: 3345
HTTP error occurred for https://tasty.co/recipe/honey-garlic-chicken: 404 Client Error: Not Found for url: https://tasty.co/recipe/honey-garlic-chicken
HTTP error occurred for https://tasty.co/recipe/rotisserie-chicken-dinner-the-garden-lover: 404 Client Error: Not Found for url: https://tasty.co/recipe/rotisserie-chicken-dinner-the-garden-lover
HTTP error occurred for https://tasty.co/recipe/accordion-potatoes-with-garlic-cajun-butter: 404 Client Error: Not Found for url: https://tasty.co/recipe/accordion-potatoes-with-garlic-cajun-butter
HTTP error occurred for https://tasty.co/recipe/mushroom-bacon-pasta-salad: 404 C

In [18]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.firefox import GeckoDriverManager

# Path to the local Firefox executable that Selenium should launch
firefox_binary_path = 'C:\\Program Files\\Mozilla Firefox\\firefox.exe'  # Update this path as needed

# Base URL of the website; relative recipe links are resolved against it
base_url = 'https://tasty.co/'

# URL of the tag listing page to crawl (dessert recipes, despite the
# variable name)
snacks_url = 'https://tasty.co/tag/desserts'

# Set up Selenium with headless Firefox
firefox_options = FirefoxOptions()
firefox_options.binary_location = firefox_binary_path
firefox_options.add_argument("--headless")
# NOTE(review): the next two switches look Chromium-specific; confirm that
# Firefox actually needs them.
firefox_options.add_argument("--no-sandbox")
firefox_options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Firefox(service=FirefoxService(GeckoDriverManager().install()), options=firefox_options)
try:
    driver.get(snacks_url)

    # Accept the cookie policy if it appears (best effort: any failure is
    # reported and the crawl continues)
    try:
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))
        ).click()
        time.sleep(2)  # Wait for the cookie banner to disappear
    except Exception as e:
        print("No cookie banner found or error:", e)

    # Keep clicking "Show more" to load further recipes. The loop ends as
    # soon as waiting for or clicking the button raises, e.g. because no
    # clickable button shows up within the 10 second wait.
    while True:
        try:
            show_more_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, 'show-more-button'))
            )
            show_more_button.click()
            time.sleep(2)  # Adjust sleep time if needed
        except Exception as e:
            print("No more 'Show more' button or error:", e)
            break

    # Parse the fully expanded listing page
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always shut the browser down, even if loading or parsing failed, so no
    # headless Firefox process is left running.
    driver.quit()

# Function to get all recipe links on the snacks page
def get_recipe_links(soup):
    """Collect the absolute URLs of all recipe pages linked from a parsed page.

    Args:
        soup: Parsed listing page. It must provide
            ``find_all('a', href=True)`` yielding elements that support
            ``element['href']``.

    Returns:
        A list of unique absolute recipe URLs, in the order in which they
        first appear on the page.
    """
    # Only site-relative hrefs under /recipe/ are treated as recipe pages;
    # each one is resolved against the module-level base_url.
    links = (
        urljoin(base_url, a['href'])
        for a in soup.find_all('a', href=True)
        if a['href'].startswith('/recipe/')
    )
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # crawl order is reproducible from run to run (a set would not keep it).
    return list(dict.fromkeys(links))

# Pull every recipe URL out of the parsed listing page and report how many
# were found
recipe_links = get_recipe_links(soup)
print("Found recipe links:", len(recipe_links))  # Debug statement

# Start with an empty list that will hold the scraped recipe records
recipes = list()

# Function to scrape details from a single recipe page
def scrape_recipe(url):
    """Fetch one recipe page and append its parsed details to ``recipes``.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    Scalar fields whose element is not found are stored as ``None``;
    list-valued fields are empty lists when nothing matches.

    Args:
        url: Absolute URL of the recipe page.

    Returns:
        None. On success one dict is appended to the module-level
        ``recipes`` list; if the request fails the error is printed and
        nothing is appended.
    """
    try:
        # Bound the request so a single stalled connection cannot hang the crawl.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred for {url}: {http_err}")
        return
    except Exception as err:
        # Deliberately broad: any failure on one page is reported and skipped.
        print(f"Other error occurred for {url}: {err}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    def get_text_or_none(element):
        """Return the stripped text of ``element``, or None if it is falsy."""
        return element.text.strip() if element else None

    # Extract recipe details
    recipe_title = get_text_or_none(soup.find('h1', class_='recipe-name extra-bold xs-mb05 md-mb1'))
    author = get_text_or_none(soup.find('div', class_='byline extra-bold xs-text-4 md-text-2'))
    role = get_text_or_none(soup.find('div', class_='role xs-col-12 xs-text-4 md-text-2'))
    score = get_text_or_none(soup.find('span', class_='tips-score-heading extra-bold caps xs-text-5'))

    time_container = soup.find('div', class_='recipe-time-container xs-flex xs-mt2 md-mt0 xs-flex-order-2 xs-mx2 xs-mb3')
    time_blocks = time_container.find_all('div', class_='recipe-time') if time_container else []

    def time_entry(index):
        """Return the text of the index-th time entry, or None if it is absent."""
        # Indexing blindly would raise IndexError on a page with fewer than
        # three time entries and abort the whole crawl.
        if index >= len(time_blocks):
            return None
        return get_text_or_none(time_blocks[index].find('p'))

    # Entries are read positionally: 0 = total, 1 = prep, 2 = cook.
    total_time = time_entry(0)
    prep_time = time_entry(1)
    cook_time = time_entry(2)

    ingredients = [li.text.strip() for li in soup.find_all('li', class_='ingredient')]
    preparation_steps = [li.text.strip() for li in soup.find_all('li', class_='xs-mb2')]

    # Extract nutritional information and save in separate columns
    nutrition_elements = {
        'Calories': None, 'Fat': None, 'Carbs': None, 'Fiber': None,
        'Sugar': None, 'Protein': None,
    }
    for li in soup.find_all('li', class_='list-unstyled xs-mb1'):
        text = li.get_text(separator=' ')
        for key in nutrition_elements:
            if key in text:
                # Keep what remains of the line once the label is removed
                nutrition_elements[key] = text.replace(key, '').strip()

    # Extract tags
    tags = [tag.text.strip() for tag in soup.find_all('a', class_='breadcrumb_item')]

    # Append the extracted details to the recipes list
    recipes.append({
        'Recipe Title': recipe_title,
        'Author': author,
        'Role': role,
        'Score': score,
        'Total Time': total_time,
        'Prep Time': prep_time,
        'Cook Time': cook_time,
        'Ingredients': ingredients,
        'Preparation Steps': preparation_steps,
        'Calories': nutrition_elements['Calories'],
        'Fat': nutrition_elements['Fat'],
        'Carbs': nutrition_elements['Carbs'],
        'Fiber': nutrition_elements['Fiber'],
        'Sugar': nutrition_elements['Sugar'],
        'Protein': nutrition_elements['Protein'],
        'Tags': tags,
        'Recipe URL': url,
    })

# Visit each collected recipe page in turn; scrape_recipe() is called once
# per link
for link in recipe_links:
    scrape_recipe(link)
    time.sleep(1)  # Pause between requests so the server is not flooded

# Turn the collected recipe records into a table
df_desserts = pd.DataFrame(data=recipes)

# Write the table to disk as CSV, leaving out the row index
df_desserts.to_csv(path_or_buf='tasty_recipes_desserts.csv', index=False)

No more 'Show more' button or error: Message: 
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:511:5
dom.find/</<@chrome://remote/content/shared/DOM.sys.mjs:136:16

Found recipe links: 2536
HTTP error occurred for https://tasty.co/recipe/sungyi-s-smores-cookies: 404 Client Error: Not Found for url: https://tasty.co/recipe/sungyi-s-smores-cookies
HTTP error occurred for https://tasty.co/recipe/yukon-gold-cinnamon-rolls: 404 Client Error: Not Found for url: https://tasty.co/recipe/yukon-gold-cinnamon-rolls


In [17]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.firefox import GeckoDriverManager

# Path to the local Firefox executable that Selenium should launch
firefox_binary_path = 'C:\\Program Files\\Mozilla Firefox\\firefox.exe'  # Update this path as needed

# Base URL of the website; relative recipe links are resolved against it
base_url = 'https://tasty.co/'

# URL of the tag listing page to crawl (snacks recipes)
snacks_url = 'https://tasty.co/tag/snacks'

# Set up Selenium with headless Firefox
firefox_options = FirefoxOptions()
firefox_options.binary_location = firefox_binary_path
firefox_options.add_argument("--headless")
# NOTE(review): the next two switches look Chromium-specific; confirm that
# Firefox actually needs them.
firefox_options.add_argument("--no-sandbox")
firefox_options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Firefox(service=FirefoxService(GeckoDriverManager().install()), options=firefox_options)
try:
    driver.get(snacks_url)

    # Accept the cookie policy if it appears (best effort: any failure is
    # reported and the crawl continues)
    try:
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))
        ).click()
        time.sleep(2)  # Wait for the cookie banner to disappear
    except Exception as e:
        print("No cookie banner found or error:", e)

    # Keep clicking "Show more" to load further recipes. The loop ends as
    # soon as waiting for or clicking the button raises, e.g. because no
    # clickable button shows up within the 10 second wait.
    while True:
        try:
            show_more_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, 'show-more-button'))
            )
            show_more_button.click()
            time.sleep(2)  # Adjust sleep time if needed
        except Exception as e:
            print("No more 'Show more' button or error:", e)
            break

    # Parse the fully expanded listing page
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always shut the browser down, even if loading or parsing failed, so no
    # headless Firefox process is left running.
    driver.quit()

# Function to get all recipe links on the snacks page
def get_recipe_links(soup):
    """Collect the absolute URLs of all recipe pages linked from a parsed page.

    Args:
        soup: Parsed listing page. It must provide
            ``find_all('a', href=True)`` yielding elements that support
            ``element['href']``.

    Returns:
        A list of unique absolute recipe URLs, in the order in which they
        first appear on the page.
    """
    # Only site-relative hrefs under /recipe/ are treated as recipe pages;
    # each one is resolved against the module-level base_url.
    links = (
        urljoin(base_url, a['href'])
        for a in soup.find_all('a', href=True)
        if a['href'].startswith('/recipe/')
    )
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # crawl order is reproducible from run to run (a set would not keep it).
    return list(dict.fromkeys(links))

# Pull every recipe URL out of the parsed listing page and report how many
# were found
recipe_links = get_recipe_links(soup)
print("Found recipe links:", len(recipe_links))  # Debug statement

# Start with an empty list that will hold the scraped recipe records
recipes = list()

# Function to scrape details from a single recipe page
def scrape_recipe(url):
    """Fetch one recipe page and append its parsed details to ``recipes``.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
    Scalar fields whose element is not found are stored as ``None``;
    list-valued fields are empty lists when nothing matches.

    Args:
        url: Absolute URL of the recipe page.

    Returns:
        None. On success one dict is appended to the module-level
        ``recipes`` list; if the request fails the error is printed and
        nothing is appended.
    """
    try:
        # Bound the request so a single stalled connection cannot hang the crawl.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred for {url}: {http_err}")
        return
    except Exception as err:
        # Deliberately broad: any failure on one page is reported and skipped.
        print(f"Other error occurred for {url}: {err}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    def get_text_or_none(element):
        """Return the stripped text of ``element``, or None if it is falsy."""
        return element.text.strip() if element else None

    # Extract recipe details
    recipe_title = get_text_or_none(soup.find('h1', class_='recipe-name extra-bold xs-mb05 md-mb1'))
    author = get_text_or_none(soup.find('div', class_='byline extra-bold xs-text-4 md-text-2'))
    role = get_text_or_none(soup.find('div', class_='role xs-col-12 xs-text-4 md-text-2'))
    score = get_text_or_none(soup.find('span', class_='tips-score-heading extra-bold caps xs-text-5'))

    time_container = soup.find('div', class_='recipe-time-container xs-flex xs-mt2 md-mt0 xs-flex-order-2 xs-mx2 xs-mb3')
    time_blocks = time_container.find_all('div', class_='recipe-time') if time_container else []

    def time_entry(index):
        """Return the text of the index-th time entry, or None if it is absent."""
        # Indexing blindly would raise IndexError on a page with fewer than
        # three time entries and abort the whole crawl.
        if index >= len(time_blocks):
            return None
        return get_text_or_none(time_blocks[index].find('p'))

    # Entries are read positionally: 0 = total, 1 = prep, 2 = cook.
    total_time = time_entry(0)
    prep_time = time_entry(1)
    cook_time = time_entry(2)

    ingredients = [li.text.strip() for li in soup.find_all('li', class_='ingredient')]
    preparation_steps = [li.text.strip() for li in soup.find_all('li', class_='xs-mb2')]

    # Extract nutritional information and save in separate columns
    nutrition_elements = {
        'Calories': None, 'Fat': None, 'Carbs': None, 'Fiber': None,
        'Sugar': None, 'Protein': None,
    }
    for li in soup.find_all('li', class_='list-unstyled xs-mb1'):
        text = li.get_text(separator=' ')
        for key in nutrition_elements:
            if key in text:
                # Keep what remains of the line once the label is removed
                nutrition_elements[key] = text.replace(key, '').strip()

    # Extract tags
    tags = [tag.text.strip() for tag in soup.find_all('a', class_='breadcrumb_item')]

    # Append the extracted details to the recipes list
    recipes.append({
        'Recipe Title': recipe_title,
        'Author': author,
        'Role': role,
        'Score': score,
        'Total Time': total_time,
        'Prep Time': prep_time,
        'Cook Time': cook_time,
        'Ingredients': ingredients,
        'Preparation Steps': preparation_steps,
        'Calories': nutrition_elements['Calories'],
        'Fat': nutrition_elements['Fat'],
        'Carbs': nutrition_elements['Carbs'],
        'Fiber': nutrition_elements['Fiber'],
        'Sugar': nutrition_elements['Sugar'],
        'Protein': nutrition_elements['Protein'],
        'Tags': tags,
        'Recipe URL': url,
    })

# Visit each collected recipe page in turn; scrape_recipe() is called once
# per link
for link in recipe_links:
    scrape_recipe(link)
    time.sleep(1)  # Pause between requests so the server is not flooded

# Turn the collected recipe records into a table
df_snacks = pd.DataFrame(data=recipes)

# Write the table to disk as CSV, leaving out the row index
df_snacks.to_csv(path_or_buf='tasty_recipes_snacks.csv', index=False)

No more 'Show more' button or error: Message: 
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:511:5
dom.find/</<@chrome://remote/content/shared/DOM.sys.mjs:136:16

Found recipe links: 2650
HTTP error occurred for https://tasty.co/recipe/honey-garlic-chicken: 404 Client Error: Not Found for url: https://tasty.co/recipe/honey-garlic-chicken
HTTP error occurred for https://tasty.co/recipe/sungyi-s-smores-cookies: 404 Client Error: Not Found for url: https://tasty.co/recipe/sungyi-s-smores-cookies
HTTP error occurred for https://tasty.co/recipe/recipe-title-crispy-twisted-bacon-sticks: 500 Server Error: Internal Server Error for url: https://tasty.co/recipe/recipe-title-crispy-twisted-bacon-sticks
HTTP error occurred for https://tasty.co/recipe/crunchy-garlic-breadcrumb-topping: 404 Client Error: Not Found fo

In [22]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.firefox import GeckoDriverManager

# Path to the local Firefox executable that Selenium should launch
firefox_binary_path = 'C:\\Program Files\\Mozilla Firefox\\firefox.exe'  # Update this path as needed

# Base URL of the website; relative recipe links are resolved against it
base_url = 'https://tasty.co/'

# URL of the tag listing page to crawl (lunch recipes, despite the
# variable name)
snacks_url = 'https://tasty.co/tag/lunch'

# Set up Selenium with headless Firefox
firefox_options = FirefoxOptions()
firefox_options.binary_location = firefox_binary_path
firefox_options.add_argument("--headless")
# NOTE(review): the next two switches look Chromium-specific; confirm that
# Firefox actually needs them.
firefox_options.add_argument("--no-sandbox")
firefox_options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Firefox(service=FirefoxService(GeckoDriverManager().install()), options=firefox_options)
try:
    driver.get(snacks_url)

    # Accept the cookie policy if it appears (best effort: any failure is
    # reported and the crawl continues)
    try:
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))
        ).click()
        time.sleep(2)  # Wait for the cookie banner to disappear
    except Exception as e:
        print("No cookie banner found or error:", e)

    # Keep clicking "Show more" to load further recipes. The loop ends as
    # soon as waiting for or clicking the button raises, e.g. because no
    # clickable button shows up within the 10 second wait.
    while True:
        try:
            show_more_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, 'show-more-button'))
            )
            show_more_button.click()
            time.sleep(2)  # Adjust sleep time if needed
        except Exception as e:
            print("No more 'Show more' button or error:", e)
            break

    # Parse the fully expanded listing page
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always shut the browser down, even if loading or parsing failed, so no
    # headless Firefox process is left running.
    driver.quit()

# Function to get all recipe links on the snacks page
def get_recipe_links(soup):
    """Collect the absolute URLs of all recipe pages linked from a parsed page.

    Args:
        soup: Parsed listing page. It must provide
            ``find_all('a', href=True)`` yielding elements that support
            ``element['href']``.

    Returns:
        A list of unique absolute recipe URLs, in the order in which they
        first appear on the page.
    """
    # Only site-relative hrefs under /recipe/ are treated as recipe pages;
    # each one is resolved against the module-level base_url.
    links = (
        urljoin(base_url, a['href'])
        for a in soup.find_all('a', href=True)
        if a['href'].startswith('/recipe/')
    )
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # crawl order is reproducible from run to run (a set would not keep it).
    return list(dict.fromkeys(links))

# Pull every recipe URL out of the parsed listing page and report how many
# were found
recipe_links = get_recipe_links(soup)
print("Found recipe links:", len(recipe_links))  # Debug statement

# Start with an empty list that will hold the scraped recipe records
recipes = list()

# Function to scrape details from a single recipe page
def scrape_recipe(url):
    """Fetch one recipe page and append its details to the global ``recipes`` list.

    If the request fails, a message is printed and nothing is appended.
    Scalar fields whose element is not found in the page are stored as
    ``None``; list-valued fields are stored as empty lists.

    Args:
        url: Absolute URL of the recipe page.

    Returns:
        None. The result is recorded by appending a dict to ``recipes``.
    """
    try:
        # Without a timeout requests waits indefinitely, so one unresponsive
        # request would stall the whole scrape. A timeout is reported through
        # the generic handler below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred for {url}: {http_err}")
        return
    except Exception as err:
        print(f"Other error occurred for {url}: {err}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')

    # Function to safely get text from an element
    def get_text_or_none(element):
        return element.text.strip() if element else None

    # Extract recipe details
    recipe_title = get_text_or_none(soup.find('h1', class_='recipe-name extra-bold xs-mb05 md-mb1'))
    author = get_text_or_none(soup.find('div', class_='byline extra-bold xs-text-4 md-text-2'))
    role = get_text_or_none(soup.find('div', class_='role xs-col-12 xs-text-4 md-text-2'))
    score = get_text_or_none(soup.find('span', class_='tips-score-heading extra-bold caps xs-text-5'))

    # The time container is read positionally: total, prep, cook. Pages with
    # fewer than three entries leave the missing positions as None instead of
    # raising IndexError and aborting the caller's loop.
    times = [None, None, None]
    time_container = soup.find('div', class_='recipe-time-container xs-flex xs-mt2 md-mt0 xs-flex-order-2 xs-mx2 xs-mb3')
    if time_container:
        time_blocks = time_container.find_all('div', class_='recipe-time')
        for position, time_block in enumerate(time_blocks[:3]):
            times[position] = get_text_or_none(time_block.find('p'))
    total_time, prep_time, cook_time = times

    ingredients = [li.text.strip() for li in soup.find_all('li', class_='ingredient')]
    preparation_steps = [li.text.strip() for li in soup.find_all('li', class_='xs-mb2')]

    # Extract nutritional information and save in separate columns. Each
    # matching list item is stored with its label removed.
    nutrition_elements = {
        'Calories': None, 'Fat': None, 'Carbs': None, 'Fiber': None,
        'Sugar': None, 'Protein': None
    }
    for li in soup.find_all('li', class_='list-unstyled xs-mb1'):
        text = li.get_text(separator=' ')
        for key in nutrition_elements:
            if key in text:
                nutrition_elements[key] = text.replace(key, '').strip()

    # Extract tags
    tags = [tag.text.strip() for tag in soup.find_all('a', class_='breadcrumb_item')]

    # Append the extracted details to the recipes list
    recipes.append({
        'Recipe Title': recipe_title,
        'Author': author,
        'Role': role,
        'Score': score,
        'Total Time': total_time,
        'Prep Time': prep_time,
        'Cook Time': cook_time,
        'Ingredients': ingredients,
        'Preparation Steps': preparation_steps,
        'Calories': nutrition_elements['Calories'],
        'Fat': nutrition_elements['Fat'],
        'Carbs': nutrition_elements['Carbs'],
        'Fiber': nutrition_elements['Fiber'],
        'Sugar': nutrition_elements['Sugar'],
        'Protein': nutrition_elements['Protein'],
        'Tags': tags,
        'Recipe URL': url
    })

# Loop through all recipe links and scrape details. Each call appends one
# dict to `recipes`, or prints an error and appends nothing when the request
# fails.
for link in recipe_links:
    scrape_recipe(link)
    time.sleep(1)  # Add delay to avoid overwhelming the server

# Create a DataFrame from the recipes list (one row per scraped recipe dict)
df_lunch = pd.DataFrame(recipes)

# Save the DataFrame to a CSV file; index=False leaves out the row index
df_lunch.to_csv('tasty_recipes_lunch.csv', index=False)

No more 'Show more' button or error: Message: 
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:193:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:511:5
dom.find/</<@chrome://remote/content/shared/DOM.sys.mjs:136:16

Found recipe links: 1742
HTTP error occurred for https://tasty.co/recipe/accordion-potatoes-with-garlic-cajun-butter: 404 Client Error: Not Found for url: https://tasty.co/recipe/accordion-potatoes-with-garlic-cajun-butter
HTTP error occurred for https://tasty.co/recipe/mushroom-bacon-pasta-salad: 404 Client Error: Not Found for url: https://tasty.co/recipe/mushroom-bacon-pasta-salad
HTTP error occurred for https://tasty.co/recipe/air-fryer-salmon: 404 Client Error: Not Found for url: https://tasty.co/recipe/air-fryer-salmon
HTTP error occurred for https://tasty.co/recipe/fondant-potatoes: 404 Client Error: Not Found for url: https://tasty.co/r

In [23]:
import pandas as pd

# Re-read every per-category scrape result from disk. This always reloads the
# files, replacing any DataFrame of the same name that is already in memory.
df_lunch, df_dinner, df_desserts, df_snacks = map(pd.read_csv, (
    'tasty_recipes_lunch.csv',
    'tasty_recipes_dinner.csv',
    'tasty_recipes_desserts.csv',
    'tasty_recipes_snacks.csv',
))

# Stack the four categories into one frame with a fresh 0..n-1 index.
# NOTE(review): a recipe scraped under several categories would appear once
# per category here - confirm whether de-duplication on 'Recipe URL' is wanted.
df_recipes = pd.concat(
    (df_lunch, df_dinner, df_desserts, df_snacks),
    ignore_index=True,
)

# Persist the combined data, then preview the first rows
df_recipes.to_csv('tasty_recipes_combined.csv', index=False)
print(df_recipes.head())

                                 Recipe Title             Author  \
0  Roasted Moroccan Carrots With Baba Ganoush       Fiona Anchal   
1     Edamame Truffle Dumplings In Miso Broth   Nimisha Wasankar   
2         Grilled Napa Cabbage Fattoush Salad      Reza Setayesh   
3                   Roasted Tomato Basil Soup       Merle O'Neal   
4             Tuna Salad With Roasted Veggies  Mercedes Sandoval   

               Role                 Score   Total Time   Prep Time  \
0  Community Member                   NaN   30 minutes  10 minutes   
1  Community Member                   NaN  1 hr 10 min  40 minutes   
2  Community Member                   NaN   40 minutes  20 minutes   
3        Tasty Team  91% would make again   25 minutes   5 minutes   
4        Tasty Team  96% would make again   35 minutes  15 minutes   

    Cook Time                                        Ingredients  \
0  20 minutes  ['1 bunch dutch carrots, washed and tops trimm...   
1  30 minutes  ['1 package frozen 

In [24]:
# Bare expression: displays the combined DataFrame as the cell's output
df_recipes

Unnamed: 0,Recipe Title,Author,Role,Score,Total Time,Prep Time,Cook Time,Ingredients,Preparation Steps,Calories,Fat,Carbs,Fiber,Sugar,Protein,Tags,Recipe URL
0,Roasted Moroccan Carrots With Baba Ganoush,Fiona Anchal,Community Member,,30 minutes,10 minutes,20 minutes,"['1 bunch dutch carrots, washed and tops trimm...",['Place carrots on a baking dish lined with ba...,72.0,4 g,8 g,2 g,2 g,2 g,"['Meal', 'Dinner', 'Weeknight', 'Kosher', 'Hal...",https://tasty.co/recipe/roasted-moroccan-carro...
1,Edamame Truffle Dumplings In Miso Broth,Nimisha Wasankar,Community Member,,1 hr 10 min,40 minutes,30 minutes,"['1 package frozen shelled edamame', '2 tables...",['Cook the frozen edamame according the packag...,413.0,16 g,49 g,6 g,3 g,18 g,"['Cuisine', 'Asian', 'Japanese', 'Soy', 'Glute...",https://tasty.co/recipe/edamame-truffle-dumpli...
2,Grilled Napa Cabbage Fattoush Salad,Reza Setayesh,Community Member,,40 minutes,20 minutes,20 minutes,"['1 head medium napa cabbage', '1 small red on...",['Preheat the grill and preheat the oven to 37...,430.0,29 g,32 g,2 g,6 g,9 g,"['Cuisine', 'European', 'Mediterranean', 'Kosh...",https://tasty.co/recipe/grilled-napa-cabbage-f...
3,Roasted Tomato Basil Soup,Merle O'Neal,Tasty Team,91% would make again,25 minutes,5 minutes,20 minutes,"['1 tablespoon olive oil', '1 yellow onion, di...","['In a large pot over medium heat, combine the...",48.0,0 g,9 g,1 g,2 g,1 g,"['Meal', 'Dinner', 'Soups', 'Budget', 'High-Fi...",https://tasty.co/recipe/roasted-tomato-basil-soup
4,Tuna Salad With Roasted Veggies,Mercedes Sandoval,Tasty Team,96% would make again,35 minutes,15 minutes,20 minutes,"['1 cup green beans (360 g), trimmed', '2 cups...","['Preheat the oven to 375˚F (190˚C).', 'On a b...",576.0,19 g,51 g,7 g,11 g,49 g,"['Meal', 'Lunch', 'Salads', 'Dairy', 'Fish', '...",https://tasty.co/recipe/tuna-salad-with-roaste...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10245,15-Minute Donuts,Matt Ciampa & Joe Sasto,Tasty Team,87% would make again,15 minutes,7 minutes,8 minutes,"['8 cups canola oil (1.9 L), for frying', '4 t...",['Fill a large pot with 2 inches (5 cm) of can...,490.0,35 g,40 g,1 g,26 g,2 g,"['Cooking Style', 'Deep-Fry', 'Budget', 'Glute...",https://tasty.co/recipe/15-minute-donuts
10246,Vegan Tzatziki,Betsy Carter,Tasty Team,97% would make again,,,,"['12 oz coconut yogurt (340 g)', '1 lemon, jui...","['In a blender or food processor, combine coco...",141.0,7 g,16 g,0 g,11 g,4 g,"['Cuisine', 'European', 'Mediterranean', 'Budg...",https://tasty.co/recipe/vegan-tzatziki
10247,Swiss Roll Pops,,,71% would make again,32 minutes,20 minutes,12 minutes,"['⅔ cup caster sugar (125 g), plus more for du...","['Pre-heat the oven to 200°C (400°F).', 'Add t...",150.0,3 g,25 g,0 g,16 g,2 g,"['Meal', 'Desserts', 'Cakes', 'Budget', 'Sweet...",https://tasty.co/recipe/swiss-roll-pops
10248,Banana Bread Cake Truffles,Tresha Lindo & Chris Salicrup,Tasty Team,94% would make again,2 hr 15 min,30 minutes,50 minutes,"['nonstick cooking spray, for greasing', '3 ri...",['Make the banana bread: Preheat the oven to 3...,315.0,7 g,62 g,4 g,40 g,3 g,"['Seasonal', 'Occasion', 'Party', 'Budget', 'D...",https://tasty.co/recipe/banana-bread-cake-truf...
