In [3]:
import requests
from bs4 import BeautifulSoup
import time

In [4]:
def get_recipe_urls(base_url, total_pages, *, delay=2, timeout=30):
    """
    Scrape recipe URLs from every page of a paginated recipe listing.

    Page 1 is requested from ``base_url`` itself and page N (N >= 2) from
    ``<base_url>/page/<N>``. On each page, the ``href`` of the first ``<a>``
    found inside every ``<article>`` element is collected. A page that fails
    (network error, timeout, HTTP error status, ...) is reported on stdout
    and skipped; the remaining pages are still attempted.

    Args:
        base_url (str): URL of the first listing page; later pages are
            derived from it by appending ``/page/<N>``
        total_pages (int): Total number of pages to scrape; values below 1
            produce an empty result without any request being made
        delay (float): Seconds to wait between two page requests
        timeout (float): Seconds to wait for the server before giving up
            on a page

    Returns:
        list: The collected ``href`` values in page order (duplicates are
        not removed)
    """
    all_recipe_urls = []

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # Avoid a double slash when the caller passes a trailing "/".
    page_url_root = base_url.rstrip('/')

    for page in range(1, total_pages + 1):
        # The first page lives at the base URL; later pages under /page/N.
        url = base_url if page == 1 else f"{page_url_root}/page/{page}"

        try:
            # Without a timeout a single stalled connection would block the
            # whole run indefinitely.
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # Collect per page first so the progress message reports the
            # number of URLs actually extracted, not the number of articles.
            page_urls = []
            for article in soup.find_all('article'):
                link = article.find('a')
                if link and 'href' in link.attrs:
                    page_urls.append(link['href'])

            all_recipe_urls.extend(page_urls)
            print(f"Scraped page {page} successfully - Found {len(page_urls)} recipes")

        except Exception as e:
            # Deliberately best-effort: report the failure and move on so one
            # bad page does not discard everything scraped so far.
            print(f"Error scraping page {page}: {str(e)}")

        # Be respectful to the server after failures too, but do not wait
        # once the last page has been handled.
        if page < total_pages:
            time.sleep(delay)

    return all_recipe_urls

def save_urls_to_txt(urls, filename='recipe_urls.txt'):
    """
    Save the scraped URLs to a text file, one URL per line.

    The file is created or overwritten and always written as UTF-8, so the
    result does not depend on the platform's default encoding. A summary
    line is printed to stdout afterwards.

    Args:
        urls (list): List of recipe URLs (strings)
        filename (str): Name of output text file
    """
    # Explicit encoding: the locale default (e.g. cp1252 on Windows) could
    # fail on non-ASCII characters in a URL.
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(url + '\n' for url in urls)
    print(f"Saved {len(urls)} URLs to {filename}")



In [5]:
# Driver cell: scrape the recipe listing and persist the collected URLs.

# Listing URL handed to get_recipe_urls as its base_url argument.
BASE_URL = "https://pinchofyum.com/recipes/all"
# Number of listing pages to request (pages 1 through TOTAL_PAGES).
# NOTE(review): presumably the page count shown on the site when this was
# written - confirm before re-running.
TOTAL_PAGES = 107

# Scrape the URLs (prints one progress or error line per page)
recipe_urls = get_recipe_urls(BASE_URL, TOTAL_PAGES)

# Save to text file (default filename: recipe_urls.txt), one URL per line
save_urls_to_txt(recipe_urls)

Scraped page 1 successfully - Found 12 recipes
Scraped page 2 successfully - Found 12 recipes
Scraped page 3 successfully - Found 12 recipes
Scraped page 4 successfully - Found 12 recipes
Scraped page 5 successfully - Found 12 recipes
Scraped page 6 successfully - Found 12 recipes
Scraped page 7 successfully - Found 12 recipes
Scraped page 8 successfully - Found 12 recipes
Scraped page 9 successfully - Found 12 recipes
Scraped page 10 successfully - Found 12 recipes
Scraped page 11 successfully - Found 12 recipes
Scraped page 12 successfully - Found 12 recipes
Scraped page 13 successfully - Found 12 recipes
Scraped page 14 successfully - Found 12 recipes
Scraped page 15 successfully - Found 12 recipes
Scraped page 16 successfully - Found 12 recipes
Scraped page 17 successfully - Found 12 recipes
Scraped page 18 successfully - Found 12 recipes
Scraped page 19 successfully - Found 12 recipes
Scraped page 20 successfully - Found 12 recipes
Scraped page 21 successfully - Found 12 recipes
S