In [1]:
import requests
from bs4 import BeautifulSoup

# Define a function to generate Food.com URLs from IDs and names
def generate_food_com_url(recipe_id, recipe_name):
    """Build the Food.com recipe URL for a recipe ID and name.

    The name is turned into a slug by joining its whitespace-separated
    words with hyphens; the ID is appended after a final hyphen.

    Args:
        recipe_id: Recipe identifier; interpolated into the URL as-is.
        recipe_name: Recipe name as a string.

    Returns:
        str: ``https://www.food.com/recipe/<slug>-<recipe_id>``.
    """
    # split() with no arguments drops leading/trailing whitespace and treats
    # any run of whitespace as a single separator, so "a  b " becomes "a-b"
    # instead of the "a--b-" a plain replace(" ", "-") would produce.
    slug = "-".join(recipe_name.split())
    return f'https://www.food.com/recipe/{slug}-{recipe_id}'

In [2]:
# Sample data (you can replace this with your actual data).
# Each (id, name) pair below becomes one {"id": ..., "name": ...} record.
recipe_data = [
    {"id": sample_id, "name": sample_name}
    for sample_id, sample_name in (
        (55309, "caprese salad tomatoes"),
        (248495, "make your own boursin cheese paula deen"),
        (186029, "the best creole cajun seasoning mix"),
    )
]

In [3]:
# Walk the recipe data, build each Food.com URL, and print it with the name
for recipe in recipe_data:
    recipe_id, recipe_name = recipe["id"], recipe["name"]
    food_com_url = generate_food_com_url(recipe_id, recipe_name)
    # Single print call: sep="\n" puts each piece on its own line, and the
    # trailing "\n" piece plus print's own newline leaves two blank lines
    # after every entry.
    print(
        f"Recipe Name: {recipe_name}",
        f"Food.com URL: {food_com_url}",
        "\n",
        sep="\n",
    )

# You can also save the URLs in a list or a file for further use.


Recipe Name: caprese salad tomatoes
Food.com URL: https://www.food.com/recipe/caprese-salad-tomatoes-55309


Recipe Name: make your own boursin cheese paula deen
Food.com URL: https://www.food.com/recipe/make-your-own-boursin-cheese-paula-deen-248495


Recipe Name: the best creole cajun seasoning mix
Food.com URL: https://www.food.com/recipe/the-best-creole-cajun-seasoning-mix-186029




In [4]:
# Define a function to extract the recipe title from a Food.com URL
def extract_recipe_title(url, timeout=10):
    """Download a page and return the text of its first ``<h1>`` element.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The stripped text of the first ``<h1>``, or ``None`` when the request
        fails, the status code is not 200, or the page has no ``<h1>``.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported like any other failed
        # lookup, so a single bad URL cannot abort a whole batch of fetches.
        return None

    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    title_element = soup.find('h1')
    if not title_element:
        return None
    return title_element.text.strip()


In [5]:
# For every recipe: build its URL, echo it, fetch the page title, and report
for recipe in recipe_data:
    recipe_id, recipe_name = recipe["id"], recipe["name"]
    food_com_url = generate_food_com_url(recipe_id, recipe_name)
    print(food_com_url)
    recipe_title = extract_recipe_title(food_com_url)

    # Deal with the failure case first so the success path is not nested
    if not recipe_title:
        print(f"Unable to fetch proper title for {recipe_name}\n")
        continue

    # sep="\n" puts each piece on its own line; the trailing "\n" piece plus
    # print's own newline leaves two blank lines after the entry.
    print(
        f"Recipe Name: {recipe_name}",
        f"Proper Title: {recipe_title}",
        "\n",
        sep="\n",
    )

# You can also save the titles in a list or a file for further use.



https://www.food.com/recipe/caprese-salad-tomatoes-55309
Recipe Name: caprese salad tomatoes
Proper Title: Caprese Salad Tomatoes (Italian Marinated Tomatoes)


https://www.food.com/recipe/make-your-own-boursin-cheese-paula-deen-248495
Recipe Name: make your own boursin cheese paula deen
Proper Title: Make Your Own Boursin Cheese - Paula Deen


https://www.food.com/recipe/the-best-creole-cajun-seasoning-mix-186029
Recipe Name: the best creole cajun seasoning mix
Proper Title: The Best Creole/Cajun Seasoning Mix




In [None]:
import sqlite3
import requests
from bs4 import BeautifulSoup
from multiprocessing import Pool
import pickle
import time

# Define a function to generate Food.com URLs from IDs and names
def generate_food_com_url(recipe_id, recipe_name):
    """Build the Food.com recipe URL for a recipe ID and name.

    The name is turned into a slug by joining its whitespace-separated
    words with hyphens; the ID is appended after a final hyphen.

    Args:
        recipe_id: Recipe identifier; interpolated into the URL as-is.
        recipe_name: Recipe name as a string.

    Returns:
        str: ``https://www.food.com/recipe/<slug>-<recipe_id>``.
    """
    # split() with no arguments drops leading/trailing whitespace and treats
    # any run of whitespace as a single separator, so "a  b " becomes "a-b"
    # instead of the "a--b-" a plain replace(" ", "-") would produce.
    slug = "-".join(recipe_name.split())
    return f'https://www.food.com/recipe/{slug}-{recipe_id}'

# Define a function to extract the recipe title from a Food.com URL
def extract_recipe_title(url, timeout=10):
    """Download a page and return the text of its first ``<h1>`` element.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The stripped text of the first ``<h1>``, or ``None`` when the request
        fails, the status code is not 200, or the page has no ``<h1>``.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported like any other failed
        # lookup, so a single bad URL cannot abort a whole batch of fetches.
        return None

    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    title_element = soup.find('h1')
    if not title_element:
        return None
    return title_element.text.strip()

# Function to process a single recipe and return (recipe_id, recipe_title)
def process_recipe(recipe):
    """Look up the page title for one recipe.

    Args:
        recipe: A two-item sequence ``(recipe_id, recipe_name)``.

    Returns:
        tuple: ``(recipe_id, recipe_title)``, where ``recipe_title`` is
        whatever ``extract_recipe_title`` returns for the generated URL.
    """
    recipe_id, recipe_name = recipe
    # Build the URL and fetch its title in one step; only the ID and the
    # resulting title are handed back to the caller.
    return recipe_id, extract_recipe_title(
        generate_food_com_url(recipe_id, recipe_name)
    )

# Guard the driver code: with the "spawn"/"forkserver" start methods every
# worker process re-imports the main module, and unguarded top-level code
# would re-run the query below and try to start worker pools of its own.
if __name__ == '__main__':
    # Create a connection to the SQLite database
    db_connection = sqlite3.connect('food_dotcom.db')
    try:
        # Create a cursor and query all entries from the "recipes" table
        cursor = db_connection.cursor()
        cursor.execute('SELECT id, name FROM recipes')
        recipe_data = cursor.fetchall()
    finally:
        # Close the database connection even when the query raises
        db_connection.close()

    # Set the number of parallel processes to use
    num_processes = 4  # Adjust this based on your system's capabilities

    # Define the chunk size (recipes handed to the pool per progress report)
    chunk_size = 3  # Adjust this based on your requirements

    # Split the data into chunks
    chunks = [recipe_data[i:i + chunk_size] for i in range(0, len(recipe_data), chunk_size)]

    # Create a list to store the results
    results = []

    # Initialize variables for progress tracking and timing
    total_recipes = len(recipe_data)
    recipes_processed = 0
    start_time = time.time()

    # Create a multiprocessing Pool
    with Pool(num_processes) as pool:
        for i, chunk in enumerate(chunks, 1):
            # Time each chunk on its own; measuring from start_time would
            # report the cumulative run time under the "Chunk" label.
            chunk_start_time = time.time()

            # pool.map blocks until every recipe in this chunk is done
            chunk_results = pool.map(process_recipe, chunk)
            results.extend(chunk_results)
            recipes_processed += len(chunk)

            # Calculate elapsed time for the current chunk
            chunk_elapsed_time = time.time() - chunk_start_time

            # Display progress and timing information
            print(f'Chunk {i}/{len(chunks)} completed.')
            print(f'Recipes Processed: {recipes_processed}/{total_recipes}')
            print(f'Chunk Elapsed Time: {chunk_elapsed_time:.2f} seconds\n')

    # Store the results (recipe_id, recipe_title) in a pickle file
    with open('recipe_titles.pickle', 'wb') as pickle_file:
        pickle.dump(results, pickle_file)

    # Calculate and display the total execution time (includes the pickle write)
    end_time = time.time()
    total_execution_time = end_time - start_time
    print(f'Total Execution Time: {total_execution_time:.2f} seconds')
