In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Load the spreadsheet; every row supplies a URL in its "recipe_link" column.
df = pd.read_excel("video_descriptions_with_links.xlsx")

# Create (or reset) the two output columns so every row starts out blank.
# NOTE: this overwrites any values already stored under these column names.
df["ingredients"] = ""
df["serving_size"] = ""

output_file = "remaining_descriptions_with_ingredients.xlsx"


def _extract_ingredients(soup):
    """Return the page's ingredient lines joined by newlines.

    Each ``li.wprm-recipe-ingredient`` element contributes one line made of
    its amount, unit, name and notes sub-elements (whichever are present),
    separated by single spaces. Returns ``"No ingredients found"`` when the
    page contains no such list items.
    """
    part_selectors = (
        ".wprm-recipe-ingredient-amount",
        ".wprm-recipe-ingredient-unit",
        ".wprm-recipe-ingredient-name",
        ".wprm-recipe-ingredient-notes",
    )
    lines = []
    for li in soup.select("li.wprm-recipe-ingredient"):
        parts = (li.select_one(selector) for selector in part_selectors)
        lines.append(" ".join(part.get_text(strip=True) for part in parts if part))
    return "\n".join(lines) if lines else "No ingredients found"


def _extract_serving_size(soup):
    """Return the serving size as ``"<value> <unit>"``, or ``"Not found"``.

    The value comes from the ``value`` attribute of
    ``input.wprm-recipe-servings``; the unit (optional) is the text of
    ``span.wprm-recipe-servings-unit``.
    """
    serving_input = soup.select_one("input.wprm-recipe-servings")
    if not serving_input:
        return "Not found"
    serving_value = serving_input.get("value", "").strip()
    serving_unit = soup.select_one("span.wprm-recipe-servings-unit")
    unit_text = serving_unit.get_text(strip=True) if serving_unit else ""
    return f"{serving_value} {unit_text}".strip()


try:
    # Iterate (label, link) pairs so results are written to the row's real
    # index label instead of assuming the labels are exactly 0..n-1.
    # `idx` is only a 1-based progress counter used for autosaving.
    for idx, (row_label, link) in enumerate(df["recipe_link"].items(), start=1):
        try:
            response = requests.get(link, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")

            ingredients_text = _extract_ingredients(soup)
            serving_text = _extract_serving_size(soup)
        except Exception as e:
            # Best-effort scraping: a bad link, HTTP error or parse failure is
            # recorded in the row itself so the remaining rows still get
            # processed. KeyboardInterrupt is not an Exception and propagates.
            ingredients_text = serving_text = f"Error: {e}"

        df.at[row_label, "ingredients"] = ingredients_text
        df.at[row_label, "serving_size"] = serving_text

        #  Autosave every 10 recipes
        if idx % 10 == 0:
            df.to_excel(output_file, index=False)
            print(f" Autosaved after {idx} recipes")

except KeyboardInterrupt:
    print("\n Interrupted by user. Saving progress...")
    df.to_excel(output_file, index=False)
    print(" Progress saved. Exiting safely.")
    # `exit` is an interactive-shell helper added by the `site` module and is
    # not meant for program code; raise SystemExit directly instead.
    raise SystemExit(0)

# Final save
df.to_excel(output_file, index=False)
print(" Extraction completed and saved!")


✅ Autosaved after 10 recipes
✅ Autosaved after 20 recipes
✅ Autosaved after 30 recipes
✅ Autosaved after 40 recipes
✅ Autosaved after 50 recipes
✅ Autosaved after 60 recipes
✅ Autosaved after 70 recipes
✅ Autosaved after 80 recipes
✅ Autosaved after 90 recipes
✅ Autosaved after 100 recipes
✅ Autosaved after 110 recipes
✅ Autosaved after 120 recipes
✅ Autosaved after 130 recipes
✅ Autosaved after 140 recipes
✅ Autosaved after 150 recipes
✅ Autosaved after 160 recipes
✅ Autosaved after 170 recipes
✅ Autosaved after 180 recipes
✅ Autosaved after 190 recipes
✅ Autosaved after 200 recipes
✅ Autosaved after 210 recipes
✅ Autosaved after 220 recipes
✅ Autosaved after 230 recipes
✅ Autosaved after 240 recipes
✅ Autosaved after 250 recipes
✅ Autosaved after 260 recipes
✅ Autosaved after 270 recipes
✅ Autosaved after 280 recipes
✅ Autosaved after 290 recipes
✅ Autosaved after 300 recipes
✅ Autosaved after 310 recipes
✅ Autosaved aft