In [None]:
#pip install yt-dlp
import requests
import json
from yt_dlp import YoutubeDL
import os, json
from google import genai
from dotenv import load_dotenv

# ffmpeg must be installed: run `sudo apt install ffmpeg` (Debian/Ubuntu) or `brew install ffmpeg` (macOS),
# or download it manually and add it to your PATH.
# Verify the installation with the terminal commands `which ffmpeg` and `which ffprobe`.
# Pipeline inputs.
url = "" #unique instagram link here
file_type = "mp3"
file_path = "data/CookingAudio.mp3" #default path to save audio file

# load_dotenv() must run before GEMINI_API_KEY is read from the environment,
# so the key can be supplied through a .env file.
load_dotenv()
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])


In [1]:
def download_video(url, file_type = "mp3"):
    """
    Download the media at `url` into the data/ folder using yt-dlp.

    Args:
        url (str): Link to the video to download.
        file_type (str): "mp3" to fetch the best audio stream and convert it
            to mp3 through the FFmpegExtractAudio post-processor, or "mp4" to
            save the download under data/CookingAudio.mp4 (no format is
            forced in that mode; only the output name is fixed).

    Raises:
        ValueError: If `file_type` is neither "mp3" nor "mp4".
    """
    import shutil  # local import: only needed here to locate ffmpeg

    if file_type == "mp4":
        ydl_opts = {"outtmpl": "data/CookingAudio.mp4"}
    elif file_type == "mp3":
        ydl_opts = {
            'format': 'bestaudio/best',  # get best audio stream
            'outtmpl': 'data/CookingAudio',  # extension is supplied by the audio post-processor
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',  # extract audio
                'preferredcodec': 'mp3',     # convert to mp3
                'preferredquality': '192',   # audio quality
            }],
            # Prefer whichever ffmpeg is on PATH (e.g. a Homebrew install);
            # fall back to the previously hard-coded location.
            'ffmpeg_location': shutil.which("ffmpeg") or '/usr/bin/ffmpeg',
        }
    else:
        # Without this branch ydl_opts would be unbound and the call below
        # would fail with an unrelated UnboundLocalError.
        raise ValueError(
            f"Unsupported file_type {file_type!r}; expected 'mp3' or 'mp4'"
        )

    with YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
        




#ydl_opts = {"outtmpl": "%(title)s.%(ext)s"} #Just mp4


In [None]:
def transcribe_and_extract_recipe(file_path="data/CookingAudio.mp3"):
    """
    Uploads a cooking audio or video file to Gemini (model "gemini-2.5-flash")
    and returns the recipe it extracts as parsed JSON.

    The prompt asks the model for a JSON object with:
    - name: short recipe name
    - ingredients: list of tuples [amount, ingredient]
    - caption: short caption summarizing the recipe

    Args:
        file_path (str): Path to the audio (.mp3/.wav/.m4a) or video file.
            Any other extension is described to the model as "video".

    Returns:
        The decoded JSON response; per the prompt this is expected to be a
        dict with keys 'name', 'ingredients', and 'caption' (the shape is
        requested from the model, not validated here).

    Raises:
        ValueError: If the model returns no text or text that is not valid JSON.
    """
    # Detect media type from extension
    audio_extensions = (".mp3", ".wav", ".m4a")
    media_type = "audio" if file_path.lower().endswith(audio_extensions) else "video"

    instructions = (
        f"Listen to the {media_type} and extract a recipe. "
        "If an ingredient amount is not specified, estimate a reasonable value based on context "
        "(for example, how much butter is usually added for 3lb butter chicken). "
        "Return the result strictly as a JSON object with the following keys:\n"
        "- name: short recipe name\n"
        "- ingredients: list of tuples [amount, ingredient]\n"
        "- caption: short caption summarizing the recipe"
    )

    # Upload the file
    uploaded_file = client.files.upload(file=file_path)

    # Generate structured recipe JSON
    resp = client.models.generate_content(
        model="gemini-2.5-flash",  # or "gemini-2.5-pro"
        contents=[
            uploaded_file,
            instructions
        ],
        config={
            "temperature": 0,
            "response_mime_type": "application/json"  # Ensures we get JSON output
        }
    )

    raw_text = resp.text
    # A response without text would make json.loads raise TypeError, which the
    # handler below does not catch; report it as the same ValueError instead.
    if not raw_text:
        raise ValueError(f"Failed to parse response as JSON: {raw_text}")

    try:
        # Convert the returned text into a Python object
        recipe_json = json.loads(raw_text)
    except json.JSONDecodeError as exc:
        raise ValueError(f"Failed to parse response as JSON: {raw_text}") from exc

    return recipe_json


In [None]:
# Full pipeline: download the audio, extract the recipe, print it, clean up.
downloaded_audio = "data/CookingAudio.mp3"  # temporary download to remove afterwards
try:
    download_video(url) #downloaded to data/CookingAudio.mp3
    recipe = transcribe_and_extract_recipe(file_path)
    print(json.dumps(recipe, indent=2))
finally:
    # Remove the temporary download even when a step above fails; skip the
    # removal if the file was never created so the original error is not masked.
    if os.path.exists(downloaded_audio):
        os.remove(downloaded_audio)

#remember to id