In [1]:
# imports

import os
import re
from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI

In [2]:
!pip install youtube_transcript_api



In [3]:
from youtube_transcript_api import YouTubeTranscriptApi

In [4]:
# Load environment variables in a file called .env

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

In [None]:
class YouTubeRecipeExtractor:

    def __init__(self):
        self.openai = OpenAI()
        self.system_prompt = self.get_system_prompt()

    def get_system_prompt(self):
        return """
        You are a professional chef and nutritionist specializing in recipe writting.

        Your task is to write recipes in a very comprehensive and consistent manner.
        Each recipe will contain a list of ingredients and a list of steps to follow.
        The quantities of the ingredients should always be referred to an official unit (grams, litres, etc). If the original recipe uses a different unit (such as cup, teaspoons, etc.) make the transformation but keep the original instruction between parenthesis.
        The steps should be described in a very synthetic and concise manner. You should avoid being verbose, but the step should be understandable and easy to follow for non-expert people.
        To each recipe add a general analysis from nutrition perspective (number of calories per serving, proteins, fat, etc.).
        Use Markdown to improve readability.
        If the text you receive is not a recipe, return a kind message explaining the situation.
        """

    def extract_video_id(self, url):
        """Extract video ID from YouTube URL"""
        pattern = r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([^&\n?#]+)'
        match = re.search(pattern, url)
        return match.group(1) if match else None

    def get_transcription(self, video_id):
        try:
            print(f"Fetching video transcript for video {video_id}...")
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            return " ".join([item['text'] for item in transcript])
        except Exception as e:
            print(f"Error fetching transcript: {e}")
            return None

    def format_recipe(self, transcript):
        try:
            response = self.openai.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": self.system_prompt},
                    {"role": "user", "content": f"Summarize the following YouTube recipe:\n\n{transcript}"}
                ]
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error summarizing text: {e}")
            return None

    def display_recipe(self, url):
        transcript = self.get_transcription(self.extract_video_id(url))
        recipe = self.format_recipe(transcript)
        display(Markdown(recipe))


In [6]:
test_bad_url = "https://www.youtube.com/watch?v=hzGiTUTi060"
test_good_url = "https://www.youtube.com/watch?v=D_2DBLAt57c"

In [7]:
extractor = YouTubeRecipeExtractor()


In [8]:
extractor.display_recipe(test_bad_url)

Fetching video transcript...


Thank you for your interest, but the text you provided is not a recipe. If you're looking for cooking instructions, ingredient lists, or nutrition analysis, please provide a specific food or dish you would like to know about, and I'd be happy to help!

In [8]:
extractor.display_recipe(test_good_url)

Fetching video transcript for video D_2DBLAt57c...
Error fetching transcript: YouTubeTranscriptApi.fetch() missing 1 required positional argument: 'self'


It seems like you haven't provided a recipe or any details to summarize. If you have a specific recipe in mind, please share it, and I'll be happy to help!