In [4]:
from pydantic import BaseModel
from typing import Literal

from openai import OpenAI

import instructor

# Wrap an OpenAI SDK client with instructor.
# NOTE(review): `client` is not referenced again in the visible cells (later cells build
# their own `instructor_client`) — confirm whether this binding is still needed.
client = instructor.from_openai(OpenAI())


In [5]:
import uuid

from pydantic import BaseModel, Field
from pydantic.json_schema import SkipJsonSchema

# Schema for one multiple-choice question produced by the LLM.
#
# Documentation is kept in `#` comments on purpose: a class docstring would be picked up
# by pydantic as the schema description and change what is sent to the model.
class QuestionAnswer(BaseModel):
    question: str = Field(description="Question about the topic")
    # Pydantic v2 spells list-size constraints `min_length` / `max_length`;
    # the v1 names `min_items` / `max_items` are deprecated.
    options: list[str] = Field(
        description="Potential answers to the question.",
        min_length=3,
        max_length=5,
    )
    # NOTE: the upper bound mirrors the largest allowed `options` list (5); it is not
    # checked against the actual number of options, so e.g. index 4 passes with 3 options.
    answer_index: int = Field(
        description="Index of the correct answer options (starting from 0).",
        ge=0,
        lt=5,
    )
    difficulty: int = Field(
        description="Difficulty of this question from 1 to 5, 5 being the most difficult.",
        gt=0,
        le=5,
    )
    # Left out of the JSON schema via `SkipJsonSchema`; defaults to None.
    youtube_url: SkipJsonSchema[str | None] = None
    # A fresh UUID4 is generated when no value is supplied.
    id: uuid.UUID = Field(
        description="Unique identifier",
        default_factory=uuid.uuid4,
    )

In [13]:
from urllib.parse import parse_qs, urlparse

from youtube_transcript_api import YouTubeTranscriptApi

youtube_url = "https://www.youtube.com/watch?v=eI89jduge3Q&ab_channel=SimpleNursing"

# Read only the `v` query parameter. Splitting the URL on "?v=" would keep the
# trailing "&ab_channel=..." as part of the video id.
video_id = parse_qs(urlparse(youtube_url).query)["v"][0]

# Each segment is a mapping with a "text" entry; join them into one string.
segments = YouTubeTranscriptApi.get_transcript(video_id)
transcript = " ".join(s["text"] for s in segments)

In [14]:
# Show the joined transcript text as the cell output to eyeball what was fetched.
transcript

"now for congenital heart defects this is an abnormality in the heart that develops before birth where one or more problems within the heart structures changes the way blood flows through the heart and out to the body now in terms of risk factors genetics plays a huge role so family history of heart disease and down syndrome predisposes a child to congenital heart defects and even during pregnancy if the mother has infection like rubella or uses Al alol or drug abuse and even diabetes all these things are huge risk factors now in terms of the types we're going to break down all the pathophysiology signs and symptoms and treatments for all the top tested cardiac defects but first let's cover some key points that apply to all of these as you know in a normal healthy heart deoxygenated blood is vacuumed back to the heart via the veins through the venne Cava and into the right side of the heart then then pushed into the lungs to get oxygenated after that this oxygenated blood is pushed to 

In [12]:
import instructor
import openai

# instructor-wrapped OpenAI client; lets `response_model=` be passed to chat completions.
instructor_client = instructor.from_openai(openai.OpenAI())

# System instructions, kept as three newline-separated lines.
system_prompt = (
    "Analyze the given YouTube transcript and generate question-answer pairs\n"
    "to help study and understand the topic better. Please rate all questions from 1 to 5\n"
    "based on their difficulty."
)

# Request an iterable of `QuestionAnswer` objects for the transcript (gpt-4o-mini).
response = instructor_client.chat.completions.create_iterable(
    model="gpt-4o-mini",
    response_model=QuestionAnswer,
    messages=[
        dict(role="system", content=system_prompt),
        dict(role="user", content=transcript),
    ],
)

In [36]:
import instructor
import openai

# instructor-wrapped OpenAI client used for the structured-output request below.
instructor_client = instructor.from_openai(openai.OpenAI())

# The prompt is three lines joined by newlines.
system_prompt = "\n".join(
    [
        "Analyze the given YouTube transcript and generate question-answer pairs",
        "to help study and understand the topic better. Please rate all questions from 1 to 5",
        "based on their difficulty.",
    ]
)

# Same request shape as the gpt-4o-mini cell, this time against gpt-4o.
response = instructor_client.chat.completions.create_iterable(
    response_model=QuestionAnswer,
    model="gpt-4o",
    messages=[
        {"content": system_prompt, "role": "system"},
        {"content": transcript, "role": "user"},
    ],
)

In [37]:
print("Preview:\n")
# Walk the response and stop once the running index exceeds 10.
for position, qna in enumerate(response):
    if position > 10:
        break
    # One line each: question text, option list, index of the correct option.
    print(qna.question, qna.options, qna.answer_index, sep="\n")


Preview:

What is a congenital heart defect?
['An infection in the heart after birth', 'A genetic mutation affecting heart rhythm', 'An abnormality in the heart that develops before birth']
2
Which of the following is a risk factor for congenital heart defects?
['Vegan diet during pregnancy', 'High blood pressure in the father', 'Infections like rubella during pregnancy']
2
What is cardiac output?
['The amount of oxygen-rich blood pumped out to the body', 'The ability to breathe normally', 'The pressure of blood flow through the veins']
0
In congenital heart defects, less blood is pumped out of the heart, resulting in which complication?
['Increased heart rate', 'Decreased cardiac output', 'High blood pressure']
1
Which condition is commonly tested due to right-to-left blood flow problems?
['Truncus arteriosus', 'Tetralogy of Fallot (TF)', 'Atrial septal defect (ASD)']
1
What are two symptoms associated with cyanotic congenital heart defects?
['Frequent urination and swelling', 'Cyanos

In [30]:
from burr.core import action, State


@action(reads=[], writes=["youtube_url"])
def process_user_input(state: State, user_input: str) -> State:
    """Store the user-supplied value under the `youtube_url` state key.

    Args:
        state: Current application state.
        user_input: Value provided by the user; written to state unchanged.

    Returns:
        The state returned by `state.update(...)` with `youtube_url` set.
    """
    # No validation happens here yet; a real application would check the input first.
    return state.update(youtube_url=user_input)


def _extract_video_id(youtube_url: str) -> str:
    """Return the value of the `v` query parameter of a YouTube watch URL.

    Raises:
        ValueError: if the URL carries no non-empty `v` query parameter.
    """
    # Imported locally so this cell does not rely on imports made in other cells.
    from urllib.parse import parse_qs, urlparse

    # `parse_qs` splits the query on "&", so extra parameters such as
    # "&ab_channel=..." never end up inside the id (unlike splitting on "?v=").
    candidates = parse_qs(urlparse(youtube_url).query).get("v", [])
    if not candidates:
        raise ValueError(
            f"Could not find a video id (`v=` query parameter) in URL: {youtube_url!r}"
        )
    return candidates[0]


@action(reads=["youtube_url"], writes=["transcript"])
def get_youtube_transcript(state: State) -> State:
    """Fetch the YouTube transcript for the video at `state["youtube_url"]`.

    Args:
        state: Current application state; must contain `youtube_url`.

    Returns:
        The state returned by `state.update(...)`, with `transcript` set to the
        transcript segments' text joined by single spaces.

    Raises:
        ValueError: if no video id can be extracted from the URL.
    """
    youtube_url = state["youtube_url"]
    video_id = _extract_video_id(youtube_url)

    # Each returned entry is indexed by "text"; keep only the text and join it.
    segments = YouTubeTranscriptApi.get_transcript(video_id)
    full_transcript = " ".join(entry["text"] for entry in segments)

    # store the transcript in state
    return state.update(transcript=full_transcript, youtube_url=youtube_url)


@action(reads=["transcript", "youtube_url"], writes=["question_answers"])
def generate_question_and_answers(state: State) -> State:
    """Ask an LLM for `QuestionAnswer` items about the transcript and collect them in state.

    Args:
        state: Current application state; `transcript` and `youtube_url` are read from it.

    Returns:
        The state after every generated item has been appended to `question_answers`.
    """
    # Pull both inputs out of state up front.
    transcript_text = state["transcript"]
    source_url = state["youtube_url"]

    prompt = " ".join(
        (
            "Analyze the given YouTube transcript and generate question-answer pairs",
            "to help study and understand the topic better. Please rate all questions from 1 to 5",
            "based on their difficulty.",
        )
    )

    # instructor-wrapped client, so the call can take `response_model=`.
    llm = instructor.from_openai(openai.OpenAI())
    qna_stream = llm.chat.completions.create_iterable(
        model="gpt-4o-mini",
        response_model=QuestionAnswer,
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": transcript_text},
        ],
    )

    for item in qna_stream:
        # Tag each generated item with the video it came from.
        item.youtube_url = source_url
        # `State` is immutable: `.append()` hands back a new object, so rebind it.
        state = state.append(question_answers=item)

    return state

In [32]:
from burr.core import ApplicationBuilder

# Register the three actions defined above.
app_builder = ApplicationBuilder().with_actions(
    process_user_input,
    get_youtube_transcript,
    generate_question_and_answers,
)

# Chain them in order, with the last step leading back to the first.
app_builder = app_builder.with_transitions(
    ("process_user_input", "get_youtube_transcript"),
    ("get_youtube_transcript", "generate_question_and_answers"),
    ("generate_question_and_answers", "process_user_input"),
)

# Execution starts at the user-input step.
app = app_builder.with_entrypoint("process_user_input").build()
app.visualize()

ExecutableNotFound: failed to execute PosixPath('dot'), make sure the Graphviz executables are on your systems' PATH

<graphviz.graphs.Digraph at 0x12c22aab0>