# Setup

In [1]:
import os
import pathlib
import pickle

import httpx
from dotenv import load_dotenv
from google import genai
from google.genai import types as gemini_types
from langchain_google_genai import ChatGoogleGenerativeAI
from tqdm import tqdm

from eduly import EdulyAnimationClient, EdulyBreakdownClient

# Load environment variables from the .env file one level above the working directory.
load_dotenv(dotenv_path="../.env")

# Indexing os.environ raises KeyError straight away if the key is not configured.
aistudio_gemini_api_key = os.environ['GOOGLE_API_KEY']

# Echo only the first three characters and the last one, as a check that a key was loaded.
key_preview = f"{aistudio_gemini_api_key[:3]}...{aistudio_gemini_api_key[-1:]}"
print(key_preview)

# Gemini client shared by the rest of the notebook.
gemini_client = genai.Client(api_key=aistudio_gemini_api_key)

# Model identifier passed to the client calls below.
MODEL_NAME = "gemini-3-flash-preview"



AIz...8


# Breakdown

In [2]:
# Breakdown client built on the Gemini client created in the setup cell.
eduly_breakdown_client = EdulyBreakdownClient(gemini_client)

In [3]:
# One-off generation step, left commented out: it calls the breakdown client on the
# paper PDF and pickles the result to the cache file that the next cell loads.
# Uncomment to regenerate the cached breakdown.
#
# breakdown_obj, raw_breakdown_response = eduly_breakdown_client.breakdown(
#     file_path=pathlib.Path("./attention_is_all_you_need/attention_is_all_you_need.pdf"),
#     model=MODEL_NAME,
#     thinking_level="high"
# )

# with open('./attention_is_all_you_need/cached_outputs/attention_is_all_you_need_breakdown.pkl', 'wb') as f:
#     pickle.dump(breakdown_obj, f)

In [4]:
# Cache guard: reuse the pickled breakdown when it exists, otherwise generate it once
# and store it, so the notebook can run top to bottom on a fresh checkout.
breakdown_cache_path = pathlib.Path(
    './attention_is_all_you_need/cached_outputs/attention_is_all_you_need_breakdown.pkl'
)

if breakdown_cache_path.exists():
    # NOTE: pickle.load can execute arbitrary code; only load cache files produced by this notebook.
    with breakdown_cache_path.open('rb') as f:
        breakdown_obj = pickle.load(f)
else:
    print(f"No cached breakdown at {breakdown_cache_path}; generating it (this calls the model).")
    breakdown_obj, raw_breakdown_response = eduly_breakdown_client.breakdown(
        file_path=pathlib.Path("./attention_is_all_you_need/attention_is_all_you_need.pdf"),
        model=MODEL_NAME,
        thinking_level="high"
    )
    # Create the cache directory on first use, then persist the result for later runs.
    breakdown_cache_path.parent.mkdir(parents=True, exist_ok=True)
    with breakdown_cache_path.open('wb') as f:
        pickle.dump(breakdown_obj, f)

In [5]:
# File-name-friendly form of the document title: lower-cased, spaces turned into underscores.
# The variable keeps its existing spelling because the storyboard-loading cell references it.
document_title = breakdown_obj.document_title
santised_title = document_title.lower().replace(" ", "_")
santised_title

'attention_is_all_you_need'

In [6]:
# Print each topic name next to its position in breakdown_obj.topics.
topic_lines = [
    f"Topic {topic_idx}: {topic.name}"
    for topic_idx, topic in enumerate(breakdown_obj.topics)
]
for topic_line in topic_lines:
    print(topic_line)

Topic 0: The Shift from Recurrence to Attention
Topic 1: The Encoder-Decoder Structure
Topic 2: Scaled Dot-Product Attention: The Mathematical Core
Topic 3: Multi-Head Attention: Seeing in Parallel
Topic 4: Positional Encoding: Giving Order to Chaos
Topic 5: Efficiency and Complexity: Why Transformers Scaled
Topic 6: Results and Historical Significance


In [7]:
# One-off generation step, left commented out: it requests a storyboard for every topic
# and pickles each one. Uncomment to regenerate the cached storyboards.
# NOTE(review): the dump path below used to be './cached_outputs/...', which did not match
# the './attention_is_all_you_need/cached_outputs/...' directory the loading cell reads
# from; it now points at that same directory.
#
# storyboards = {}

# for i, topic in tqdm(enumerate(breakdown_obj.topics)):
#     storyboard_obj, raw_storyboard_response = eduly_breakdown_client.storyboard(
#         topic=topic,
#         model=MODEL_NAME,
#         thinking_level="high",
#         # source_file="/./attention_is_all_you_need/attention_is_all_you_need.pdf"
#     )

#     storyboards[topic.name] = storyboard_obj

#     with open(f'./attention_is_all_you_need/cached_outputs/{santised_title}_storyboard_{i}.pkl', 'wb') as f:
#         pickle.dump(storyboard_obj, f)

In [8]:
# Cache guard: load each topic's pickled storyboard when present; otherwise generate it
# once and store it, so this cell still works on a fresh checkout.
storyboard_cache_dir = pathlib.Path('./attention_is_all_you_need/cached_outputs')
storyboards = {}

for i, topic in enumerate(breakdown_obj.topics):
    storyboard_cache_path = storyboard_cache_dir / f'{santised_title}_storyboard_{i}.pkl'

    if storyboard_cache_path.exists():
        # NOTE: pickle.load can execute arbitrary code; only load cache files produced by this notebook.
        with storyboard_cache_path.open('rb') as f:
            storyboards[topic.name] = pickle.load(f)
        continue

    print(f"No cached storyboard for topic {i} ({topic.name}); generating it (this calls the model).")
    storyboard_obj, raw_storyboard_response = eduly_breakdown_client.storyboard(
        topic=topic,
        model=MODEL_NAME,
        thinking_level="high",
    )
    storyboards[topic.name] = storyboard_obj

    # Create the cache directory on first use, then persist this topic's storyboard.
    storyboard_cache_dir.mkdir(parents=True, exist_ok=True)
    with storyboard_cache_path.open('wb') as f:
        pickle.dump(storyboard_obj, f)

In [9]:
# Inspect the loaded storyboards (a dict keyed by topic name).
# NOTE(review): this renders the full repr of every storyboard; `list(storyboards)` would
# show just the topic names and keep the saved output small.
storyboards

{'The Shift from Recurrence to Attention': TopicStoryboard(topic_name='The Shift from Recurrence to Attention', visual_concept="The central metaphor is a 'Conveyor Belt vs. a Global Spotlight.' We visualize Recurrent Neural Networks (RNNs) as a linear, one-at-a-time assembly line that loses detail over distance, contrasted with Attention as a lightning-fast web where every part of a sentence connects to every other part simultaneously.", scenes=[Scene(scene_type='hook', title='The Sequential Bottleneck', visual_description="A long sentence: 'The curious cat, which had spent all morning chasing a bright red laser pointer across the living room floor, finally fell asleep.' The words enter a narrow tube one by one. As each word enters, a small glowing box (the 'hidden state') moves to the next word. It looks slow and rhythmic, like a pulse. As the pulse reaches the end of the sentence, the glow from the word 'cat' at the beginning has almost completely faded away.", narration='For years, 

# Animation

In [None]:
# LangChain chat wrapper configured with MODEL_NAME.
langchain_client = ChatGoogleGenerativeAI(
    model=MODEL_NAME,
    temperature=1.0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)
# Replace the wrapper's `client` attribute with the genai client created in the setup cell.
# NOTE(review): presumably done so both APIs share one configured client; confirm that
# this attribute is meant to be assigned from outside the wrapper.
langchain_client.client = gemini_client

In [11]:
# Smoke test: send a single prompt through the LangChain client and display the response.
langchain_client.invoke("Hello, world!")

AIMessage(content=[{'type': 'text', 'text': 'Hello! How can I help you today?', 'extras': {'signature': 'CvgIAY89a1+b06Gv2pgcZdqk97aLZdnTFh8AekE0LzhnGtC5sB4HmxKPEJ9zONJkUVNUEfz3IP2d1UNADi8xUuGIjN8HUmeqXCna/wo3OadT9exNdD7w24GE24OkIlgbX/LdahlNUz2quFtJy/XI1yIGDDuodOnSfEFZraw9ZituLSr9Y29jpWfkEQUP69VOYX4F1Ed7JfwkkqDES6NimiUCn7AiVKn97e88wEN7hVCpj6VMwsALp6XVXHWTobttgFCW+9CgM6D1fVuaiRs1Tfq4VNsqnu2xaSvswd507adSBWTKMNBVx64NcbxClKskGEiiB4mQFleW7yCx9vy3nt9+ctI/CvkVA/CyjDqlhREaYunjetdc7cLF54/eWORnogZve8MZoOsuVpWIbx4EbcUW87XQlfnHsTx/QBcPQMvwaX/gPcX9K9oz2zKOt+GagrolGsOAHuAIZ78Fff3VFVtQzDx/6wbaZqXpOpnN1kK1LPgq43uUzfX9xteFVpNbbSMw8ZIhKNEO+fk8viFueHWCswfUzzgErEUNRv34GLlVW5No8qFoY2z7aNSGUByoJLCe1w5H9xh3Vz2P7OLXra8hvLw9+azE8KFr+RfVtiedKh40itbxalKYJ1tz+2h0RX5md0ERsKB+8LO4iXKkvCxLAHslaMboBV2Z98e0yCXWxaR+1u+T0ya/oDDb8C3XXdzUGjDYN+ktl2SOlR8lUGd0rkDDuHA1ZakkCUHn9sNAxQi4YI3wt+2bgys8BEjo6C/gtsdtFZuZ50dA0pQrk+4bq+tiT1fF7fj9ZIvG0FO88H4Cricn7poVNpioKYBcJ/FGvELOES0Dddmqn+dQTeJgNcj55dO8RUNfBgiek70bLabf0OoVsaFT6CWevNa

In [12]:
# Animation client driven by the LangChain client; './agent_workspace/' is passed as its workspace path.
eduly_animation_client = EdulyAnimationClient(langchain_client, agent_workspace_path='./agent_workspace/')

In [13]:
# NOTE(review): re-displays the same full `storyboards` dict already shown in the Breakdown
# section; `list(storyboards)` would be a compact alternative.
storyboards

{'The Shift from Recurrence to Attention': TopicStoryboard(topic_name='The Shift from Recurrence to Attention', visual_concept="The central metaphor is a 'Conveyor Belt vs. a Global Spotlight.' We visualize Recurrent Neural Networks (RNNs) as a linear, one-at-a-time assembly line that loses detail over distance, contrasted with Attention as a lightning-fast web where every part of a sentence connects to every other part simultaneously.", scenes=[Scene(scene_type='hook', title='The Sequential Bottleneck', visual_description="A long sentence: 'The curious cat, which had spent all morning chasing a bright red laser pointer across the living room floor, finally fell asleep.' The words enter a narrow tube one by one. As each word enters, a small glowing box (the 'hidden state') moves to the next word. It looks slow and rhythmic, like a pulse. As the pulse reaches the end of the sentence, the glow from the word 'cat' at the beginning has almost completely faded away.", narration='For years, 

In [14]:
# Select the storyboard to animate, then list its scene titles with their positions.
test_storyboard_1 = storyboards["Scaled Dot-Product Attention: The Mathematical Core"]
print(test_storyboard_1.topic_name)

scene_titles = [scene.title for scene in test_storyboard_1.scenes]
for scene_idx, scene_title in enumerate(scene_titles):
    print(scene_idx, scene_title)

Scaled Dot-Product Attention: The Mathematical Core
0 The Ambiguity Problem
1 Defining Q, K, and V
2 The Dot Product Comparison
3 The Dimension Explosion
4 The Softmax Wall
5 The Scaling Factor
6 Softmax to Weights
7 The Final Weighted Sum
8 Hardware Optimization: FlashAttention
9 The Complete Formula


In [None]:
# Be patient! This takes a while (5 minutes if you're lucky with no iterations, sometimes longer).
# topic_index=2 is the position of this topic in the topic list printed in the Breakdown
# section ("Topic 2: Scaled Dot-Product Attention: The Mathematical Core").
# NOTE(review): the exact meaning and units of max_iterations and ratelimit are not
# visible in this notebook; confirm them in the eduly package.
storyboard_1_animation_results = eduly_animation_client.animate_single(
    breakdown=breakdown_obj,
    storyboard=test_storyboard_1,
    topic_index=2,
    max_iterations=5,
    ratelimit=60
)

In [None]:
# Report the run's success flag and the video_path stored on the result object.
print("Success : ", storyboard_1_animation_results.success)
print(storyboard_1_animation_results.video_path)

Success :  True
/agent_workspace/rendered_videos/Scaled_Dot-Product_Attention_The_Mathematical_Core_2.mp4


In [26]:
# Select the second storyboard to inspect, then list its scene titles with their positions.
test_storyboard_2 = storyboards["Multi-Head Attention: Seeing in Parallel"]
print(test_storyboard_2.topic_name)

scene_titles = [scene.title for scene in test_storyboard_2.scenes]
for scene_idx, scene_title in enumerate(scene_titles):
    print(scene_idx, scene_title)

Multi-Head Attention: Seeing in Parallel
0 The Problem of the Weighted Average
1 Enter the Multi-Head Prism
2 Parallel Perspectives: Syntax vs. Semantics
3 The Math of Scaling Down
4 The Concatenation Step
5 The Final Projection
6 Optimizing for Speed: GQA and MQA
7 The Power of Many Eyes
