In [2]:
!pip install yt-dlp



In [3]:
# Load API keys (e.g. ASSEMBLYAI_API_KEY) into the process environment.
import os
from pathlib import Path

from dotenv import load_dotenv
# Read the key-value pairs (by default from a .env file) and add them to os.environ.
load_dotenv()  # last expression of the cell, so its return value is shown as the cell output

# Do not print the API key from this cell: cell outputs are saved with the notebook.

True

In [4]:
import os
import time
import requests

# ─── AssemblyAI Configuration ─────────────────────────────────────────────────

# The key is read from the environment; stop immediately when it is missing or empty.
API_KEY = os.environ.get("ASSEMBLYAI_API_KEY")
if API_KEY is None or API_KEY == "":
    raise RuntimeError("Please set your ASSEMBLYAI_API_KEY environment variable")

# Headers sent with the JSON requests made against TRANSCRIBE_URL.
HEADERS = {
    "Authorization": API_KEY,
    "Content-Type": "application/json",
}

# Endpoint used both to create a transcript and (with "/<id>" appended) to poll it.
TRANSCRIBE_URL = "https://api.assemblyai.com/v2/transcript"

# ─── Transcription Function ────────────────────────────────────────────────────

def transcribe_url(
    audio_url: str,
    speakers_expected: int = None,
    poll_interval: int = 5,
    timeout: int = 600,
    request_timeout: float = 30
) -> str:
    """
    Submit an audio file URL to AssemblyAI for transcription and poll until completion.

    Args:
        audio_url: URL of the audio file to transcribe.
        speakers_expected: Number of speakers to detect. When truthy, speaker
            labels are requested and the result is formatted per utterance.
        poll_interval: Seconds to sleep between polling requests (default: 5).
        timeout: Maximum seconds to wait for the job before giving up (default: 600).
        request_timeout: Seconds each individual HTTP request may take before
            `requests` aborts it (default: 30). Without it a stalled connection
            would block forever and the overall `timeout` would never be checked.

    Returns:
        str: One "Speaker X: text" line per utterance when the response contains
        utterances, otherwise the plain transcript text ("" if the API sent none).

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
        requests.Timeout: If a single HTTP request exceeds `request_timeout`.
        RuntimeError: If the job finishes with status "error".
        TimeoutError: If the job is not finished after `timeout` seconds.
    """
    print(f"Starting transcription for: {audio_url}")

    # Set up transcription parameters
    json_payload = {"audio_url": audio_url}
    if speakers_expected:
        json_payload["speaker_labels"] = True
        json_payload["speakers_expected"] = speakers_expected

    # Submit transcription request
    response = requests.post(
        TRANSCRIBE_URL,
        json=json_payload,
        headers=HEADERS,
        timeout=request_timeout
    )
    response.raise_for_status()
    transcript_id = response.json()["id"]
    print(f"Transcription job started with ID: {transcript_id}")

    # Poll for completion. A monotonic clock is used so that system clock
    # adjustments cannot shorten or extend the waiting period.
    status_url = f"{TRANSCRIBE_URL}/{transcript_id}"
    deadline = time.monotonic() + timeout
    while True:
        poll_resp = requests.get(status_url, headers=HEADERS, timeout=request_timeout)
        poll_resp.raise_for_status()
        data = poll_resp.json()
        status = data.get("status")

        print(f"Status: {status}")

        if status == "completed":
            print("Transcription completed!")
            utterances = data.get("utterances")
            if utterances:
                # Format the transcript with speaker labels
                return "".join(
                    f"Speaker {utterance['speaker']}: {utterance['text']}\n"
                    for utterance in utterances
                )
            # `or ""` also covers a response that carries `"text": null`.
            return data.get("text") or ""

        if status == "error":
            error_msg = data.get("error", "Unknown error")
            raise RuntimeError(f"Transcription failed: {error_msg}")

        # The deadline is checked after each poll, so at least one poll always happens.
        if time.monotonic() > deadline:
            raise TimeoutError(f"Transcription timed out after {timeout} seconds")

        time.sleep(poll_interval)

# ─── Usage Example ─────────────────────────────────────────────────────────────

# Example usage:
# audio_url = "https://example.com/podcast.mp3"
# transcript = transcribe_url(audio_url, speakers_expected=2)
# print("Transcript:", transcript)

In [5]:
# ─── YouTube Transcription Setup ──────────────────────────────────────────────

# URL of the video to process; this variable is passed to transcribe_youtube_video()
# in a later cell. Replace it with the video you want to transcribe.
youtube_url = "https://www.youtube.com/watch?v=WjKQQAFwrR4"

# Required imports for YouTube transcription
import yt_dlp
import tempfile
import os
import requests

def get_youtube_video_info(youtube_url: str) -> dict:
    """
    Look up a YouTube video's metadata without downloading the media.

    Args:
        youtube_url: The URL of the YouTube video.

    Returns:
        A dictionary with the keys 'title', 'description', 'uploader' and
        'tags'; a key whose value is absent from the extracted info maps to None.
    """
    wanted_fields = ('title', 'description', 'uploader', 'tags')
    with yt_dlp.YoutubeDL({'quiet': True}) as downloader:
        # download=False asks yt-dlp for the info only.
        metadata = downloader.extract_info(youtube_url, download=False)
        return {field: metadata.get(field) for field in wanted_fields}

def transcribe_youtube_video(
    youtube_url: str,
    poll_interval: int = 5,
    timeout: int = 600,
    speakers_expected: int = None
) -> tuple[str, dict]:
    """
    Download and transcribe YouTube audio using AssemblyAI.

    The audio is downloaded into a temporary directory, uploaded with
    upload_file_to_assemblyai() and transcribed with transcribe_url(). The
    temporary directory is removed when the function returns or raises.

    Args:
        youtube_url: YouTube video URL
        poll_interval: seconds between polling
        timeout: max seconds to wait
        speakers_expected: optional number of speakers; forwarded to
            transcribe_url() (default None forwards no speaker count).
    Returns:
        A tuple containing the transcript text and a dictionary of video metadata.
    Raises:
        RuntimeError: If no non-empty audio file was produced by the download.
        Exceptions raised by the download, upload or transcription steps
        propagate unchanged.
    """
    print(f"Processing YouTube video: {youtube_url}")

    # Extract video metadata
    video_info = get_youtube_video_info(youtube_url)
    print(f"Video Title: {video_info.get('title')}")

    # Create a temporary directory to store the download
    with tempfile.TemporaryDirectory() as tmpdir:
        download_opts = {
            'format': 'bestaudio/best',
            'outtmpl': os.path.join(tmpdir, 'audio'),  # Use a fixed name inside the temp dir
            'noplaylist': True,
        }

        with yt_dlp.YoutubeDL(download_opts) as ydl:
            ydl.download([youtube_url])

        # Locate the downloaded file by its fixed 'audio' prefix; sorting makes
        # the choice deterministic if more than one entry matches.
        audio_file = next(
            (
                os.path.join(tmpdir, entry)
                for entry in sorted(os.listdir(tmpdir))
                if entry.startswith('audio')
            ),
            None,
        )

        if not audio_file or os.path.getsize(audio_file) == 0:
            raise RuntimeError("Failed to download audio file from YouTube.")

        print(f"Downloaded to {audio_file} ({os.path.getsize(audio_file)} bytes)")

        # Upload to AssemblyAI while the temporary file still exists
        print("Uploading audio to AssemblyAI...")
        upload_url = upload_file_to_assemblyai(audio_file)
        print(f"Uploaded URL: {upload_url}")

        # Get transcript
        transcript = transcribe_url(
            upload_url,
            speakers_expected=speakers_expected,
            poll_interval=poll_interval,
            timeout=timeout,
        )
        return transcript, video_info

def upload_file_to_assemblyai(file_path: str, timeout: float = 300) -> str:
    """
    Upload a local audio file to AssemblyAI and return the resulting upload URL.

    The file is streamed as the request body to the `/v2/upload` endpoint, and
    the `upload_url` field of the JSON response is returned so that it can be
    submitted as the `audio_url` of a transcription request.

    Args:
        file_path: Path of the local file to upload.
        timeout: Seconds passed to `requests` as the request timeout
            (default: 300), so a stalled connection cannot block forever.

    Returns:
        str: The `upload_url` reported by the API.

    Raises:
        ValueError: If the file does not exist or is empty.
        requests.HTTPError: If the API answers with an HTTP error status.
        RuntimeError: If the response does not contain an `upload_url`.
    """
    if not os.path.exists(file_path) or os.path.getsize(file_path) == 0:
        raise ValueError(f"File is empty or does not exist: {file_path}")

    print(f"Uploading file: {file_path} (size: {os.path.getsize(file_path)} bytes)")

    # Only the Authorization header is sent here: the body is raw bytes, not JSON.
    with open(file_path, 'rb') as f:
        resp = requests.post(
            'https://api.assemblyai.com/v2/upload',
            headers={'Authorization': API_KEY},
            data=f,
            timeout=timeout
        )

    resp.raise_for_status()
    upload_url = resp.json().get('upload_url')
    if not upload_url:
        # Fail here rather than handing None to the transcription request.
        raise RuntimeError("AssemblyAI upload response did not contain an 'upload_url'")
    return upload_url

In [6]:
# Run the whole pipeline for `youtube_url` and keep both results for later cells.
print("Starting YouTube video transcription...")
transcript, video_info = transcribe_youtube_video(youtube_url)

# Metadata section: a banner, then one "Key: value" line per metadata field.
print("\n" + "="*50, "VIDEO METADATA:", "="*50, sep="\n")
for field_name, field_value in video_info.items():
    print("{}: {}".format(field_name.title(), field_value))

# Transcript section: the same banner layout followed by the transcript text.
print("\n" + "="*50, "TRANSCRIPT:", "="*50, sep="\n")
print(transcript)

Starting YouTube video transcription...
Processing YouTube video: https://www.youtube.com/watch?v=WjKQQAFwrR4
Video Title: How Much Memory for 1,000,000 Threads in 7 Languages | Go, Rust, C#, Elixir, Java, Node, Python
[youtube] Extracting URL: https://www.youtube.com/watch?v=WjKQQAFwrR4
[youtube] WjKQQAFwrR4: Downloading webpage
[youtube] WjKQQAFwrR4: Downloading tv client config
[youtube] WjKQQAFwrR4: Downloading tv player API JSON
[youtube] WjKQQAFwrR4: Downloading ios player API JSON
[youtube] WjKQQAFwrR4: Downloading m3u8 information
[info] WjKQQAFwrR4: Downloading 1 format(s): 251
[download] Destination: /var/folders/1p/vtrslkcd30vfb7ndv5b9c5s40000gp/T/tmp86illz9m/audio
[download] 100% of   24.18MiB in 00:00:02 at 11.60MiB/s    
Downloaded to /var/folders/1p/vtrslkcd30vfb7ndv5b9c5s40000gp/T/tmp86illz9m/audio (25352038 bytes)
Uploading audio to AssemblyAI...
Uploading file: /var/folders/1p/vtrslkcd30vfb7ndv5b9c5s40000gp/T/tmp86illz9m/audio (size: 25352038 bytes)
Uploaded URL: http

In [7]:
# Install required packages
!pip install openai ipywidgets

import os
from openai import OpenAI
from IPython.display import display
import ipywidgets as widgets

# Initialize OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Prepare default context from metadata and transcript
default_context = (
    f"Title: {video_info.get('title')}\n"
    f"Description: {video_info.get('description')}\n"
    f"Uploader: {video_info.get('uploader')}\n"
    f"Tags: {video_info.get('tags')}\n\n"
    "Transcript:\n" + transcript
)

# Create editable text area for context
context_input = widgets.Textarea(
    value=default_context,
    description='Context:',
    layout=widgets.Layout(width='100%', height='300px')
)
display(context_input)

# Button to generate screenplay
generate_btn = widgets.Button(description='Generate Screenplay')
output = widgets.Output()

def on_generate_clicked(b):
    """
    Click handler for the "Generate Screenplay" button.

    Clears the output widget, sends the current text of `context_input` to the
    chat completions API with screenplay-formatting instructions, and prints
    the model's reply inside the output widget.

    Args:
        b: The object passed by the click event; it is not used.
    """
    with output:
        output.clear_output()

        # Instructions first, then whatever the user left in the text area.
        instructions = "Format the following context and transcript into a screenplay format, with scene headings, speaker names, and dialogue.\n\n"
        system_message = {
            "role": "system",
            "content": "You are a helpful assistant that formats transcripts into screenplays.",
        }
        user_message = {"role": "user", "content": instructions + context_input.value}

        completion = client.chat.completions.create(
            model='gpt-4',
            messages=[system_message, user_message],
            temperature=0.7,
        )
        print(completion.choices[0].message.content)

# Register the click handler, then render the button together with the Output
# widget in which the handler prints the generated screenplay.
generate_btn.on_click(on_generate_clicked)
display(generate_btn, output)




Textarea(value="Title: How Much Memory for 1,000,000 Threads in 7 Languages | Go, Rust, C#, Elixir, Java, Node…

Button(description='Generate Screenplay', style=ButtonStyle())

Output()

In [8]:
# Ask the model for a bullet-point summary of `transcript`; the result is kept
# in `summary` for the following cells.
print("Generating concise bullet-point summary of the transcript...")

summary_system_prompt = "You are a helpful assistant that summarizes text into concise bullet points without omitting important details."
summary_user_prompt = (
    "Here is the transcript from the podcast. "
    "Please provide a concise summary in bullet-point format, covering all key points."
    f"\n\n{transcript}"
)

response = client.chat.completions.create(
    model='gpt-4',
    temperature=0.5,
    messages=[
        {"role": "system", "content": summary_system_prompt},
        {"role": "user", "content": summary_user_prompt},
    ],
)

# Only the first choice's message text is used.
summary = response.choices[0].message.content
print("\nSummary:\n", summary, sep="\n")

Generating concise bullet-point summary of the transcript...

Summary:

- The podcast discusses a comparison of memory consumption between asynchronous and multi-threaded programs across languages like Rust, Go, Java, C Sharp, Python, and Node.js.
- Some programs consume little over 100 megabytes, while others reach almost three gigabytes.
- It was noted that direct comparison between programs was difficult due to their complexity and differing features.
- The speaker proposes creating synthetic benchmarks for a more fair comparison.
- The speaker also suggests building a server that performs full interpretation and remote compilation to test the speed of different languages.
- The speaker criticizes the use of simple mathematical formulas as tests, arguing that they don't reflect real-world scenarios that involve memory cleanup, connections, and system calls.
- The speaker provides examples of programs written in various languages, noting differences in their memory usage.
- The speak

In [24]:
# Ensure summary variable exists
if 'summary' not in globals():
    raise NameError("Variable 'summary' not defined. Please run the summary cell before generating the video.")

# Generate a scrolling video of the summary with MoviePy
!pip install gtts moviepy

from gtts import gTTS
from moviepy.editor import AudioFileClip, ColorClip, TextClip, CompositeVideoClip
from IPython.display import Video
import os
from moviepy.video.fx.speedx import speedx as video_speedx

# 1. Create TTS audio from the summary
audio_path = 'summary.mp3'
if os.path.exists(audio_path):
    os.remove(audio_path)

tts = gTTS(summary, lang='en')
tts.save(audio_path)

# 2. Load audio to get duration
audio_clip = AudioFileClip(audio_path)
delay = 4  # seconds before audio starts
tail = 2   # seconds after audio ends
duration = audio_clip.duration
video_duration = delay + duration + tail
print(f"Audio duration: {duration} seconds, video duration: {video_duration} seconds")

# Increase video speed by 15% (make it 85% of original duration)
speed_factor = 0.85
video_duration = video_duration * speed_factor
print(f"Sped up video duration: {video_duration} seconds")

# 3. Create a black background clip for full video
bg = ColorClip(size=(1280, 720), color=(0, 0, 0), duration=video_duration)

# 4. Create a TextClip for the summary using ImageMagick
txt_clip = TextClip(
    summary,
    fontsize=30,
    color='white',
    size=(1000, None),
    method='caption'
)

# Calculate text height and width for centering
txt_h = txt_clip.h
txt_w = txt_clip.w
x_center = (1280 - txt_w) // 2

# 5. Animate the text scrolling synced to audio interval
def scroll_pos(t):
    """Move text from bottom to top over entire video duration"""
    # Text scrolls continuously from bottom to top over full video duration
    progress = t / video_duration
    y = 720 - (720 + txt_h) * progress
    return (x_center, y)

moving_txt = txt_clip.set_position(scroll_pos).set_duration(video_duration)

# 6. Composite visuals and attach delayed audio, then speed everything up
# Composite visuals
visuals = CompositeVideoClip([bg, moving_txt], size=(1280, 720))
# Speed up visuals
sped_visuals = visuals.fx(video_speedx, factor=1.15)
# Attach audio with adjusted delay (scaled by speedup)
adjusted_delay = delay / 1.15
video = sped_visuals.set_audio(audio_clip.set_start(adjusted_delay))

# 7. Write and display the video
out_path = 'summary_scrolling_video.mp4'
if os.path.exists(out_path): os.remove(out_path)
video.write_videofile(
    out_path,
    fps=24,
    codec='libx264',
    audio_codec='aac',
    temp_audiofile='temp-audio.m4a',
    remove_temp=True
)

Video(out_path, embed=True, width=640, height=360)

Audio duration: 122.45 seconds, video duration: 128.45 seconds
Sped up video duration: 109.18249999999999 seconds
Audio duration: 122.45 seconds, video duration: 128.45 seconds
Sped up video duration: 109.18249999999999 seconds
Moviepy - Building video summary_scrolling_video.mp4.
MoviePy - Writing audio in temp-audio.m4a
Moviepy - Building video summary_scrolling_video.mp4.
MoviePy - Writing audio in temp-audio.m4a


                                                                     

MoviePy - Done.
Moviepy - Writing video summary_scrolling_video.mp4



                                                                

Moviepy - Done !
Moviepy - video ready summary_scrolling_video.mp4
