In [None]:
import requests
import json
from dataclasses import dataclass, field

import json
from pathlib import Path

from youtube_transcript_api import YouTubeTranscriptApi
import os
from urllib.parse import urlparse, parse_qs
import re
from IPython.display import Markdown

def get_youtube_transcript(video_url: str, languages: tuple[str, ...] | str):
    # ISO 639-1 language code
    languages = (languages,) if isinstance(languages, str) else languages
    video_id = parse_qs(urlparse(video_url).query)['v'][0]
    try:
        # Try preferred language. If the language is not available this will fail.
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=languages) # type: ignore
        # An entry contains text, start and duration.
        full_text = " ".join([entry["text"] for entry in transcript])  # type: ignore
        return full_text
    except Exception as exc:
        print(f"Error fetching transcript: {exc}")
        return None


@dataclass(kw_only=True, frozen=True)
class OpenRouterAiProxy:
    api_key: str = field(repr=False)
    assistant_content = """
        I need to structure these points concisely in French,
        ensuring each major topic is covered with its key arguments and outcomes. 
        The user likely wants a clear, structured summary without missing critical debates or perspectives. 
        I should avoid personal opinions and stick to the transcript's content, 
        highlighting the main discussions, differing viewpoints, and conclusions where present. 
        Also, noting the cultural and political implications of each topic as discussed by the panelists.
""".strip()

    def prompt(self, user_content: str):
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
            data=json.dumps(
                {
                    "model": "deepseek/deepseek-chat-v3-0324:free",
                    # "model": "deepseek/deepseek-r1-zero:free",
                    "messages": [
                        {"role": "user", "content": user_content},
                        {"role": "assistant", "content": self.assistant_content},
                    ],
                }
            ),
        )
        response_dict = json.loads(response.content.decode())
        return response_dict


def retrieve_api_key(
    *, secrets_path: Path = Path.home() / Path(".secrets/secrets.json")
):
    """Read the OpenRouter API key from a JSON secrets file.

    The file must hold an object shaped like
    ``{"openrouter.ai": {"api-key": "<key>"}}``.

    Args:
        secrets_path: Location of the secrets file; defaults to
            ``.secrets/secrets.json`` under the user's home directory.

    Returns:
        The value stored under ``["openrouter.ai"]["api-key"]``.

    Raises:
        FileNotFoundError: If ``secrets_path`` is not an existing file.
    """
    # Guard clause: bail out early when there is nothing to read.
    if not secrets_path.is_file():
        raise FileNotFoundError(f"No secrets found for {secrets_path = !s}")

    secrets = json.loads(secrets_path.read_text())
    provider_entry = secrets["openrouter.ai"]
    return provider_entry["api-key"]


# Basic prompt template (written in French). It asks the model to summarize
# the key points of a video transcript, with a 3-5 item bullet list per key
# point, and to answer in French. The "{transcript}" placeholder is filled in
# with str.format() by summarize_youtube_video().
prompt = """
Tu es un assistant qui synthétises les transcripts vidéo. 
S'il-te-plait, résume les points clefs du transcript qui suit.
Pour chaque point clef, écris une liste à puces de 3 à 5 éléments.
Le transcript est en français et je souhaite un résumé en français.
Voici le transcript:

{transcript}
"""


# Candidate videos to summarize. Every assignment below overwrites the
# previous one, so only the LAST `video_url` is actually used; move the wanted
# video to the end to select it. Each entry gives the original (French) video
# title followed by an English gloss.

# Title: La Chine "encercle" Taïwan : une démonstration de force pour tester Donald Trump ?
# (EN: China "encircles" Taiwan: a show of force to test Donald Trump?)
video_url = "https://www.youtube.com/watch?v=VTmwhwu1JEc"

# Title: Le Pen, Sarkozy, etc. : peut-on juger les politiques sans faire de politique ? | 28 minutes | ARTE
# (EN: Le Pen, Sarkozy, etc.: can politicians be judged without playing politics?)
video_url = "https://www.youtube.com/watch?v=6JGOZ6KzySo"

# Title: La CHUTE ANNONCÉE de la maison Le Pen
# (EN: The foretold fall of the house of Le Pen)
video_url = "https://www.youtube.com/watch?v=blaMIWIIGsc"

# Title: Redéfinition pénale du viol, inéligibilité de Le Pen, Grok : Le Club | 28 minutes | ARTE
# (EN: Redefining rape in criminal law, Le Pen's ineligibility, Grok: Le Club)
video_url = "https://www.youtube.com/watch?v=zth-w2tHv8Q"

# Title: Réchauffement climatique : la climatisation au cœur du problème ? | 28 minutes | ARTE
# (EN: Global warming: is air conditioning at the heart of the problem?)
video_url = "https://www.youtube.com/watch?v=w9jww6YUjzQ"

# Title: "Demain, deux ou trois visions de la démocratie vont s'affronter", selon l'essayiste Blanche Leridon
# (EN: "Tomorrow, two or three visions of democracy will clash", according to essayist Blanche Leridon)
video_url = "https://www.youtube.com/watch?v=BH-T06BuEgg"

# Title: J'ai lu le livre de Bardella (pour que vous n'ayez pas à le faire)
# (EN: I read Bardella's book (so you don't have to))
# The URL carries an extra "t=20s" query parameter next to "v".
video_url = "https://www.youtube.com/watch?v=wqiwFo6WB_M&t=20s"

# Title: La France et ses outre-mer : des territoires complexes | Le dessous des cartes - ARTE
# (EN: France and its overseas territories: complex territories)
video_url = "https://www.youtube.com/watch?v=jS66aYoRa2k"

# Title: VU du 05/04/2025 : GROK décrit MUSK
# (EN: VU of 05/04/2025: GROK describes MUSK)
video_url = "https://www.youtube.com/watch?v=XwOGO6XCt0A"

# Title: Marine Le Pen à l’offensive, droits de douane, Godlywood : Le Club international | 28 minutes | ARTE
# (EN: Marine Le Pen on the offensive, tariffs, Godlywood: Le Club international)
# Last assignment: this is the video that gets summarized.
video_url = "https://www.youtube.com/watch?v=MMGNxoVNrxQ"

def summarize_youtube_video(
    video_url: str,
    prompt: str,
    *,
    languages: tuple[str, ...] | str = "fr",
) -> str:
    """Fetch a video's transcript and have the model summarize it.

    Args:
        video_url: URL of the YouTube video to summarize.
        prompt: Template containing a ``{transcript}`` placeholder, filled in
            with ``str.format``.
        languages: Transcript language code(s) to request; defaults to
            ``"fr"``, the value that used to be hard-coded.

    Returns:
        The text of the first completion choice returned by the model.

    Raises:
        RuntimeError: If the transcript could not be retrieved, or if the API
            response contains no completion (for example an error payload).
        FileNotFoundError: If the secrets file holding the API key is missing.
    """
    print(f"Fetching transcript for video: {video_url}")
    transcript = get_youtube_transcript(video_url, languages=languages)
    if not transcript:
        raise RuntimeError("Could not retrieve transcript")

    proxy = OpenRouterAiProxy(api_key=retrieve_api_key())
    response = proxy.prompt(prompt.format(transcript=transcript))
    try:
        return response["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        # Error payloads carry no "choices"; surface the API's own detail
        # instead of an opaque KeyError.
        detail = response.get("error", response) if isinstance(response, dict) else response
        raise RuntimeError(f"OpenRouter returned no completion: {detail}") from exc

# Run the whole pipeline on the selected video. This performs the network
# calls (transcript fetch, then the OpenRouter request) at cell execution time.
result = summarize_youtube_video(video_url, prompt)
# Render the summary as Markdown in the notebook output.
# NOTE(review): `display` is not imported in the visible code - presumably it
# is supplied by the IPython/Jupyter environment; confirm before running this
# as a plain script.
display(Markdown(result))
# Also print the raw text with every "**" replaced by "*".
print(result.replace("**", "*"))

In [None]:
# Re-print the summary held in `result` with every "**" replaced by "*"
# (same output as the final statement of the previous cell).
print(result.replace("**", "*"))