In [None]:
import os
import json
import dotenv
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi

# Populate the process environment from a local .env file, if one exists.
dotenv.load_dotenv()

# YouTube Data API key; read from the environment populated above.
API_KEY = os.environ.get("YOUTUBE_API_KEY")

# Fail fast at import time: an unset or empty key cannot be used for API calls.
if API_KEY is None or API_KEY == "":
    raise ValueError("❌ Missing YOUTUBE_API_KEY in .env file")

def get_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Recognised forms (scheme and ``www.``/``m.`` style subdomains optional):

    * ``youtube.com/watch?v=ID`` -- ``v`` may sit anywhere in the query string
    * ``youtu.be/ID``
    * ``youtube.com/embed/ID``, ``/shorts/ID``, ``/live/ID``, ``/v/ID``

    Query parameters other than ``v`` and any ``#fragment`` are ignored.

    Args:
        youtube_url: The URL (or scheme-less URL) to parse.

    Returns:
        The video ID as a non-empty string.

    Raises:
        ValueError: If no video ID can be found in ``youtube_url``.
    """
    # Local import keeps this function self-contained without touching the
    # file-level import block.
    from urllib.parse import parse_qs, urlparse

    url = youtube_url.strip()
    video_id = ""

    try:
        # urlparse only populates the host when a scheme is present, so add
        # one for inputs such as "youtu.be/ID".
        parsed = urlparse(url if "://" in url else f"https://{url}")
        host = (parsed.hostname or "").lower()
    except ValueError:
        # Malformed network location (e.g. unbalanced IPv6 brackets); rely on
        # the substring fallback below instead.
        parsed, host = None, ""

    def _is_domain(domain):
        # True for the domain itself or any of its subdomains.
        return host == domain or host.endswith("." + domain)

    if parsed is not None:
        segments = [part for part in parsed.path.split("/") if part]
        if _is_domain("youtu.be"):
            video_id = segments[0] if segments else ""
        elif _is_domain("youtube.com") or _is_domain("youtube-nocookie.com"):
            if segments[:1] == ["watch"]:
                # parse_qs drops blank values, so "watch?v=" yields no ID.
                video_id = parse_qs(parsed.query).get("v", [""])[0]
            elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
                video_id = segments[1]

    if not video_id:
        # Backward-compatible fallback: the original substring matching, so
        # inputs accepted before (e.g. a bare "watch?v=ID") keep working.
        # The fragment is stripped first so it cannot leak into the ID.
        bare = url.split("#", 1)[0]
        if "watch?v=" in bare:
            video_id = bare.split("watch?v=")[-1].split("&")[0]
        elif "youtu.be/" in bare:
            video_id = bare.split("youtu.be/")[-1].split("?")[0]

    if not video_id:
        raise ValueError("❌ Invalid YouTube URL format")
    return video_id

def get_video_details(video_id):
    """Look up a single video through the YouTube Data API v3.

    Builds a client with the module-level ``API_KEY`` and requests the
    ``snippet``, ``contentDetails``, ``player`` and ``status`` parts for
    ``video_id`` (``status`` is requested but not used in the result).

    Args:
        video_id: ID of the video to look up.

    Returns:
        A dict with the keys ``video_id``, ``title``, ``description``,
        ``caption_status`` and ``embed_html``. Fields missing from the
        response fall back to ``""`` (``"false"`` for ``caption_status``).

    Raises:
        ValueError: If the response contains no items for ``video_id``.
    """
    client = build("youtube", "v3", developerKey=API_KEY)
    request = client.videos().list(
        part="snippet,contentDetails,player,status",
        id=video_id,
    )
    payload = request.execute()

    matches = payload.get("items", [])
    if not matches:
        raise ValueError(f"❌ No video found with ID: {video_id}")
    video = matches[0]

    def section(name):
        # Each requested part is a nested mapping; treat a missing part as empty.
        return video.get(name, {})

    details = {"video_id": video_id}
    details["title"] = section("snippet").get("title", "")
    details["description"] = section("snippet").get("description", "")
    details["caption_status"] = section("contentDetails").get("caption", "false")
    details["embed_html"] = section("player").get("embedHtml", "")
    return details

def get_captions(video_id, lang="en"):
    """Retrieve the transcript for a video, best-effort.

    Works with both API generations of ``youtube-transcript-api``: the
    static ``get_transcript()`` helper when the installed version still
    provides it, otherwise the instance-based ``fetch()`` call, whose result
    is converted with ``to_raw_data()`` so callers keep receiving plain,
    JSON-serialisable data.

    Args:
        video_id: ID of the video whose transcript is requested.
        lang: Language code passed to the library as the only entry of
            ``languages``. Defaults to ``"en"``.

    Returns:
        The transcript data returned by the library on success, or a dict
        of the form ``{"error": "<message>"}`` if anything goes wrong. This
        function never raises; callers must check for the ``error`` key.
    """
    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            return YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
        # Later library releases removed the static helper; without this
        # branch the AttributeError would be swallowed below and saved as
        # the "captions" payload.
        return YouTubeTranscriptApi().fetch(video_id, languages=[lang]).to_raw_data()
    except Exception as e:
        # Deliberate best-effort: report the failure as data instead of
        # aborting the whole export.
        return {"error": str(e)}

if __name__ == "__main__":
    # Prompt for the video to export; surrounding whitespace is dropped.
    youtube_url = input("Enter YouTube Video URL: ").strip()

    try:
        video_id = get_video_id(youtube_url)
        video_details = get_video_details(video_id)
        captions = get_captions(video_id, lang="en")

        # Metadata and captions are stored together in one document.
        combined_result = dict(video_details=video_details, captions=captions)

        # Anchor the output folder to this script's location; `__file__` is
        # not defined in some environments, so fall back to the current
        # working directory there.
        try:
            this_file = __file__
        except NameError:
            base_dir = os.getcwd()
        else:
            base_dir = os.path.dirname(os.path.abspath(this_file))

        output_dir = os.path.join(base_dir, "..", "public", "data")
        os.makedirs(output_dir, exist_ok=True)

        # One JSON file per video, named "<video_id>-en.json".
        filepath = os.path.join(output_dir, f"{video_id}-en.json")
        with open(filepath, "w", encoding="utf-8") as out_file:
            json.dump(combined_result, out_file, indent=2, ensure_ascii=False)

        print(f"✅ Data saved to {filepath}")

    except Exception as exc:
        # Top-level boundary: report any failure to the user and exit normally.
        print(f"❌ Error: {exc}")