# In [None]:
import os
import json
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi

def load_secrets():
    """Load API keys from ``secrets.json`` one level above the working directory.

    The path ``../secrets.json`` is relative, so it is resolved against the
    process's current working directory, not against this file's location.

    Returns:
        The parsed JSON content of the file, as produced by ``json.load``.

    Raises:
        FileNotFoundError: If ``../secrets.json`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open("../secrets.json", "r", encoding="utf-8") as f:
        return json.load(f)

def get_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Recognised shapes (the scheme is optional and surrounding whitespace is
    ignored):

    * ``https://www.youtube.com/watch?v=ID`` with ``v`` anywhere in the query
    * ``https://youtu.be/ID``
    * ``https://www.youtube.com/embed/ID`` and the ``/shorts/``, ``/live/``
      and ``/v/`` equivalents

    Additional query parameters and ``#fragment`` suffixes are ignored.

    Args:
        youtube_url: The URL as a string.

    Returns:
        The video ID as a non-empty string.

    Raises:
        ValueError: If no video ID can be found in the URL.
    """
    # Imported locally so the function does not depend on file-level imports.
    from urllib.parse import parse_qs, urlparse

    url = youtube_url.strip()
    # urlparse only populates the host when a scheme is present, so supply
    # one for inputs such as "youtu.be/ID" or "//youtu.be/ID".
    if url.startswith("//"):
        url = "https:" + url
    elif not url.lower().startswith(("http://", "https://")):
        url = "https://" + url

    parts = urlparse(url)
    host = (parts.hostname or "").lower()
    segments = [segment for segment in parts.path.split("/") if segment]
    # parse_qs drops blank values, so "watch?v=" yields no candidate here.
    query_ids = parse_qs(parts.query).get("v", [])

    video_id = ""
    if host in ("youtu.be", "www.youtu.be") and segments and segments[0] != "watch":
        # Short links carry the ID as the first path segment.
        video_id = segments[0]
    elif query_ids:
        video_id = query_ids[0]
    elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        video_id = segments[1]

    if not video_id:
        raise ValueError("Invalid YouTube URL")
    return video_id

def get_video_details(video_id, api_key):
    """Fetch metadata for a single video through the YouTube Data API v3.

    Args:
        video_id: ID of the video to look up.
        api_key: Developer key passed to the API client.

    Returns:
        A dict with the keys ``video_id``, ``title``, ``description``,
        ``caption_status`` and ``embed_html``. Fields missing from the
        response fall back to ``""``, except ``caption_status`` which falls
        back to the string ``"false"``.

    Raises:
        ValueError: If the response contains no items for ``video_id``.
    """
    client = build("youtube", "v3", developerKey=api_key)
    request = client.videos().list(
        part="snippet,contentDetails,player,status",
        id=video_id,
    )
    matches = request.execute().get("items", [])
    if not matches:
        raise ValueError(f"No video found with ID: {video_id}")

    # Only the first returned item is used.
    video = matches[0]
    snippet, content_details, player = (
        video.get(section, {})
        for section in ("snippet", "contentDetails", "player")
    )

    details = {"video_id": video_id}
    details["title"] = snippet.get("title", "")
    details["description"] = snippet.get("description", "")
    details["caption_status"] = content_details.get("caption", "false")
    details["embed_html"] = player.get("embedHtml", "")
    return details

def get_captions(video_id, lang="en"):
    """Fetch the transcript of a video in the requested language.

    Args:
        video_id: ID of the video whose transcript is wanted.
        lang: Language code requested from the transcript API.

    Returns:
        Whatever ``YouTubeTranscriptApi.get_transcript`` returns on success
        (presumably a list of caption segments -- confirm against the
        installed library version). On any failure, a dict of the form
        ``{"error": "<message>"}`` is returned instead of raising.
    """
    # NOTE(review): newer youtube-transcript-api releases appear to replace
    # the static get_transcript() with an instance-level fetch(); confirm the
    # pinned version. Because every Exception is caught below, such a
    # mismatch would only show up as an "error" entry in the result.
    wanted_languages = [lang]
    try:
        transcript = YouTubeTranscriptApi.get_transcript(
            video_id, languages=wanted_languages
        )
    except Exception as exc:
        # Best-effort: report the failure as data so the caller can still
        # save the video details.
        return {"error": str(exc)}
    return transcript

if __name__ == "__main__":
    # The API key comes from load_secrets(), which reads "../secrets.json"
    # relative to the current working directory.
    secrets = load_secrets()
    API_KEY = secrets["YOUTUBE_API_KEY"]

    # Video to process and the caption language to request.
    youtube_url, language = "https://www.youtube.com/watch?v=fhG4uELOClo", "en"

    # Resolve the ID, then gather the metadata and the captions for it.
    video_id = get_video_id(youtube_url)
    video_details = get_video_details(video_id, API_KEY)
    captions = get_captions(video_id, lang=language)

    combined_result = dict(video_details=video_details, captions=captions)

    # Output goes to "../public/data" relative to this script; when __file__
    # is undefined (e.g. in a notebook), the working directory is the base.
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        script_dir = os.getcwd()

    output_dir = os.path.join(script_dir, os.pardir, "public", "data")
    os.makedirs(output_dir, exist_ok=True)

    # The file is named "<video_id>-<language>.json".
    filename = f"{video_id}-{language}.json"
    filepath = os.path.join(output_dir, filename)

    # ensure_ascii=False keeps non-ASCII text readable in the saved file.
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(combined_result, f, indent=2, ensure_ascii=False)

    print(f"Data saved to {filepath}")
