# In [None]:  (Jupyter cell marker left over from the notebook export)
import os, shutil
import gradio as gr
import yt_dlp
from pydub import AudioSegment
from faster_whisper import WhisperModel
from langsmith import Client
from langchain.callbacks.tracers.langchain import LangChainTracer
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
import openai
# ✅ LangSmith Setup
# Set the LangChain tracing flag before any chain or model is created.
# NOTE(review): LangSmith presumably also needs its own API key in the
# environment for traces to be delivered — confirm.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
# OPENAI_API_KEY must already be exported in the environment; never hard-code
# the key in this file. The lookup below raises KeyError if it is missing.
openai.api_key = os.environ["OPENAI_API_KEY"]
# Callback handler handed to the ChatOpenAI calls in the summarization and
# translation helpers, tagged with the "youtube-analyzer" project name.
tracer = LangChainTracer(project_name="youtube-analyzer")

# ✅ Global State
# Module-level cache describing the most recently processed video. The
# functions below rebind these names through ``global`` statements.
segments: list = []        # transcription segments produced by transcribe_audio
full_text: str = ""        # whole transcript joined into a single string
bullet_summary: str = ""   # bullet-point summary generated by handle_video
qa_chain = None            # retrieval QA chain, built on the first chat question

# ✅ Summarization
def summarize_with_bullets(text):
    """Ask the chat model for a short bullet-point summary of a transcript.

    Only the first 3,500 characters of ``text`` are placed in the prompt.

    Args:
        text: Transcript text to summarize.

    Returns:
        The chat model's response message; callers read the bullets from its
        ``content`` attribute.
    """
    excerpt = text[:3500]
    prompt = f"""Summarize the following video transcript into **simple, concise bullet points** (around 5 to 10 bullets):\n\n{excerpt}\n\nRespond only with the bullet points, no introduction."""
    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, callbacks=[tracer])
    request = [HumanMessage(content=prompt)]
    return llm.invoke(request)

# ✅ Translation
def summarize_and_translate(text):
    """Summarize a transcript in English, then translate the summary to Arabic.

    Two chat-model requests are made in sequence: the first condenses the
    first 3,500 characters of ``text`` into bullet points, the second
    translates those bullet points into Arabic (Fusha).

    Args:
        text: Transcript text to summarize and translate.

    Returns:
        The chat model's response message for the translation request; the
        Arabic text is in its ``content`` attribute.
    """
    def _ask(prompt_text):
        # A fresh client per request, configured like every other call here.
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, callbacks=[tracer])
        return llm.invoke([HumanMessage(content=prompt_text)])

    excerpt = text[:3500]
    summarization_prompt = f"""
    Please summarize the following transcript into 7-10 clear bullet points:

    {excerpt}

    Respond only with the bullet points.
    """
    english_bullets = _ask(summarization_prompt).content

    translation_prompt = f"""
    Translate the following English bullet points into clear and natural Arabic (Fusha).
    Avoid literal translation. Use professional tone and correct grammar:

    {english_bullets}
    """
    return _ask(translation_prompt)

# ✅ Transcription (Whisper without splitting)
def transcribe_audio(audio_path):
    """Transcribe an audio file with faster-whisper and cache the result.

    Runs the ``base`` Whisper model on CPU with int8 computation over the
    whole file (no manual splitting) and stores the outcome in the
    module-level ``segments`` and ``full_text`` globals.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The full transcript as a single string (the same value that is stored
        in ``full_text``).
    """
    global segments, full_text

    model = WhisperModel("base", device="cpu", compute_type="int8")
    result, info = model.transcribe(audio_path)

    # The duration is taken from the transcription metadata, so the file no
    # longer has to be fully decoded a second time with pydub just to be
    # measured.
    print(f"🎧 Duration: {info.duration:.2f} seconds")

    # faster-whisper yields segments lazily; list() drives the actual work.
    segments = list(result)
    # Segment texts usually carry their own leading space, so strip each one
    # before joining to avoid doubled spaces in the transcript.
    full_text = " ".join(seg.text.strip() for seg in segments)
    return full_text

# ✅ Gradio Interface
# Top-level Blocks container; every component and callback created inside the
# ``with demo:`` context below is attached to it.
demo = gr.Blocks(title="🎥 YouTube Analyzer")

with demo:
    # Centered HTML header with the app title and a one-line usage hint.
    gr.Markdown("""
    <div style="text-align: center; font-size: 24px; font-weight: bold; color: #0077b6;">
    🎥 YouTube Video Analyzer
    <br><span style="font-size: 16px;">Paste a YouTube link and explore: Summary / Full Transcript / Translation / QA Chat.</span>
    </div>
    """)

    # Video input, the button that starts processing, and the Markdown area
    # that receives the progress messages yielded by handle_video.
    video_url = gr.Textbox(label="🔗 YouTube URL")
    process_btn = gr.Button("🚀 Process Video")
    status = gr.Markdown()

    # Conversation pane shared by the QA chat and the quick-view shortcuts.
    chatbot = gr.Chatbot(label="💬 Chat About the Video", height=350)

    with gr.Row():
        # The choice strings must stay identical to the literals compared in
        # simulate_user_input, which dispatches on the selected value.
        suggestion = gr.Radio(["📄 Show Summary", "📜 Show Full Transcript", "🇨 Translate to Arabic"], label="Quick View")

    # Free-form question box and its send button (handled by chat_with_bot).
    user_input = gr.Textbox(label="💬 Ask a Question")
    send_btn = gr.Button("💬 Send")

    # ✅ MAIN VIDEO PROCESSING FUNCTION
    def handle_video(video_url):
        """Download, transcribe and summarize a video while streaming status text.

        This is a generator: each yielded string is a status message for the
        UI. On success the module-level ``full_text``, ``segments`` and
        ``bullet_summary`` hold the new video's data and ``qa_chain`` is
        cleared so the next chat question rebuilds the retrieval index.

        Args:
            video_url: URL of the video to process.

        Yields:
            Progress messages, ending with either a success message or a
            single ``"❌ Error: ..."`` line describing the failure.
        """
        global qa_chain, full_text, bullet_summary, segments

        # Reject a blank URL before touching any state, so an accidental
        # click does not wipe the previously processed video.
        if not video_url or not video_url.strip():
            yield "❌ Error: Please enter a YouTube URL."
            return

        try:
            # 🔁 Reset all state
            full_text = ""
            bullet_summary = ""
            segments = []
            qa_chain = None

            # 🧹 Delete previous vector DB so old chunks never mix with new ones
            if os.path.exists("./chroma_db"):
                shutil.rmtree("./chroma_db")

            # Each status is yielded right before its (slow) step. The blank
            # messages that used to follow every step were overwritten at once
            # by the next status, so they are no longer emitted.
            yield "📥 Downloading audio from YouTube..."
            audio_path = download_audio(video_url.strip())

            yield "📝 Transcribing audio..."
            full_text = transcribe_audio(audio_path)

            yield "📄 Generating summary..."
            bullet_summary = summarize_with_bullets(full_text).content

            yield "✅ Done! You can now ask questions or explore the transcript."
        except Exception as e:
            # UI boundary: surface any failure as a status line, not a crash.
            yield f"❌ Error: {str(e)}"

    # ✅ BOT QA using RAG
    def chat_with_bot(user_input, history):
        """Answer a question about the processed video with retrieval QA.

        On the first question after a video has been processed, the transcript
        is split into overlapping chunks, embedded, stored in a Chroma index
        under ``./chroma_db`` and wrapped in a ``RetrievalQA`` chain that is
        cached in the module-level ``qa_chain``. Later questions reuse it.

        Args:
            user_input: The question typed by the user.
            history: Chat history as a list of ``(user, bot)`` pairs.

        Returns:
            A ``(history, "")`` tuple: the updated chat history and an empty
            string for the second output of the event handler.
        """
        global qa_chain, full_text

        # Tolerate a missing history value instead of failing on .append().
        history = history or []

        # A blank question would only waste an embedding pass and an LLM call.
        if not user_input or not user_input.strip():
            return history, ""

        if not full_text.strip():
            history.append(("❌ Error", "⚠️ No transcript available. Please process a video first."))
            return history, ""

        try:
            if qa_chain is None:
                text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
                texts = text_splitter.create_documents([full_text])

                embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
                db = Chroma.from_documents(texts, embedding=embeddings, persist_directory="./chroma_db")

                # The global is assigned only once the chain is fully built,
                # so a failure above leaves it as None for the next attempt.
                qa_chain = RetrievalQA.from_chain_type(
                    # Same tracer as the other ChatOpenAI calls in this file,
                    # so QA runs are tagged with the same project.
                    llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, callbacks=[tracer]),
                    retriever=db.as_retriever(),
                    return_source_documents=False
                )

            response = qa_chain.run(user_input)
            history.append((f"**You:** {user_input}", f"**Bot:** {response}"))
            return history, ""

        except Exception as e:
            # UI boundary: report the failure inside the chat, not as a crash.
            history.append(("❌ Error", f"Something went wrong: {str(e)}"))
            return history, ""

    # ✅ Suggestions
    def simulate_user_input(choice, history):
        """Append the result of a "Quick View" shortcut to the chat history.

        Depending on ``choice`` this shows the cached summary, the cached
        transcript, or an Arabic translation of a fresh summary (which calls
        the chat model). Any other value leaves the history untouched.

        Args:
            choice: The selected radio label.
            history: Chat history as a list of ``(user, bot)`` pairs.

        Returns:
            The updated chat history.
        """
        global bullet_summary, full_text

        # Tolerate a missing history value instead of failing on .append().
        history = history or []

        if choice == "📄 Show Summary":
            history.append((choice, bullet_summary or "❌ Summary not ready."))
        elif choice == "📜 Show Full Transcript":
            history.append((choice, full_text or "❌ Transcript not ready."))
        elif choice == "🇨 Translate to Arabic":
            if not full_text:
                history.append((choice, "❌ Transcript not ready."))
            else:
                # The translation makes remote model calls; report failures in
                # the chat the same way chat_with_bot does.
                try:
                    translated = summarize_and_translate(full_text).content
                except Exception as e:
                    history.append(("❌ Error", f"Something went wrong: {str(e)}"))
                else:
                    history.append((choice, translated))
        return history

    # ✅ Audio downloader
    def download_audio(video_url):
        """Download a video's audio track with yt-dlp and convert it to MP3.

        The result is always written to ``audio.mp3`` in the current working
        directory, replacing any file left over from a previous run.

        Args:
            video_url: URL of the video to download.

        Returns:
            The path of the MP3 file (``"audio.mp3"``).

        Raises:
            FileNotFoundError: If the download finished without producing the
                expected MP3 file.
        """
        output_path = "audio.mp3"

        # Drop the stale file first so a failed download can never be mistaken
        # for a fresh one.
        if os.path.exists(output_path):
            os.remove(output_path)

        ydl_opts = {
            "format": "bestaudio/best",
            "outtmpl": "audio.%(ext)s",
            # A watch URL that also carries a playlist id would otherwise pull
            # the whole playlist into the same output file.
            "noplaylist": True,
            "postprocessors": [{
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "64",
            }],
            "quiet": False,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])

        # Fail here with a clear message rather than later in transcription.
        if not os.path.exists(output_path):
            raise FileNotFoundError(
                f"Audio extraction did not produce {output_path}; check that ffmpeg is installed."
            )
        return output_path

    # ✅ Events
    process_btn.click(fn=handle_video, inputs=[video_url], outputs=status)
    send_btn.click(fn=chat_with_bot, inputs=[user_input, chatbot], outputs=[chatbot, user_input])
    suggestion.change(fn=simulate_user_input, inputs=[suggestion, chatbot], outputs=[chatbot])

demo.launch(share=True)
