In [None]:
!pip install langchain transformers yt-dlp pydub
!pip install -U langchain-community
!pip install youtube-transcript-api
!pip install googletrans==4.0.0-rc1


In [None]:
import streamlit as st
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import pipeline
from googletrans import Translator

# Step 1: Get YouTube Transcript
def fetch_transcript(video_url):
    """Return the transcript text of a YouTube video.

    The transcript is loaded through ``YoutubeLoader`` (with
    ``add_video_info=False``) and the ``page_content`` of the first
    returned document is used.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The ``page_content`` of the first loaded document.

    Raises:
        ValueError: If the loader returns no documents.
    """
    documents = YoutubeLoader.from_youtube_url(
        video_url,
        add_video_info=False,
    ).load()

    # Happy path first; an empty result means there is nothing to work with.
    if documents:
        return documents[0].page_content
    raise ValueError("❌ No transcript found.")

# Step 2: Split Transcript
def split_text(text, chunk_size=1000, chunk_overlap=100):
    """Break ``text`` into overlapping pieces.

    Args:
        text: The text to split.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as its
            ``chunk_size``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            its ``chunk_overlap``.

    Returns:
        Whatever the splitter's ``split_text`` returns for ``text``.
    """
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    pieces = chunker.split_text(text)
    return pieces

# Step 3: Translate Tamil → English (if needed)
def translate_chunks(chunks, source_lang='ta', model_name="Helsinki-NLP/opus-mt-dra-en"):
    """Translate Tamil transcript chunks into English.

    Args:
        chunks: Sequence of text chunks to translate.
        source_lang: Language code of ``chunks``. Only ``'ta'`` triggers
            translation; for any other value ``chunks`` is returned as-is.
        model_name: Hugging Face translation checkpoint to load. The
            default is a Dravidian-languages -> English model; the
            previously hard-coded ``opus-mt-en-ta`` name denotes the
            opposite (English -> Tamil) direction.
            NOTE(review): confirm the checkpoint is available in your
            environment before relying on the default.

    Returns:
        A list of translated strings when ``source_lang == 'ta'``. Chunks
        whose translation raises are reported on stdout and skipped, so the
        result may be shorter than ``chunks``. Otherwise ``chunks`` itself.
    """
    if source_lang != 'ta':
        return chunks
    if not chunks:
        # Nothing to translate: do not download/load a model for no work.
        return []

    translator = pipeline("translation", model=model_name)
    translated = []
    for chunk in chunks:
        try:
            # Let the tokenizer truncate to the model's limit instead of
            # slicing the first 512 *characters*, which threw away roughly
            # half of every default-sized (1000 character) chunk.
            result = translator(chunk, truncation=True)[0]['translation_text']
        except Exception as e:
            # Best effort: report the failure and carry on with the rest.
            print(f"⚠️ Translation error: {e}")
        else:
            translated.append(result)
    return translated

# Step 4: Summarize Text
def summarize_chunks(chunks):
    """Summarize each chunk and join the summaries with newlines.

    Uses the ``facebook/bart-large-cnn`` summarization pipeline with
    ``max_length=50``, ``min_length=10`` and ``do_sample=False``.

    Args:
        chunks: Sequence of text chunks to summarize.

    Returns:
        The per-chunk summaries joined by ``"\\n"``. Chunks whose
        summarization raises are reported on stdout and skipped. An empty
        ``chunks`` yields ``""``.
    """
    if not chunks:
        # Same result as before for empty input, without loading the model.
        return ""

    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    summaries = []
    for chunk in chunks:
        try:
            # truncation=True keeps over-long chunks (e.g. text that
            # tokenizes into many pieces) from failing inside the model
            # and being silently dropped from the summary.
            result = summarizer(
                chunk,
                max_length=50,
                min_length=10,
                do_sample=False,
                truncation=True,
            )
            summaries.append(result[0]['summary_text'])
        except Exception as e:
            # Best effort: report the failure and carry on with the rest.
            print(f"⚠️ Summarization error: {e}")
    return "\n".join(summaries)

# Step 5: Translate Summary into Multiple Languages
def translate_summary(summary, target_languages):
    """Translate ``summary`` into every requested language.

    Args:
        summary: The text to translate.
        target_languages: Iterable of destination language codes, each
            passed as ``dest`` to ``Translator.translate``.

    Returns:
        A dict mapping each language code to the translated text. A
        language whose translation raises is reported on stdout and left
        out of the dict.
    """
    client = Translator()
    results = {}

    for code in target_languages:
        try:
            results[code] = client.translate(summary, dest=code).text
        except Exception as err:
            print(f"⚠️ Error translating to {code}: {err}")

    return results

# Step 6: Full Pipeline
def summarize_youtube_video(video_url, source_language='auto', target_languages=None):
    """Run the full pipeline: fetch, split, (translate), summarize, translate.

    Args:
        video_url: URL of the YouTube video.
        source_language: ``'ta'`` translates the transcript chunks to
            English before summarizing; any other value skips that step.
        target_languages: Language codes to translate the final summary
            into. Defaults to ``['en']`` when omitted or ``None``.

    Returns:
        A ``(final_summary, translations)`` tuple, where ``translations``
        is the dict returned by ``translate_summary``.

    Raises:
        ValueError: Propagated from ``fetch_transcript`` when no transcript
            is found.
    """
    # A ``None`` sentinel replaces the former mutable list default, which
    # was a single list object shared across every call.
    if target_languages is None:
        target_languages = ['en']

    print("📥 Fetching transcript...")
    transcript = fetch_transcript(video_url)

    print("✂️ Splitting transcript...")
    chunks = split_text(transcript)

    if source_language == 'ta':
        print("🌐 Translating Tamil → English...")
        chunks = translate_chunks(chunks, source_lang='ta')

    print("🧠 Summarizing chunks...")
    final_summary = summarize_chunks(chunks)

    print("🌐 Translating Summary into Selected Languages...")
    translations = translate_summary(final_summary, target_languages)

    return final_summary, translations


# --- Colab Interface ---
import ipywidgets as widgets
from IPython.display import display, clear_output

# Create widgets
# Free-text field for the video link.
url_input = widgets.Text(
    description='Video URL:',
    placeholder='Enter YouTube Video URL',
)
# Source-language selector for the transcript.
lang_input = widgets.Dropdown(
    description='Language:',
    options=['auto', 'ta'],
)
# Comma-separated language codes the summary should be translated into.
target_lang_input = widgets.Text(
    description='Target Languages:',
    placeholder='Enter target languages (comma separated, e.g. "ta,ml,en")',
)
# Area where the click handler prints its results.
output_area = widgets.Output()

def on_button_click(b):
    """Handle a click on the run button: run the pipeline and print results.

    Reads the URL, source language and target languages from the widgets,
    clears the output area, and prints the summary and its translations
    (or an error message) inside it.

    Args:
        b: The button instance passed by ``on_click``; unused.
    """
    with output_area:
        clear_output()
        url = url_input.value.strip()
        source_lang = lang_input.value

        # Trim each code and drop blanks so input such as "ta, ml" or a
        # trailing comma does not produce invalid codes like " ml" or "".
        target_languages = [
            code.strip()
            for code in target_lang_input.value.split(',')
            if code.strip()
        ]
        if not target_languages:
            # Nothing entered: use the same default as the pipeline.
            target_languages = ['en']

        if not url:
            print("Please provide a valid YouTube URL.")
            return

        try:
            summary, translations = summarize_youtube_video(
                url,
                source_language=source_lang,
                target_languages=target_languages,
            )

            print("\n✅ Final Summary:")
            print(summary)

            print("\n🌐 Translations:")
            # Distinct loop name: the original reused ``lang`` and thereby
            # shadowed the source-language value read above.
            for code, translation in translations.items():
                print(f"{code}: {translation}")
        except Exception as e:
            # Top-level UI boundary: show the failure instead of a traceback.
            print(f"❌ Error: {str(e)}")

# Create a button to run the logic
run_button = widgets.Button(description="Summarize and Translate")
# Run the pipeline whenever the button is pressed.
run_button.on_click(on_button_click)

# Render the inputs, the button and the output area, in that order.
display(
    url_input,
    lang_input,
    target_lang_input,
    run_button,
    output_area,
)
