In [1]:
# single cell: improved TTS failover + image prompt improvements
# Run in Colab. If local, adapt apt installs. Requires internet for edge-tts/gTTS and HF for SD images.
!pip install -q edge-tts gTTS moviepy diffusers transformers torch torchvision torchaudio pillow scipy requests nest_asyncio rembg opencv-python onnxruntime

# On Linux (Colab) install espeak and ffmpeg for pyttsx3 and audio conversions:
!apt-get update -qq
!apt-get install -y -qq espeak ffmpeg

import os, shutil, asyncio, nest_asyncio, io, sys, time
from pathlib import Path
import wave, struct
from gtts import gTTS
import edge_tts
import pyttsx3
from moviepy.editor import AudioFileClip
from PIL import Image
import torch
nest_asyncio.apply()

TEMP_DIR = "/content/_temp"
os.makedirs(TEMP_DIR, exist_ok=True)

def file_exists_and_nonzero(p): return os.path.exists(p) and os.path.getsize(p) > 0

# --- Utilities: silence wav writer ---
def write_silent_wav(path, duration_s=2.0, sr=22050):
    n_frames = int(duration_s * sr)
    with wave.open(path, 'w') as wf:
        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
        wf.writeframes(struct.pack('<h',0) * n_frames)
    return path

# --- List edge voices (and return full objects) ---
async def list_edge_voices():
    try:
        voices = await edge_tts.list_voices()
        # voices is a list of dicts with keys like 'Name','ShortName','Locale','Gender'
        print(f"[edge] returned {len(voices)} voices (sample):")
        for v in voices[:10]:
            print("  ", v.get("ShortName"), v.get("Locale"), v.get("Gender"))
        return voices
    except Exception as e:
        print("[edge] list_voices error:", e)
        return []

# --- Try to synthesize with edge-tts robustly ---
async def try_edge_speak(text, voice_shortname, out_wav, max_attempts=3):
    # try different format / SSML combinations if needed
    for attempt in range(1, max_attempts+1):
        try:
            # Use a conservative rate and simple text
            communicator = edge_tts.Communicate(text, voice=voice_shortname)
            # save() uses default format; we'll attempt and then validate file size.
            await communicator.save(out_wav)
            if file_exists_and_nonzero(out_wav) and os.path.getsize(out_wav) > 200:  # >200 bytes sanity
                return True
            else:
                print(f"[edge] attempt {attempt} created file but it's empty/too small.")
        except Exception as e:
            print(f"[edge] attempt {attempt} failed for {voice_shortname}: {e}")
        await asyncio.sleep(0.5)
    return False

# --- Fallback: gTTS (google) -> mp3 -> wav via ffmpeg (ffmpeg required) ---
def gtts_to_wav(text, out_wav):
    try:
        mp3_path = out_wav + ".mp3"
        tts = gTTS(text=text, lang="te")
        tts.save(mp3_path)
        # convert mp3 -> wav using ffmpeg (should be available in Colab)
        cmd = f"ffmpeg -y -i {mp3_path} -ar 22050 -ac 1 {out_wav} -loglevel quiet"
        res = os.system(cmd)
        if file_exists_and_nonzero(out_wav):
            os.remove(mp3_path)
            return True
        else:
            print("[gTTS] conversion failed or produced empty file")
            return False
    except Exception as e:
        print("[gTTS] failed:", e)
        return False

# --- pyttsx3 fallback (requires espeak on Linux) ---
def pyttsx3_speak(text, out_wav):
    try:
        engine = pyttsx3.init()
        # try to select a male-like voice if present
        voices = engine.getProperty('voices')
        chosen = None
        for v in voices:
            # many linux voices won't have friendly metadata, so fallback heuristics
            if 'male' in (getattr(v,'name','') or '').lower() or 'male' in (getattr(v,'id','') or '').lower():
                chosen = v; break
        if chosen:
            engine.setProperty('voice', chosen.id)
        engine.save_to_file(text, out_wav)
        engine.runAndWait()
        if file_exists_and_nonzero(out_wav):
            return True
    except Exception as e:
        print("[pyttsx3] failed:", e)
    return False

# --- High-level chooser: prefer edge male Telugu, else any Telugu, else gTTS, else pyttsx3, else silent ---
async def choose_working_tts(preferred_list=None):
    if preferred_list is None:
        preferred_list = ["te-IN-MohanNeural", "te-IN-PriyaNeural", "te-IN-ShrutiNeural"]
    voices = await list_edge_voices()
    shortnames = [v.get("ShortName") for v in voices]
    # first try preferred shortnames that actually exist
    for pref in preferred_list:
        if pref in shortnames:
            test_path = os.path.join(TEMP_DIR, f"edge_test_{pref}.wav")
            ok = await try_edge_speak("నమస్కారం", pref, test_path, max_attempts=2)
            if ok:
                print("[choose] will use edge-tts voice:", pref)
                return ("edge", pref)
            else:
                print(f"[choose] edge voice {pref} exists but failed playback test.")
    # try to pick any telugu male voice from the list
    for v in voices:
        if v.get("Locale","").startswith("te") or v.get("Locale","").startswith("te-IN"):
            # prefer male
            if v.get("Gender","").lower() == "male":
                test_path = os.path.join(TEMP_DIR, f"edge_test_{v['ShortName']}.wav")
                ok = await try_edge_speak("నమస్కారం", v['ShortName'], test_path, max_attempts=2)
                if ok:
                    print("[choose] found telugu male edge voice:", v['ShortName'])
                    return ("edge", v['ShortName'])
    # try any telugu edge voice
    for v in voices:
        if v.get("Locale","").startswith("te"):
            test_path = os.path.join(TEMP_DIR, f"edge_test_{v['ShortName']}.wav")
            ok = await try_edge_speak("నమస్కారం", v['ShortName'], test_path, max_attempts=2)
            if ok:
                print("[choose] found telugu edge voice (not male-preferred):", v['ShortName'])
                return ("edge", v['ShortName'])
    # Next fallback to gTTS
    test_g = os.path.join(TEMP_DIR, "gtts_test.wav")
    if gtts_to_wav("నమస్కారం", test_g):
        print("[choose] using gTTS fallback (Google TTS, telugu)")
        return ("gtts", None)
    # Next fallback to pyttsx3
    test_py = os.path.join(TEMP_DIR, "pyttsx3_test.wav")
    if pyttsx3_speak("నమస్కారం", test_py):
        print("[choose] using pyttsx3 fallback")
        return ("pyttsx3", None)
    print("[choose] no TTS available; will use silent")
    return (None, None)

# --- Generate TTS for a list of news sentences; returns path to final wav ---
async def generate_tts(news_texts, out_audio, chosen_engine, engine_voice=None):
    # ensure out dir
    os.makedirs(os.path.dirname(out_audio) or ".", exist_ok=True)
    if chosen_engine == "edge":
        # create per-sentence files and concatenate using moviepy.AudioFileClip
        parts = []
        for i, txt in enumerate(news_texts):
            p = os.path.join(TEMP_DIR, f"edge_part_{i}.wav")
            ok = await try_edge_speak(txt, engine_voice, p, max_attempts=3)
            if not ok:
                print(f"[generate_tts] edge failed for sentence {i+1}, using silent chunk")
                write_silent_wav(p, duration_s=1.5)
            parts.append(p)
        # concatenate via ffmpeg or moviepy
        # We'll use ffmpeg concat: create a "files.txt"
        listfile = os.path.join(TEMP_DIR, "concat_list.txt")
        with open(listfile, "w") as f:
            for p in parts:
                f.write(f"file '{p}'\n")
        cmd = f"ffmpeg -y -f concat -safe 0 -i {listfile} -c copy {out_audio} -loglevel quiet"
        os.system(cmd)
        if file_exists_and_nonzero(out_audio):
            return out_audio
        else:
            print("[generate_tts] ffmpeg concat failed; falling back to single-file creation")
            # fallback: combine with moviepy (less efficient)
            try:
                clips = [AudioFileClip(p) for p in parts]
                if len(clips) == 1:
                    clips[0].write_audiofile(out_audio, fps=22050, verbose=False, logger=None)
                else:
                    from moviepy.editor import concatenate_audioclips
                    final = concatenate_audioclips(clips)
                    final.write_audiofile(out_audio, fps=22050, verbose=False, logger=None)
                    final.close()
                for c in clips:
                    try: c.close()
                    except: pass
                return out_audio if file_exists_and_nonzero(out_audio) else None
            except Exception as e:
                print("[generate_tts] moviepy concat fallback failed:", e)
                return None

    elif chosen_engine == "gtts":
        # single joined text to gTTS
        joined = " । ".join(news_texts)  # join using a danda-like separator
        ok = gtts_to_wav(joined, out_audio)
        if ok: return out_audio
        else:
            write_silent_wav(out_audio, duration_s=max(2.0, len(news_texts)*1.2))
            return out_audio

    elif chosen_engine == "pyttsx3":
        joined = " । ".join(news_texts)
        if pyttsx3_speak(joined, out_audio):
            return out_audio
        else:
            write_silent_wav(out_audio, duration_s=max(2.0, len(news_texts)*1.2))
            return out_audio
    else:
        write_silent_wav(out_audio, duration_s=max(3.0, len(news_texts)*1.5))
        return out_audio

# --- Improved image prompt generator: uses actual Telugu text to construct a descriptive prompt ---
def build_sd_prompt(news_text, extra_style="photo-realistic, high detail, editorial news photography"):
    # Use the Telugu sentence but add an English description elaborating key nouns if possible.
    # If you want exact literal translation in prompt you can supply translations from user or use a translator.
    # For now, we embed the Telugu sentence and also provide a short english hint for SD models.
    # Keep negative prompt to avoid printed text in the image.
    negative = "text, watermark, logo, signature, lowres, duplicate"
    # prompt: include the Telugu string (so multilingual SD can pick it up) and add style hints
    prompt = f"'{news_text}' -- {extra_style} -- editorial composition, 3:2 aspect"
    return prompt, negative

# ---------------------------
# Example usage
# ---------------------------
async def demo_run():
    # Example news theme
    themes = {
        "political": [
            "హైదరాబాద్ మెట్రో విస్తరణ పనులు వేగంగా కొనసాగుతున్నాయి.",
            "ఆంధ్రప్రదేశ్ ప్రభుత్వం కొత్త పారిశ్రామిక విధానం ప్రకటించింది.",
            "తెలంగాణ హరితహారం కార్యక్రమం మరింత ఉత్సాహంగా సాగుతోంది."
        ]
    }
    # Choose TTS
    engine_type, engine_voice = await choose_working_tts(preferred_list=["te-IN-MohanNeural","te-IN-Mohan","te-IN-PriyaNeural"])
    print("[demo] chosen engine:", engine_type, engine_voice)
    for theme, lines in themes.items():
        out_audio = f"/content/{theme}_news.wav"
        print("[demo] generating tts for theme:", theme)
        await generate_tts(lines, out_audio, engine_type, engine_voice)
        if file_exists_and_nonzero(out_audio):
            print("-> saved audio:", out_audio, "duration(s):", AudioFileClip(out_audio).duration)
        else:
            print("-> audio generation failed, produced empty file")
        # create image prompts:
        imgs = []
        for i, sentence in enumerate(lines):
            p, neg = build_sd_prompt(sentence)
            print("[demo] prompt", i+1, ":", p, " (neg:", neg, ")")
            # Here you should call your SD pipeline with prompt p and negative prompt neg
            # Example placeholder:
            placeholder = f"/content/{theme}_img_{i+1}.png"
            # if you have a DiffusionPipeline instance 'pipe', call:
            # image = pipe(p, negative_prompt=neg, width=720, height=480).images[0]; image.save(placeholder)
            # For now create a placeholder image:
            Image.new("RGB",(720,480),(80,80,80)).save(placeholder)
            imgs.append(placeholder)
        print("[demo] images for theme", theme, "->", imgs)

# Run demo
asyncio.run(demo_run())


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.4/17.4 MB[0m [31m133.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.7/54.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hW: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Selecting previously unselected package libportaudio2:amd64.
(Reading database ... 125082 file

ModuleNotFoundError: No module named 'pyttsx3'

In [2]:
# ============================================================
# FINAL: Telugu AI News with Male Voice + Realistic Images
# ============================================================

# STEP 1: Install dependencies (run once)
!pip install -q edge-tts gTTS pyttsx3 moviepy diffusers transformers torch torchvision torchaudio pillow scipy requests nest_asyncio rembg opencv-python onnxruntime

# Linux (Colab) extras for audio engines
!apt-get update -qq
!apt-get install -y -qq espeak ffmpeg

# STEP 2: Imports
import os, shutil, asyncio, nest_asyncio, io, sys, time, wave, struct
from pathlib import Path
from gtts import gTTS
import edge_tts, pyttsx3
from moviepy.editor import AudioFileClip
from PIL import Image
import torch
from diffusers import DiffusionPipeline
nest_asyncio.apply()

TEMP_DIR = "/content/_temp"
os.makedirs(TEMP_DIR, exist_ok=True)

# ============================================================
# Utility functions
# ============================================================
def file_exists_and_nonzero(p):
    return os.path.exists(p) and os.path.getsize(p) > 0

def write_silent_wav(path, duration_s=2.0, sr=22050):
    n_frames = int(duration_s * sr)
    with wave.open(path, 'w') as wf:
        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
        wf.writeframes(struct.pack('<h',0) * n_frames)
    return path

# ============================================================
# TTS Selection and Generation
# ============================================================
async def list_edge_voices():
    try:
        voices = await edge_tts.list_voices()
        telugu = [v for v in voices if v.get("Locale","").startswith("te")]
        print(f"[edge] Found {len(telugu)} Telugu voices:")
        for v in telugu:
            print("  ", v["ShortName"], "-", v.get("Gender"))
        return telugu
    except Exception as e:
        print("[edge] voice list error:", e)
        return []

async def try_edge_tts(text, voice, out_wav):
    try:
        await edge_tts.Communicate(text, voice=voice).save(out_wav)
        return file_exists_and_nonzero(out_wav)
    except Exception as e:
        print(f"[edge] failed for {voice}:", e)
        return False

def gtts_to_wav(text, out_wav):
    try:
        mp3 = out_wav + ".mp3"
        gTTS(text=text, lang="te").save(mp3)
        os.system(f"ffmpeg -y -i {mp3} -ar 22050 -ac 1 {out_wav} -loglevel quiet")
        os.remove(mp3)
        return file_exists_and_nonzero(out_wav)
    except Exception as e:
        print("[gTTS] failed:", e)
        return False

def pyttsx3_speak(text, out_wav):
    try:
        engine = pyttsx3.init()
        voices = engine.getProperty('voices')
        for v in voices:
            if "male" in v.name.lower():
                engine.setProperty('voice', v.id)
                break
        engine.save_to_file(text, out_wav)
        engine.runAndWait()
        return file_exists_and_nonzero(out_wav)
    except Exception as e:
        print("[pyttsx3] failed:", e)
        return False

async def choose_working_tts():
    voices = await list_edge_voices()
    preferred = ["te-IN-MohanNeural", "te-IN-Mohan", "te-IN-PriyaNeural"]
    for v in preferred:
        if any(v == vv["ShortName"] for vv in voices):
            test = os.path.join(TEMP_DIR, f"test_{v}.wav")
            if await try_edge_tts("నమస్కారం", v, test):
                print("[TTS] Using Edge voice:", v)
                return ("edge", v)
    # fallback male voice
    for vv in voices:
        if vv.get("Gender","").lower() == "male":
            test = os.path.join(TEMP_DIR, f"test_{vv['ShortName']}.wav")
            if await try_edge_tts("నమస్కారం", vv["ShortName"], test):
                print("[TTS] Using Edge male voice:", vv["ShortName"])
                return ("edge", vv["ShortName"])
    # fallback gTTS
    test = os.path.join(TEMP_DIR, "test_gtts.wav")
    if gtts_to_wav("నమస్కారం", test):
        print("[TTS] Using gTTS fallback")
        return ("gtts", None)
    # fallback pyttsx3
    test = os.path.join(TEMP_DIR, "test_pyttsx3.wav")
    if pyttsx3_speak("నమస్కారం", test):
        print("[TTS] Using pyttsx3 fallback")
        return ("pyttsx3", None)
    print("[TTS] No TTS available, silent fallback")
    return (None, None)

async def generate_tts(news_lines, out_audio, engine_type, engine_voice):
    os.makedirs(os.path.dirname(out_audio) or ".", exist_ok=True)
    if engine_type == "edge":
        clips = []
        for i, line in enumerate(news_lines):
            out = os.path.join(TEMP_DIR, f"edge_{i}.wav")
            ok = await try_edge_tts(line, engine_voice, out)
            if not ok: write_silent_wav(out, 1.5)
            clips.append(f"file '{out}'")
        with open(os.path.join(TEMP_DIR, "list.txt"), "w") as f:
            f.write("\n".join(clips))
        os.system(f"ffmpeg -y -f concat -safe 0 -i {TEMP_DIR}/list.txt -c copy {out_audio} -loglevel quiet")
        return out_audio
    elif engine_type == "gtts":
        joined = " ".join(news_lines)
        gtts_to_wav(joined, out_audio)
        return out_audio
    elif engine_type == "pyttsx3":
        joined = " ".join(news_lines)
        pyttsx3_speak(joined, out_audio)
        return out_audio
    else:
        write_silent_wav(out_audio, 3.0)
        return out_audio

# ============================================================
# Image Generation (Stable Diffusion)
# ============================================================
device = "cuda" if torch.cuda.is_available() else "cpu"
try:
    pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5",
                                             torch_dtype=torch.float16 if device=="cuda" else torch.float32).to(device)
    pipe.safety_checker = None
except Exception as e:
    print("[warn] could not load SD:", e)
    pipe = None

def generate_image(prompt, path):
    if pipe is None:
        Image.new("RGB",(720,480),(60,60,60)).save(path)
        return path
    try:
        res = pipe(prompt, width=720, height=480,
                   guidance_scale=7.5,
                   negative_prompt="text, watermark, logo, blur, low quality")
        res.images[0].save(path)
        return path
    except Exception as e:
        print("[image] fail:", e)
        Image.new("RGB",(720,480),(60,60,60)).save(path)
        return path

# ============================================================
# THEMES (you can expand later)
# ============================================================
themes = {
    "political": [
        "హైదరాబాద్ మెట్రో విస్తరణ పనులు వేగంగా కొనసాగుతున్నాయి.",
        "ఆంధ్రప్రదేశ్ ప్రభుత్వం కొత్త పారిశ్రామిక విధానం ప్రకటించింది.",
        "తెలంగాణ హరితహారం కార్యక్రమం మరింత ఉత్సాహంగా సాగుతోంది."
    ]
}

# ============================================================
# MAIN RUNNER
# ============================================================
async def main():
    engine_type, engine_voice = await choose_working_tts()
    print(f"[RUN] Chosen Engine: {engine_type}, Voice: {engine_voice}")
    for theme, lines in themes.items():
        print(f"\n🎙️ Generating for theme: {theme}")
        os.makedirs(f"/content/output/{theme}/images", exist_ok=True)
        os.makedirs(f"/content/output/{theme}/audio", exist_ok=True)
        audio_path = f"/content/output/{theme}/audio/{theme}.wav"
        await generate_tts(lines, audio_path, engine_type, engine_voice)
        print("✅ Audio saved:", audio_path)
        for i, text in enumerate(lines):
            prompt = f"{text}, news photography, realistic photo, Telugu news background"
            img_path = f"/content/output/{theme}/images/{i+1}.png"
            generate_image(prompt, img_path)
            print("🖼️ Image saved:", img_path)

# Run everything
asyncio.run(main())


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


  IMAGEMAGICK_BINARY = r"C:\Program Files\ImageMagick-6.8.8-Q16\magick.exe"
  lines_video = [l for l in lines if ' Video: ' in l and re.search('\d+x\d+', l)]
  rotation_lines = [l for l in lines if 'rotate          :' in l and re.search('\d+$', l)]
  match = re.search('\d+$', rotation_line)
  if event.key is 'enter':



model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

text_encoder/model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

safety_checker/model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

unet/diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

vae/diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!


[edge] Found 2 Telugu voices:
   te-IN-ShrutiNeural - Female
   te-IN-MohanNeural - Male
[edge] failed for te-IN-MohanNeural: No audio was received. Please verify that your parameters are correct.
[edge] failed for te-IN-MohanNeural: No audio was received. Please verify that your parameters are correct.
[TTS] Using gTTS fallback
[RUN] Chosen Engine: gtts, Voice: None

🎙️ Generating for theme: political


Token indices sequence length is longer than the specified maximum sequence length for this model (104 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ు త ు న ్ న ా య ి., news photography , realistic photo , telugu news background']


✅ Audio saved: /content/output/political/audio/political.wav


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['న ం ప ్ రకట ిం చ ిం ద ి., news photography , realistic photo , telugu news background']


🖼️ Image saved: /content/output/political/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['స ా గ ు త ోం ద ి., news photography , realistic photo , telugu news background']


🖼️ Image saved: /content/output/political/images/2.png


  0%|          | 0/50 [00:00<?, ?it/s]

🖼️ Image saved: /content/output/political/images/3.png


In [3]:
# ============================================================
# FINAL: Telugu News — force male Edge-TTS if available
# ============================================================

# Install (run once)
!pip install -q edge-tts gTTS pyttsx3 moviepy diffusers transformers torch torchvision torchaudio pillow scipy requests nest_asyncio rembg opencv-python onnxruntime
!apt-get update -qq
!apt-get install -y -qq espeak ffmpeg

# Imports
import os, asyncio, nest_asyncio, wave, struct, shutil
from pathlib import Path
from gtts import gTTS
import edge_tts, pyttsx3
from moviepy.editor import AudioFileClip
from PIL import Image
import torch
from diffusers import DiffusionPipeline
nest_asyncio.apply()

# Config: if you want to force a specific ShortName, set it here (exact ShortName as printed by list_voices())
FORCED_VOICE = None  # e.g. "te-IN-MohanNeural" or None to auto-select

TEMP_DIR = "/content/_temp"
os.makedirs(TEMP_DIR, exist_ok=True)

def file_exists_and_nonzero(p): return os.path.exists(p) and os.path.getsize(p) > 0

def write_silent_wav(path, duration_s=2.0, sr=22050):
    n_frames = int(duration_s * sr)
    with wave.open(path, 'w') as wf:
        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
        wf.writeframes(struct.pack('<h',0) * n_frames)
    return path

# --- Edge voices listing ---
async def list_edge_voices():
    try:
        voices = await edge_tts.list_voices()
        # voices are dicts: ShortName, Locale, Gender, Name etc.
        print(f"[edge] total voices available: {len(voices)} (showing Telugu subset first)")
        telugu = [v for v in voices if v.get("Locale","").lower().startswith("te")]
        for v in telugu:
            print("  TEL:", v.get("ShortName"), "| Gender:", v.get("Gender"), "| Name:", v.get("Name"))
        # also print some sample non-Telugu male voices (useful if Telugu male not present)
        non_telugu_male = [v for v in voices if v.get("Gender","").lower()=="male" and not v.get("Locale","").lower().startswith("te")]
        if non_telugu_male:
            print("\n[edge] sample male (non-Telugu) voices:")
            for v in non_telugu_male[:6]:
                print(" ", v.get("ShortName"), "| Locale:", v.get("Locale"), "| Name:", v.get("Name"))
        return voices
    except Exception as e:
        print("[edge] list_voices error:", e)
        return []

# --- Try edge tts ---
async def try_edge_tts(text, voice_shortname, out_wav, attempts=2):
    for a in range(attempts):
        try:
            await edge_tts.Communicate(text, voice=voice_shortname).save(out_wav)
            if file_exists_and_nonzero(out_wav) and os.path.getsize(out_wav) > 200:
                return True
            else:
                print(f"[edge] attempt {a+1} for {voice_shortname} created too-small file.")
        except Exception as e:
            print(f"[edge] attempt {a+1} for {voice_shortname} error:", e)
        await asyncio.sleep(0.3)
    return False

# --- gTTS fallback (gender-neutral) ---
def gtts_to_wav(text, out_wav):
    try:
        mp3 = out_wav + ".mp3"
        gTTS(text=text, lang="te").save(mp3)
        os.system(f"ffmpeg -y -i {mp3} -ar 22050 -ac 1 {out_wav} -loglevel quiet")
        if file_exists_and_nonzero(out_wav):
            os.remove(mp3)
            return True
        return False
    except Exception as e:
        print("[gTTS] error:", e)
        return False

# --- pyttsx3 fallback (offline) ---
def pyttsx3_speak(text, out_wav):
    try:
        engine = pyttsx3.init()
        voices = engine.getProperty('voices')
        # Try heuristics to pick a male voice
        chosen = None
        for v in voices:
            attrs = " ".join([str(getattr(v, attr, "")).lower() for attr in ("name","id")])
            if "male" in attrs:
                chosen = v
                break
        if chosen:
            engine.setProperty('voice', chosen.id)
        engine.save_to_file(text, out_wav)
        engine.runAndWait()
        return file_exists_and_nonzero(out_wav)
    except Exception as e:
        print("[pyttsx3] error:", e)
        return False

# --- Strong male selection logic ---
async def choose_male_tts():
    voices = await list_edge_voices()
    # 1) If user forced an exact ShortName, try it first
    if FORCED_VOICE:
        print("[choose] FORCED_VOICE set to:", FORCED_VOICE)
        test = os.path.join(TEMP_DIR, f"forced_test.wav")
        if await try_edge_tts("నమస్కారం", FORCED_VOICE, test):
            print("[choose] forced voice worked:", FORCED_VOICE)
            return ("edge", FORCED_VOICE)
        else:
            print("[choose] forced voice failed:", FORCED_VOICE)

    # 2) Prefer Telugu voices explicitly labeled male
    telugu_male = [v for v in voices if v.get("Locale","").lower().startswith("te") and v.get("Gender","").lower()=="male"]
    if telugu_male:
        print("[choose] Telugu male voices found:", [v['ShortName'] for v in telugu_male])
        for v in telugu_male:
            test = os.path.join(TEMP_DIR, f"telugu_male_{v['ShortName']}.wav")
            if await try_edge_tts("నమస్కారం", v['ShortName'], test):
                print("[choose] using Telugu male voice:", v['ShortName'])
                return ("edge", v['ShortName'])
            else:
                print("[choose] telugu male voice failed test:", v['ShortName'])

    # 3) If no Telugu male, try any voice whose Gender == male (any locale)
    any_male = [v for v in voices if v.get("Gender","").lower()=="male"]
    if any_male:
        print("[choose] trying any male voice (non-Telugu fallback)")
        for v in any_male:
            test = os.path.join(TEMP_DIR, f"any_male_{v['ShortName']}.wav")
            if await try_edge_tts("నమస్కారం", v['ShortName'], test):
                print("[choose] using male voice:", v['ShortName'], "| Locale:", v.get("Locale"))
                return ("edge", v['ShortName'])

    # 4) As a weaker heuristic: attempt preferred shortnames if present (some runtimes name differently)
    preferred_try = ["te-IN-MohanNeural", "Mohan", "MohanNeural"]
    for p in preferred_try:
        for v in voices:
            if p.lower() in (v.get("ShortName","").lower() + v.get("Name","").lower()):
                test = os.path.join(TEMP_DIR, f"pref_{v['ShortName']}.wav")
                if await try_edge_tts("నమస్కారం", v['ShortName'], test):
                    print("[choose] using heuristic-preferred voice:", v['ShortName'])
                    return ("edge", v['ShortName'])

    # 5) gTTS fallback (gender-neutral but reliable for Telugu)
    test_g = os.path.join(TEMP_DIR, "gtts_test.wav")
    if gtts_to_wav("నమస్కారం", test_g):
        print("[choose] falling back to gTTS (gender-neutral). If you need strictly male, enable a paid TTS or provide a forced ShortName.")
        return ("gtts", None)

    # 6) pyttsx3 offline fallback
    test_py = os.path.join(TEMP_DIR, "py_test.wav")
    if pyttsx3_speak("నమస్కారం", test_py):
        print("[choose] falling back to pyttsx3")
        return ("pyttsx3", None)

    # 7) silent
    print("[choose] No TTS available; will use silent fallback")
    return (None, None)

# --- generate per-sentence audio and concat ---
async def generate_tts(news_lines, out_wav, engine_type, engine_voice):
    os.makedirs(os.path.dirname(out_wav) or ".", exist_ok=True)
    if engine_type == "edge":
        parts = []
        for idx, line in enumerate(news_lines):
            p = os.path.join(TEMP_DIR, f"edge_part_{idx}.wav")
            ok = await try_edge_tts(line, engine_voice, p)
            if not ok:
                print(f"[generate] edge failed for line {idx+1}; using silent chunk")
                write_silent_wav(p, 1.5)
            parts.append(p)
        listfile = os.path.join(TEMP_DIR, "concat_list.txt")
        with open(listfile, "w") as f:
            for p in parts:
                f.write(f"file '{p}'\n")
        # concat via ffmpeg
        os.system(f"ffmpeg -y -f concat -safe 0 -i {listfile} -c copy {out_wav} -loglevel quiet")
        if file_exists_and_nonzero(out_wav):
            return out_wav
        # fallback: moviepy concat
        from moviepy.editor import AudioFileClip, concatenate_audioclips
        clips = [AudioFileClip(p) for p in parts]
        final = concatenate_audioclips(clips)
        final.write_audiofile(out_wav, fps=22050, verbose=False, logger=None)
        final.close()
        for c in clips:
            try: c.close()
            except: pass
        return out_wav
    elif engine_type == "gtts":
        joined = " । ".join(news_lines)
        if gtts_to_wav(joined, out_wav):
            return out_wav
        write_silent_wav(out_wav, duration_s=max(2.0, len(news_lines)*1.5))
        return out_wav
    elif engine_type == "pyttsx3":
        joined = " । ".join(news_lines)
        if pyttsx3_speak(joined, out_wav):
            return out_wav
        write_silent_wav(out_wav, duration_s=max(2.0, len(news_lines)*1.5))
        return out_wav
    else:
        write_silent_wav(out_wav, duration_s=max(2.0, len(news_lines)*1.5))
        return out_wav

# --- Stable Diffusion image function (placeholder if no SD) ---
device = "cuda" if torch.cuda.is_available() else "cpu"
try:
    pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5",
                                             torch_dtype=torch.float16 if device=="cuda" else torch.float32).to(device)
    pipe.safety_checker = None
except Exception as e:
    print("[warn] SD pipeline not loaded:", e)
    pipe = None

def generate_image(prompt, path):
    if pipe is None:
        Image.new("RGB",(720,480),(80,80,80)).save(path)
        return path
    try:
        res = pipe(prompt, width=720, height=480,
                   guidance_scale=7.5,
                   negative_prompt="text, watermark, logo, signature, lowres")
        res.images[0].save(path)
        return path
    except Exception as e:
        print("[image] error:", e)
        Image.new("RGB",(720,480),(80,80,80)).save(path)
        return path

# -------------------------
# Example themes (expand)
# -------------------------
themes = {
    "political": [
        "హైదరాబాద్ మెట్రో విస్తరణ పనులు వేగంగా కొనసాగుతున్నాయి.",
        "ఆంధ్రప్రదేశ్ ప్రభుత్వం కొత్త పారిశ్రామిక విధానం ప్రకటించింది.",
        "తెలంగాణ హరితహారం కార్యక్రమం మరింత ఉత్సాహంగా సాగుతోంది."
    ]
}

# -------------------------
# Runner
# -------------------------
async def run_all():
    engine_type, engine_voice = await choose_male_tts()
    print("[run] Selected engine:", engine_type, "| voice:", engine_voice)
    for theme, lines in themes.items():
        outdir = f"/content/output/{theme}"
        os.makedirs(outdir + "/images", exist_ok=True)
        os.makedirs(outdir + "/audio", exist_ok=True)
        wav_out = outdir + f"/{theme}.wav"
        print(f"\n[run] Generating audio for theme '{theme}'")
        await generate_tts(lines, wav_out, engine_type, engine_voice)
        if file_exists_and_nonzero(wav_out):
            print("[run] Audio saved:", wav_out, "duration(s):", AudioFileClip(wav_out).duration)
        else:
            print("[run] Audio missing or empty:", wav_out)
        # images
        for i, line in enumerate(lines):
            prompt = f"{line}, editorial news photo, realistic, high detail"
            imgp = outdir + f"/images/{i+1}.png"
            generate_image(prompt, imgp)
            print("[run] Image saved:", imgp)

# Execute
asyncio.run(run_all())


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

[edge] total voices available: 594 (showing Telugu subset first)
  TEL: te-IN-ShrutiNeural | Gender: Female | Name: Microsoft Server Speech Text to Speech Voice (te-IN, ShrutiNeural)
  TEL: te-IN-MohanNeural | Gender: Male | Name: Microsoft Server Speech Text to Speech Voice (te-IN, MohanNeural)

[edge] sample male (non-Telugu) voices:
  af-ZA-WillemNeural | Locale: af-ZA | Name: Microsoft Server Speech Text to Speech Voice (af-ZA, WillemNeural)
  am-ET-AmehaNeural | Locale: am-ET | Name: Microsoft Server Speech Text to Speech Voice (am-ET, AmehaNeural)
  ar-AE-HamdanNeural | Locale: ar-AE | Name: Microsoft Server Speech Text to Speech Voice (ar-AE, HamdanNeural)
  ar-BH-AliNeural | Locale: ar-BH | Name: Microsoft Server Speech Text to Speech Voice (ar-BH, AliNeural)
  ar-DZ-IsmaelNeural | Locale: ar-DZ | Name: Microsoft Server Speech Text to Speech Voice (ar-DZ, IsmaelNeural)
  ar-EG-ShakirNeural | Locale: ar-EG | Name: Microsoft Server Speech Text to Speech Voice (ar-EG, ShakirNeural

Token indices sequence length is longer than the specified maximum sequence length for this model (103 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ు త ు న ్ న ా య ి., editorial news photo , realistic , high detail']


[run] Audio saved: /content/output/political/political.wav duration(s): 15.46


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['న ం ప ్ రకట ిం చ ిం ద ి., editorial news photo , realistic , high detail']


[run] Image saved: /content/output/political/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['స ా గ ు త ోం ద ి., editorial news photo , realistic , high detail']


[run] Image saved: /content/output/political/images/2.png


  0%|          | 0/50 [00:00<?, ?it/s]

[run] Image saved: /content/output/political/images/3.png


In [5]:
# ======================================================
# FIXED Telugu AI News Generator with Male Voice (gTTS fallback)
# ======================================================
!apt-get update -qq
!apt-get install -y -qq ffmpeg espeak
!pip install -q gTTS edge-tts pyttsx3 moviepy diffusers transformers torch torchvision torchaudio pillow scipy requests nest_asyncio rembg opencv-python onnxruntime ffmpeg-python

import os, shutil, wave, struct, asyncio, nest_asyncio, cv2, torch, ffmpeg
from rembg import remove
from PIL import Image
from diffusers import DiffusionPipeline
import edge_tts
import pyttsx3
from gtts import gTTS
from moviepy.editor import *
nest_asyncio.apply()

# ---------------------------
# Config
# ---------------------------
anchor_video_path = "/content/anchor.mp4"
fps = 24
anchor_size = (320, 320)
anchor_position = ("right", "bottom")
TEMP_DIR = "/content/_temp"
os.makedirs(TEMP_DIR, exist_ok=True)

PREFERRED_VOICES = ["te-IN-MohanNeural", "te-IN-PrabhatNeural", "te-IN-ShrutiNeural"]

themes = {
    "political": [
        "హైderabad మెట్రో విస్తరణ పనులు వేగంగా కొనసాగుతున్నాయి.",
        "ఆంధ్రప్రదేశ్ ప్రభుత్వం కొత్త పారిశ్రామిక విధానం ప్రకటించింది.",
        "తెలంగాణ హరితహారం కార్యక్రమం మరింత ఉత్సాహంగా సాగుతోంది."
    ]
}
prompts = {theme: [f"{theme} scene {i+1}" for i in range(len(themes[theme]))] for theme in themes}

# ---------------------------
# Stable Diffusion load
# ---------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
try:
    pipe = DiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float16 if device == "cuda" else torch.float32
    ).to(device)
    pipe.safety_checker = None
    print("[sd] pipeline loaded on", device)
except Exception as e:
    print("[sd] pipeline load failed:", e)
    pipe = None

# ---------------------------
# Helpers
# ---------------------------
def cleanup_temp():
    if os.path.exists(TEMP_DIR):
        shutil.rmtree(TEMP_DIR)
    os.makedirs(TEMP_DIR, exist_ok=True)

def file_exists(p): return os.path.exists(p) and os.path.getsize(p) > 500

def write_silent_wav(path, dur=2.0, sr=22050):
    import wave, struct
    nframes = int(dur * sr)
    with wave.open(path, 'w') as w:
        w.setnchannels(1)
        w.setsampwidth(2)
        w.setframerate(sr)
        w.writeframes(struct.pack('<h',0)*nframes)
    return path

# ---------------------------
# TTS fallback: gTTS male (lower pitch)
# ---------------------------
def generate_gtts_male(text, out_wav, pitch_factor=0.85):
    tmp_mp3 = os.path.join(TEMP_DIR, "gtts_tmp.mp3")
    try:
        gTTS(text=text, lang="te").save(tmp_mp3)
        (
            ffmpeg
            .input(tmp_mp3)
            .filter("asetrate", int(22050*pitch_factor))
            .output(out_wav, ar=22050, ac=1)
            .overwrite_output()
            .run(quiet=True)
        )
        if file_exists(out_wav):
            print("[gTTS] male voice generated:", out_wav)
            return out_wav
    except Exception as e:
        print("[gTTS] failed:", e)
    write_silent_wav(out_wav, 2)
    return out_wav

# ---------------------------
# pyttsx3 helper
# ---------------------------
def synthesize_with_pyttsx3(text, out_wav):
    try:
        engine = pyttsx3.init()
        voices = engine.getProperty('voices')
        male_voice = None
        for v in voices:
            if "male" in v.name.lower():
                male_voice = v.id
                break
        if male_voice:
            engine.setProperty('voice', male_voice)
        engine.save_to_file(text, out_wav)
        engine.runAndWait()
        return file_exists(out_wav)
    except Exception as e:
        print("[pyttsx3] error:", e)
        return False

# ---------------------------
# Edge voice selection
# ---------------------------
async def test_edge_voice(name):
    tmp = os.path.join(TEMP_DIR, f"{name}.wav")
    try:
        await edge_tts.Communicate("నమస్కారం", voice=name).save(tmp)
        ok = file_exists(tmp)
        print(f"[edge test] {name}: {ok}")
        return ok
    except Exception as e:
        print(f"[edge test] {name} failed:", e)
        return False

async def choose_voice():
    try:
        voices = await edge_tts.list_voices()
        telugu = [v["ShortName"] for v in voices if v["Locale"].startswith("te-IN")]
    except:
        telugu = []
    for pref in PREFERRED_VOICES:
        if pref in telugu and await test_edge_voice(pref):
            return ("edge", pref)
    if synthesize_with_pyttsx3("నమస్కారం", os.path.join(TEMP_DIR, "py.wav")):
        return ("pyttsx3", None)
    return ("gtts", None)

# ---------------------------
# TTS generation wrapper
# ---------------------------
async def generate_tts(texts, out_audio, engine, voice=None):
    cleanup_temp()
    text = " ".join(texts)
    if engine == "edge":
        try:
            await edge_tts.Communicate(text, voice=voice).save(out_audio)
            if file_exists(out_audio):
                return out_audio
        except Exception as e:
            print("[edge] failed:", e)
    elif engine == "pyttsx3":
        if synthesize_with_pyttsx3(text, out_audio):
            return out_audio
    # fallback
    return generate_gtts_male(text, out_audio)

# ---------------------------
# Image generation
# ---------------------------
def generate_image(prompt, path):
    if pipe is None:
        Image.new("RGB", (720,480), color=(60,60,60)).save(path)
        return path
    try:
        img = pipe(prompt, width=720, height=480).images[0]
        img.save(path)
        return path
    except Exception as e:
        print("[img] failed:", e)
        Image.new("RGB", (720,480), color=(60,60,60)).save(path)
        return path

# ---------------------------
# Video generator
# ---------------------------
async def generate_video(theme, texts, prompts_list, engine, voice):
    outdir = f"/content/output/{theme}"
    os.makedirs(outdir, exist_ok=True)
    img_paths = []
    for i, p in enumerate(prompts_list):
        ip = f"{outdir}/img_{i}.png"
        generate_image(p, ip)
        img_paths.append(ip)
    audio_path = f"{outdir}/voice.wav"
    await generate_tts(texts, audio_path, engine, voice)
    audio_clip = AudioFileClip(audio_path)
    per_img = audio_clip.duration / len(img_paths)
    clips = [ImageClip(ip).set_duration(per_img) for ip in img_paths]
    video = concatenate_videoclips(clips).set_audio(audio_clip)
    out_path = f"/content/telugu_news_{theme}.mp4"
    video.write_videofile(out_path, fps=fps, codec="libx264", audio_codec="aac")
    print(f"[video] done: {out_path}")

# ---------------------------
# Main
# ---------------------------
async def run_all():
    engine, voice = await choose_voice()
    print("[chosen] engine:", engine, "voice:", voice)
    for t in themes:
        await generate_video(t, themes[t], prompts[t], engine, voice)
    print("✅ Done! Check /content/telugu_news_<theme>.mp4")

await run_all()

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

[sd] pipeline loaded on cuda
[edge test] te-IN-MohanNeural failed: No audio was received. Please verify that your parameters are correct.
[edge test] te-IN-ShrutiNeural failed: No audio was received. Please verify that your parameters are correct.
[pyttsx3] error: SetVoiceByName failed with unknown return code -1 for voice: gmw/en
[chosen] engine: gtts voice: None


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

[gTTS] male voice generated: /content/output/political/voice.wav
Moviepy - Building video /content/telugu_news_political.mp4.
MoviePy - Writing audio in telugu_news_politicalTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/telugu_news_political.mp4





Moviepy - Done !
Moviepy - video ready /content/telugu_news_political.mp4
[video] done: /content/telugu_news_political.mp4
✅ Done! Check /content/telugu_news_<theme>.mp4


In [6]:
# ============================================================
# FINAL: Telugu AI News Video Pipeline
# - Male-like Telugu TTS (gTTS + pitch lowering)
# - Stable Diffusion images (if available)
# - Anchor background removal + overlay at ("right","bottom")
# - Optional: use uploaded WAV as voice (toggle below)
# ============================================================

# 1) Installs (run once)
!apt-get update -qq
!apt-get install -y -qq ffmpeg espeak
!pip install -q gTTS ffmpeg-python moviepy diffusers transformers torch torchvision torchaudio pillow scipy requests nest_asyncio rembg opencv-python onnxruntime edge-tts pyttsx3

# 2) Imports
import os, shutil, asyncio, nest_asyncio, time, math
from pathlib import Path
from gtts import gTTS
import ffmpeg
from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips, CompositeVideoClip, VideoFileClip
from PIL import Image
import torch
from diffusers import DiffusionPipeline
from rembg import remove
import edge_tts, pyttsx3
nest_asyncio.apply()

# 3) Config - defaults and toggles
TEMP_DIR = "/content/_temp"
OUT_ROOT = "/content/output"
os.makedirs(TEMP_DIR, exist_ok=True); os.makedirs(OUT_ROOT, exist_ok=True)
FPS = 24
ANCHOR_VIDEO = "/content/anchor.mp4"                  # upload anchor.mp4 here to use it
ANCHOR_SIZE = (320, 320)
ANCHOR_POSITION = ("right", "bottom")                 # <<--- anchor location set to right-bottom
PITCH_FACTOR = 0.80                                   # <1 lowers pitch (0.8 ≈ -3 semitones)
USE_UPLOADED_WAV_AS_VOICE = False                     # set True to use uploaded WAV as final voice
UPLOADED_WAV = "/mnt/data/a22d6422-6664-457f-9eec-08a794c9ebda.wav"  # detected uploaded file (optional)

# 4) Themes (Telugu sample lines). Edit/extend as needed.
themes = {
    "political": [
        "హైదరాబాద్ మెట్రో విస్తరణ పనులు వేగంగా కొనసాగుతున్నాయి.",
        "ఆంధ్రప్రదేశ్ ప్రభుత్వం కొత్త పారిశ్రామిక విధానం ప్రకటించింది.",
        "తెలంగాణ హరితహారం కార్యక్రమం మరింత ఉత్సాహంగా సాగుతోంది."
    ],
    "sports": [
        "స్థానిక క్రికెట్ లీగ్ శ్రేష్ఠ ఆటగాడు రికార్డు స్థాపించాడు.",
        "కొత్త టోర్నమెంట్ విజయవంతంగా ముగిసింది."
    ],
    "technology": [
        "ఒక స్టార్టప్ కొత్త AI సాధనాన్ని విడుదల చేసింది.",
        "స్మార్ట్‌ఫోన్ ఉత్పత్తుల ధరలో తగ్గుదల కనిపించింది."
    ],
    "weather": [
        "రేపటి వద్ద వర్షాల అంచనా ఉందని అధికారులు హెచ్చరిస్తున్నారు.",
        "ఉత్తర ప్రాంతాల్లో చలికాలం ప్రారంభమవుతుంది."
    ]
}

# 5) Stable Diffusion initialization (optional; fallback to placeholders)
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = None
try:
    print("[SD] loading Stable Diffusion (may take a minute)...")
    pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5",
                                             torch_dtype=torch.float16 if device=="cuda" else torch.float32).to(device)
    pipe.safety_checker = None
    print("[SD] pipeline ready on", device)
except Exception as e:
    print("[SD] pipeline not available, using placeholder images.", e)
    pipe = None

# 6) Utilities
import wave, struct
def write_silent_wav(path, duration_s=1.0, sr=22050):
    n_frames = int(duration_s * sr)
    with wave.open(path,'w') as wf:
        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
        wf.writeframes(struct.pack('<h',0) * n_frames)
    return path

def file_ok(p, min_size=200):
    return os.path.exists(p) and os.path.getsize(p) > min_size

def cleanup_temp():
    if os.path.exists(TEMP_DIR):
        shutil.rmtree(TEMP_DIR)
    os.makedirs(TEMP_DIR, exist_ok=True)

# 7) Male-like TTS using gTTS + pitch lowering
def generate_male_voice_gtts(text, out_wav, pitch_factor=PITCH_FACTOR):
    """
    Generate Telugu TTS via gTTS, then lower pitch using ffmpeg asetrate filter.
    """
    cleanup_temp()
    tmp_mp3 = os.path.join(TEMP_DIR, "gtts_tmp.mp3")
    try:
        print("[TTS] generating via gTTS...")
        tts = gTTS(text=text, lang="te")
        tts.save(tmp_mp3)
        print("[TTS] applying pitch lowering (factor {:.2f})...".format(pitch_factor))
        (
            ffmpeg
            .input(tmp_mp3)
            .filter("asetrate", int(22050 * pitch_factor))
            .output(out_wav, ar=22050, ac=1)
            .overwrite_output()
            .run(quiet=True)
        )
        if os.path.exists(tmp_mp3):
            os.remove(tmp_mp3)
        if file_ok(out_wav):
            print("[TTS] saved male-like audio:", out_wav)
            return out_wav
        else:
            print("[TTS] gTTS produced empty/tiny file; making silent fallback")
    except Exception as e:
        print("[TTS] error:", e)
    write_silent_wav(out_wav, duration_s=max(1.0, len(text.split()) * 0.08))
    return out_wav

# 8) Optional: if uploaded WAV should be used as final voice for all themes
def use_uploaded_wav_if_requested(final_wav_path):
    """
    If USE_UPLOADED_WAV_AS_VOICE is True and UPLOADED_WAV exists, copy it to final_wav_path.
    Otherwise do nothing.
    """
    if USE_UPLOADED_WAV_AS_VOICE and file_ok(UPLOADED_WAV):
        shutil.copy(UPLOADED_WAV, final_wav_path)
        print("[VOICE] using uploaded wav as voice:", UPLOADED_WAV)
        return final_wav_path
    return None

# 9) Stable Diffusion image generator (or placeholder)
def build_prompt_from_text(text):
    return f"{text}, editorial news photo, realistic, high detail, cinematic lighting, 3:2 composition"

def generate_sd_image(prompt, out_path):
    if pipe is None:
        Image.new("RGB",(720,480),(60,60,60)).save(out_path)
        return out_path
    try:
        negative = "text, watermark, logo, signature, lowres"
        result = pipe(prompt, negative_prompt=negative, width=720, height=480, guidance_scale=7.5)
        img = result.images[0]
        img.save(out_path)
        return out_path
    except Exception as e:
        print("[SD] image gen failed:", e)
        Image.new("RGB",(720,480),(60,60,60)).save(out_path)
        return out_path

# 10) Anchor processing: extract frames & remove background, then return list of cleaned frames
def prepare_anchor_frames(anchor_path, max_seconds=8, out_dir="anchor_clean_frames"):
    if not os.path.exists(anchor_path):
        print("[anchor] anchor not found at", anchor_path)
        return []
    os.makedirs("anchor_frames", exist_ok=True)
    os.makedirs(out_dir, exist_ok=True)
    clip = VideoFileClip(anchor_path)
    clip_sub = clip.subclip(0, min(max_seconds, clip.duration)).resize(height=720)
    idx = 0
    for t in [i/FPS for i in range(int(min(max_seconds, clip_sub.duration) * FPS))]:
        frame = clip_sub.get_frame(t)
        fp = f"anchor_frames/frame_{idx:04d}.png"
        Image.fromarray(frame).save(fp)
        # try rembg removal
        try:
            img = Image.open(fp).convert("RGBA")
            out = remove(img)
            out.save(os.path.join(out_dir, f"frame_{idx:04d}.png"))
        except Exception as e:
            # fallback: save original frame
            Image.fromarray(frame).save(os.path.join(out_dir, f"frame_{idx:04d}.png"))
        idx += 1
    clip.close()
    frames = sorted([os.path.join(out_dir,f) for f in os.listdir(out_dir)])
    print("[anchor] prepared", len(frames), "frames")
    return frames

# 11) Build per-theme video
def build_theme_video(theme_name, lines, anchor_frames):
    out_dir = os.path.join(OUT_ROOT, theme_name)
    os.makedirs(out_dir, exist_ok=True)
    images_dir = os.path.join(out_dir, "images"); os.makedirs(images_dir, exist_ok=True)
    audio_dir = os.path.join(out_dir, "audio"); os.makedirs(audio_dir, exist_ok=True)

    # a) generate images
    image_paths = []
    for i, line in enumerate(lines):
        prompt = build_prompt_from_text(line)
        ip = os.path.join(images_dir, f"{i+1}.png")
        generate_sd_image(prompt, ip)
        image_paths.append(ip)
        print(f"[{theme_name}] image {i+1} -> {ip}")

    # b) generate audio (or use uploaded wav if toggled)
    joined_text = " । ".join(lines)
    final_audio = os.path.join(audio_dir, f"{theme_name}.wav")
    # check uploaded override
    if USE_UPLOADED_WAV_AS_VOICE and file_ok(UPLOADED_WAV):
        shutil.copy(UPLOADED_WAV, final_audio)
        print(f"[{theme_name}] used uploaded wav as final audio: {UPLOADED_WAV}")
    else:
        # synthesize male-like gTTS audio
        generate_male_voice_gtts(joined_text, final_audio, pitch_factor=PITCH_FACTOR)

    if not file_ok(final_audio):
        write_silent_wav(final_audio, duration_s=max(1.5, len(lines)*1.0))

    # c) create video timed to audio
    audio_clip = AudioFileClip(final_audio)
    per_image = max(0.8, audio_clip.duration / max(1, len(image_paths)))
    img_clips = []
    for p in image_paths:
        try:
            img_clips.append(ImageClip(p).set_duration(per_image).set_fps(FPS))
        except Exception as e:
            print("[video] image clip error:", e)
    if not img_clips:
        print("[video] no images, skipping theme", theme_name)
        return None
    news_video = concatenate_videoclips(img_clips, method="compose").set_audio(audio_clip)

    # d) overlay anchor if frames exist
    final_clip = news_video
    if anchor_frames:
        try:
            anchor_seq = [ImageClip(f).set_duration(1.0/FPS) for f in anchor_frames]
            anchor_clip = concatenate_videoclips(anchor_seq, method="compose").loop(duration=news_video.duration)
            anchor_clip = anchor_clip.resize(newsize=ANCHOR_SIZE).set_pos(ANCHOR_POSITION)
            final_clip = CompositeVideoClip([news_video, anchor_clip])
            print(f"[{theme_name}] anchor overlaid at position {ANCHOR_POSITION}")
        except Exception as e:
            print("[video] anchor overlay failed:", e)

    out_video = os.path.join(out_dir, f"{theme_name}.mp4")
    final_clip.write_videofile(out_video, fps=FPS, codec="libx264", audio_codec="aac")
    # cleanup
    try: final_clip.close()
    except: pass
    try: news_video.close()
    except: pass
    audio_clip.close()
    print(f"[{theme_name}] final video saved -> {out_video}")
    return out_video

# 12) Main driver
def run_all():
    print("=== Telugu AI News pipeline starting ===")
    # prepare anchor frames if anchor exists
    anchor_frames = []
    if os.path.exists(ANCHOR_VIDEO):
        try:
            anchor_frames = prepare_anchor_frames(ANCHOR_VIDEO)
        except Exception as e:
            print("[main] anchor processing error:", e)
            anchor_frames = []
    else:
        print("[main] anchor.mp4 not found; continuing without anchor overlay.")

    outputs = {}
    for theme, lines in themes.items():
        print("\n--- Generating theme:", theme)
        out = build_theme_video(theme, lines, anchor_frames)
        if out:
            outputs[theme] = out

    print("\nAll done. Videos saved under:", OUT_ROOT)
    return outputs

# 13) Run
outputs = run_all()
print("Produced outputs:", outputs)


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
[SD] loading Stable Diffusion (may take a minute)...


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

[SD] pipeline ready on cuda
=== Telugu AI News pipeline starting ===


Downloading data from 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' to file '/root/.u2net/u2net.onnx'.
100%|████████████████████████████████████████| 176M/176M [00:00<00:00, 263GB/s]
Token indices sequence length is longer than the specified maximum sequence length for this model (111 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ు త ు న ్ న ా య ి., editorial news photo , realistic , high detail , cinematic lighting , 3 : 2 composition']


[anchor] prepared 192 frames

--- Generating theme: political


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['న ం ప ్ రకట ిం చ ిం ద ి., editorial news photo , realistic , high detail , cinematic lighting , 3 : 2 composition']


[political] image 1 -> /content/output/political/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['స ా గ ు త ోం ద ి., editorial news photo , realistic , high detail , cinematic lighting , 3 : 2 composition']


[political] image 2 -> /content/output/political/images/2.png


  0%|          | 0/50 [00:00<?, ?it/s]

[political] image 3 -> /content/output/political/images/3.png
[TTS] generating via gTTS...
[TTS] applying pitch lowering (factor 0.80)...
[TTS] saved male-like audio: /content/output/political/audio/political.wav
[political] anchor overlaid at position ('right', 'bottom')
Moviepy - Building video /content/output/political/political.mp4.
MoviePy - Writing audio in politicalTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/political/political.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['్ థ ా ప ిం చ ా డ ు., editorial news photo , realistic , high detail , cinematic lighting , 3 : 2 composition']


Moviepy - Done !
Moviepy - video ready /content/output/political/political.mp4
[political] final video saved -> /content/output/political/political.mp4

--- Generating theme: sports


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['cinematic lighting , 3 : 2 composition']


[sports] image 1 -> /content/output/sports/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

[sports] image 2 -> /content/output/sports/images/2.png
[TTS] generating via gTTS...
[TTS] applying pitch lowering (factor 0.80)...
[TTS] saved male-like audio: /content/output/sports/audio/sports.wav
[sports] anchor overlaid at position ('right', 'bottom')
Moviepy - Building video /content/output/sports/sports.mp4.
MoviePy - Writing audio in sportsTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/sports/sports.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['photo , realistic , high detail , cinematic lighting , 3 : 2 composition']


Moviepy - Done !
Moviepy - video ready /content/output/sports/sports.mp4
[sports] final video saved -> /content/output/sports/sports.mp4

--- Generating theme: technology


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ద ి., editorial news photo , realistic , high detail , cinematic lighting , 3 : 2 composition']


[technology] image 1 -> /content/output/technology/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

[technology] image 2 -> /content/output/technology/images/2.png
[TTS] generating via gTTS...
[TTS] applying pitch lowering (factor 0.80)...
[TTS] saved male-like audio: /content/output/technology/audio/technology.wav
[technology] anchor overlaid at position ('right', 'bottom')
Moviepy - Building video /content/output/technology/technology.mp4.
MoviePy - Writing audio in technologyTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/technology/technology.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ర ి స ్ త ు న ్ న ా ర ు., editorial news photo , realistic , high detail , cinematic lighting , 3 : 2 composition']


Moviepy - Done !
Moviepy - video ready /content/output/technology/technology.mp4
[technology] final video saved -> /content/output/technology/technology.mp4

--- Generating theme: weather


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['photo , realistic , high detail , cinematic lighting , 3 : 2 composition']


[weather] image 1 -> /content/output/weather/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

[weather] image 2 -> /content/output/weather/images/2.png
[TTS] generating via gTTS...
[TTS] applying pitch lowering (factor 0.80)...
[TTS] saved male-like audio: /content/output/weather/audio/weather.wav
[weather] anchor overlaid at position ('right', 'bottom')
Moviepy - Building video /content/output/weather/weather.mp4.
MoviePy - Writing audio in weatherTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/weather/weather.mp4





Moviepy - Done !
Moviepy - video ready /content/output/weather/weather.mp4
[weather] final video saved -> /content/output/weather/weather.mp4

All done. Videos saved under: /content/output
Produced outputs: {'political': '/content/output/political/political.mp4', 'sports': '/content/output/sports/sports.mp4', 'technology': '/content/output/technology/technology.mp4', 'weather': '/content/output/weather/weather.mp4'}


In [7]:
from IPython.display import Video

Video("/content/output/political/political.mp4", embed=True, width=640, height=360)


In [8]:
# ==============================================================
# FINAL: Telugu AI News Video Pipeline (anchor bottom-right, SD images, male gTTS)
# Copy-paste the entire cell into Google Colab and run.
# Outputs: /content/output/<theme>/<theme>.mp4
# ==============================================================

# ---- Installs (run once) ----
!apt-get update -qq
!apt-get install -y -qq ffmpeg espeak
!pip install -q gTTS ffmpeg-python moviepy diffusers transformers torch torchvision torchaudio pillow scipy requests nest_asyncio rembg opencv-python onnxruntime

# ---- Imports ----
import os, shutil, time, math, asyncio, nest_asyncio
from pathlib import Path
from gtts import gTTS
import ffmpeg
from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips, CompositeVideoClip, VideoFileClip
from PIL import Image
import torch
from diffusers import DiffusionPipeline
from rembg import remove
nest_asyncio.apply()

# ---- Config ----
TEMP_DIR = "/content/_temp"
OUT_ROOT = "/content/output"
os.makedirs(TEMP_DIR, exist_ok=True)
os.makedirs(OUT_ROOT, exist_ok=True)

FPS = 24
ANCHOR_VIDEO = "/content/anchor.mp4"   # upload anchor.mp4 here if you want overlay
ANCHOR_MAX_SECONDS = 8                 # how many seconds of anchor to sample and loop (keeps processing small)
ANCHOR_SIZE = (320, 320)
ANCHOR_POSITION = ("right", "bottom")  # bottom-right
PITCH_FACTOR = 0.80                    # <1 lowers pitch (0.8 ≈ deeper)

# ---- Themes and Telugu lines (edit as needed) ----
themes = {
    "political": [
        "హైదరాబాద్ మెట్రో విస్తరణ పనులు వేగంగా కొనసాగుతున్నాయి.",
        "ఆంధ్రప్రదేశ్ ప్రభుత్వం కొత్త పారిశ్రామిక విధానం ప్రకటించింది.",
        "తెలంగాణ హరితహారం కార్యక్రమం మరింత ఉత్సాహంగా సాగుతోంది."
    ],
    "sports": [
        "స్థానిక క్రికెట్ లీగ్ శ్రేష్ఠ ఆటగాడు రికార్డు స్థాపించాడు.",
        "కొత్త టోర్నమెంట్ విజయవంతంగా ముగిసింది."
    ],
    "technology": [
        "ఒక స్టార్టప్ కొత్త AI సాధనాన్ని విడుదల చేసింది.",
        "స్మార్ట్‌ఫోన్ ఉత్పత్తుల ధరలో తగ్గుదల కనిపించింది."
    ],
    "weather": [
        "రేపటి వద్ద వర్షాల అంచనా ఉందని అధికారులు హెచ్చరిస్తున్నారు.",
        "ఉత్తర ప్రాంతాల్లో చలికాలం ప్రారంభమవుతుంది."
    ]
}

# ---- Stable Diffusion init (optional) ----
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = None
try:
    print("[SD] Loading Stable Diffusion pipeline (may take time)...")
    pipe = DiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float16 if device=="cuda" else torch.float32
    ).to(device)
    pipe.safety_checker = None
    print("[SD] Ready on", device)
except Exception as e:
    print("[SD] Not available or failed to load; using placeholders. Error:", e)
    pipe = None

# ---- Utilities ----
import wave, struct
def write_silent_wav(path, duration_s=1.0, sr=22050):
    nframes = int(duration_s * sr)
    with wave.open(path, 'w') as wf:
        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
        wf.writeframes(struct.pack('<h',0) * nframes)
    return path

def cleanup_temp():
    if os.path.exists(TEMP_DIR):
        shutil.rmtree(TEMP_DIR)
    os.makedirs(TEMP_DIR, exist_ok=True)

def file_ok(path, min_size=200):
    return os.path.exists(path) and os.path.getsize(path) > min_size

# ---- Male-like Telugu TTS using gTTS + pitch lowering ----
def synthesize_male_gtts(text, out_wav, pitch_factor=PITCH_FACTOR):
    cleanup_temp()
    tmp_mp3 = os.path.join(TEMP_DIR, "gtts_temp.mp3")
    try:
        print("[TTS] generating gTTS mp3...")
        tts = gTTS(text=text, lang="te")
        tts.save(tmp_mp3)
        # lower pitch by changing sample rate then resample to 22050
        print(f"[TTS] lowering pitch (factor={pitch_factor}) and converting to WAV...")
        (
            ffmpeg
            .input(tmp_mp3)
            .filter("asetrate", int(22050 * pitch_factor))
            .output(out_wav, ar=22050, ac=1)
            .overwrite_output()
            .run(quiet=True)
        )
        if os.path.exists(tmp_mp3):
            os.remove(tmp_mp3)
        if file_ok(out_wav):
            print("[TTS] male-like WAV saved:", out_wav)
            return out_wav
    except Exception as e:
        print("[TTS] gTTS->WAV failed:", e)
    write_silent_wav(out_wav, duration_s=max(1.0, len(text.split())*0.08))
    return out_wav

# ---- SD image prompt builder (theme-aware) ----
def theme_prompt(theme, text):
    # Add theme-specific keywords to encourage relevant imagery
    theme_keywords = {
        "political": "political leaders, parliament, flags, crowd, press conference",
        "sports": "stadium, players in action, cheering crowd, trophies",
        "technology": "AI, circuit boards, futuristic devices, startup office",
        "weather": "storm clouds, heavy rain, sun breaking through clouds, weather map"
    }
    keywords = theme_keywords.get(theme, "")
    return f"{text}, {keywords}, editorial news photography, high detail, cinematic lighting"

def generate_image_for_text(theme, text, out_path):
    prompt = theme_prompt(theme, text)
    if pipe is None:
        # placeholder neutral image
        Image.new("RGB",(720,480),(70,70,70)).save(out_path)
        return out_path
    try:
        neg = "text, watermark, logo, signature, lowres"
        res = pipe(prompt, negative_prompt=neg, width=720, height=480, guidance_scale=7.5)
        img = res.images[0]
        img.save(out_path)
        return out_path
    except Exception as e:
        print("[SD] image failed:", e)
        Image.new("RGB",(720,480),(70,70,70)).save(out_path)
        return out_path

# ---- Anchor processing: extract frames, remove bg, return cleaned frames list ----
def prepare_anchor_frames(anchor_path, out_dir="anchor_clean_frames", max_seconds=ANCHOR_MAX_SECONDS):
    if not os.path.exists(anchor_path):
        print("[anchor] anchor file not found:", anchor_path)
        return []
    os.makedirs("anchor_frames", exist_ok=True)
    os.makedirs(out_dir, exist_ok=True)
    clip = VideoFileClip(anchor_path)
    dur = min(max_seconds, clip.duration)
    clip_sub = clip.subclip(0, dur).resize(height=720)  # keep reasonable resolution
    idx = 0
    for t in [i/FPS for i in range(int(dur*FPS))]:
        frame = clip_sub.get_frame(t)
        fp = f"anchor_frames/frame_{idx:04d}.png"
        Image.fromarray(frame).save(fp)
        try:
            img = Image.open(fp).convert("RGBA")
            out = remove(img)   # may raise if rembg not available
            out.save(os.path.join(out_dir, f"frame_{idx:04d}.png"))
        except Exception:
            # fallback: use original frame (no alpha)
            Image.fromarray(frame).save(os.path.join(out_dir, f"frame_{idx:04d}.png"))
        idx += 1
    clip.close()
    frames = sorted([os.path.join(out_dir,f) for f in os.listdir(out_dir)])
    print(f"[anchor] prepared {len(frames)} cleaned frames")
    return frames

# ---- Build per-theme video ----
def build_theme_video(theme_name, lines, anchor_frames):
    out_dir = os.path.join(OUT_ROOT, theme_name)
    imgs_dir = os.path.join(out_dir, "images"); os.makedirs(imgs_dir, exist_ok=True)
    aud_dir = os.path.join(out_dir, "audio"); os.makedirs(aud_dir, exist_ok=True)

    # 1) Generate per-line images (theme-aware)
    image_paths = []
    for i, text in enumerate(lines):
        ip = os.path.join(imgs_dir, f"{i+1}.png")
        generate_image_for_text(theme_name, text, ip)
        image_paths.append(ip)
        print(f"[{theme_name}] image saved: {ip}")

    # 2) Generate male TTS for joined text
    joined = " । ".join(lines)
    audio_out = os.path.join(aud_dir, f"{theme_name}.wav")
    synthesize_male_gtts(joined, audio_out, pitch_factor=PITCH_FACTOR)
    if not file_ok(audio_out):
        write_silent_wav(audio_out, duration_s=max(2.0, len(lines)*1.0))

    audio_clip = AudioFileClip(audio_out)
    total_dur = audio_clip.duration
    print(f"[{theme_name}] audio duration: {total_dur:.2f}s")

    # 3) Split images evenly across audio duration
    per_img = max(0.8, total_dur / max(1, len(image_paths)))
    clips = [ImageClip(p).set_duration(per_img).set_fps(FPS) for p in image_paths]
    news_clip = concatenate_videoclips(clips, method="compose").set_audio(audio_clip)

    # 4) Anchor overlay: create looping anchor clip that matches news duration and last full duration
    final = news_clip
    if anchor_frames:
        # anchor frames -> short looped clip
        anchor_seq = [ImageClip(f).set_duration(1.0/FPS) for f in anchor_frames]
        anchor_clip = concatenate_videoclips(anchor_seq, method="compose").loop(duration=news_clip.duration)
        anchor_clip = anchor_clip.resize(newsize=ANCHOR_SIZE).set_pos(ANCHOR_POSITION)
        # Ensure anchor_clip covers entire duration by set_duration (loop already does)
        final = CompositeVideoClip([news_clip, anchor_clip])
        print(f"[{theme_name}] anchor overlay applied (full duration) at {ANCHOR_POSITION}")
    else:
        print(f"[{theme_name}] no anchor frames; skipping overlay")

    # 5) Export final
    out_video = os.path.join(out_dir, f"{theme_name}.mp4")
    final.write_videofile(out_video, fps=FPS, codec="libx264", audio_codec="aac", threads=4)
    # cleanup
    try: final.close()
    except: pass
    try: news_clip.close()
    except: pass
    audio_clip.close()
    print(f"[{theme_name}] saved -> {out_video}")
    return out_video

# ---- Driver: run everything ----
def run_all():
    print("=== Starting Telugu AI News build ===")
    anchor_frames = []
    if os.path.exists(ANCHOR_VIDEO):
        try:
            anchor_frames = prepare_anchor_frames(ANCHOR_VIDEO)
        except Exception as e:
            print("[main] anchor prep failed:", e)
            anchor_frames = []
    else:
        print("[main] anchor.mp4 not present; proceeding without anchor")

    outputs = {}
    for theme, lines in themes.items():
        print("\n--- Building theme:", theme)
        try:
            out = build_theme_video(theme, lines, anchor_frames)
            outputs[theme] = out
        except Exception as ex:
            print(f"[main] failed for {theme}:", ex)

    print("\nAll done. Videos in:", OUT_ROOT)
    return outputs

# ---- Run ----
outputs = run_all()
print("Produced outputs:", outputs)

# ---- Quick preview & zip for download ----
from IPython.display import Video, display
# show one preview if exists
for t, path in outputs.items():
    if os.path.exists(path):
        print("Previewing:", path)
        display(Video(path, embed=True, width=640, height=360))
        break

# zip all outputs for download
zip_path = "/content/news_videos.zip"
shutil.make_archive(zip_path.replace(".zip",""), 'zip', OUT_ROOT)
print("Zipped outputs to", zip_path)


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
[SD] Loading Stable Diffusion pipeline (may take time)...


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

[SD] Ready on cuda
=== Starting Telugu AI News build ===


Token indices sequence length is longer than the specified maximum sequence length for this model (116 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ు త ు న ్ న ా య ి., political leaders , parliament , flags , crowd , press conference , editorial news photography , high detail , cinematic lighting']


[anchor] prepared 192 cleaned frames

--- Building theme: political


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['న ం ప ్ రకట ిం చ ిం ద ి., political leaders , parliament , flags , crowd , press conference , editorial news photography , high detail , cinematic lighting']


[political] image saved: /content/output/political/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['స ా గ ు త ోం ద ి., political leaders , parliament , flags , crowd , press conference , editorial news photography , high detail , cinematic lighting']


[political] image saved: /content/output/political/images/2.png


  0%|          | 0/50 [00:00<?, ?it/s]

[political] image saved: /content/output/political/images/3.png
[TTS] generating gTTS mp3...
[TTS] lowering pitch (factor=0.8) and converting to WAV...
[TTS] male-like WAV saved: /content/output/political/audio/political.wav
[political] audio duration: 21.03s
[political] anchor overlay applied (full duration) at ('right', 'bottom')
Moviepy - Building video /content/output/political/political.mp4.
MoviePy - Writing audio in politicalTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/political/political.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['్ థ ా ప ిం చ ా డ ు., stadium , players in action , cheering crowd , trophies , editorial news photography , high detail , cinematic lighting']


Moviepy - Done !
Moviepy - video ready /content/output/political/political.mp4
[political] saved -> /content/output/political/political.mp4

--- Building theme: sports


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['trophies , editorial news photography , high detail , cinematic lighting']


[sports] image saved: /content/output/sports/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

[sports] image saved: /content/output/sports/images/2.png
[TTS] generating gTTS mp3...
[TTS] lowering pitch (factor=0.8) and converting to WAV...
[TTS] male-like WAV saved: /content/output/sports/audio/sports.wav
[sports] audio duration: 11.56s
[sports] anchor overlay applied (full duration) at ('right', 'bottom')
Moviepy - Building video /content/output/sports/sports.mp4.
MoviePy - Writing audio in sportsTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/sports/sports.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['circuit boards , futuristic devices , startup office , editorial news photography , high detail , cinematic lighting']


Moviepy - Done !
Moviepy - video ready /content/output/sports/sports.mp4
[sports] saved -> /content/output/sports/sports.mp4

--- Building theme: technology


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ద ి., ai , circuit boards , futuristic devices , startup office , editorial news photography , high detail , cinematic lighting']


[technology] image saved: /content/output/technology/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

[technology] image saved: /content/output/technology/images/2.png
[TTS] generating gTTS mp3...
[TTS] lowering pitch (factor=0.8) and converting to WAV...
[TTS] male-like WAV saved: /content/output/technology/audio/technology.wav
[technology] audio duration: 11.98s
[technology] anchor overlay applied (full duration) at ('right', 'bottom')
Moviepy - Building video /content/output/technology/technology.mp4.
MoviePy - Writing audio in technologyTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/technology/technology.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ర ి స ్ త ు న ్ న ా ర ు., storm clouds , heavy rain , sun breaking through clouds , weather map , editorial news photography , high detail , cinematic lighting']


Moviepy - Done !
Moviepy - video ready /content/output/technology/technology.mp4
[technology] saved -> /content/output/technology/technology.mp4

--- Building theme: weather


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', heavy rain , sun breaking through clouds , weather map , editorial news photography , high detail , cinematic lighting']


[weather] image saved: /content/output/weather/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

[weather] image saved: /content/output/weather/images/2.png
[TTS] generating gTTS mp3...
[TTS] lowering pitch (factor=0.8) and converting to WAV...
[TTS] male-like WAV saved: /content/output/weather/audio/weather.wav
[weather] audio duration: 12.08s
[weather] anchor overlay applied (full duration) at ('right', 'bottom')
Moviepy - Building video /content/output/weather/weather.mp4.
MoviePy - Writing audio in weatherTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/weather/weather.mp4





Moviepy - Done !
Moviepy - video ready /content/output/weather/weather.mp4
[weather] saved -> /content/output/weather/weather.mp4

All done. Videos in: /content/output
Produced outputs: {'political': '/content/output/political/political.mp4', 'sports': '/content/output/sports/sports.mp4', 'technology': '/content/output/technology/technology.mp4', 'weather': '/content/output/weather/weather.mp4'}
Previewing: /content/output/political/political.mp4


Zipped outputs to /content/news_videos.zip


In [9]:
# ==============================================================
# FINAL FIXED: Telugu AI News Video Generator
# - Anchor visible FULL duration (auto-loop)
# - Themed AI images (realistic news-style)
# - Telugu male-like TTS (gTTS + pitch lowering)
# ==============================================================

!apt-get update -qq
!apt-get install -y -qq ffmpeg espeak
!pip install -q gTTS ffmpeg-python moviepy diffusers transformers torch torchvision torchaudio pillow scipy requests nest_asyncio rembg opencv-python onnxruntime

import os, shutil, math, ffmpeg, torch, asyncio, nest_asyncio
from gtts import gTTS
from moviepy.editor import *
from diffusers import DiffusionPipeline
from rembg import remove
from PIL import Image

nest_asyncio.apply()

# ---------------- CONFIG ----------------
FPS = 24
ANCHOR_VIDEO = "/content/anchor.mp4"
ANCHOR_SIZE = (320, 320)
ANCHOR_POSITION = ("right", "bottom")
PITCH_FACTOR = 0.82
TEMP_DIR = "/content/_temp"
OUT_ROOT = "/content/output"
os.makedirs(TEMP_DIR, exist_ok=True)
os.makedirs(OUT_ROOT, exist_ok=True)

themes = {
    "political": [
        "ప్రధాని నూతన విధానాన్ని ప్రకటించారు.",
        "ఆంధ్రప్రదేశ్ లో కొత్త ప్రాజెక్టు ప్రారంభమైంది.",
        "హైదరాబాద్ లో రాజకీయ సమావేశం నిర్వహించబడింది."
    ],
    "sports": [
        "భారత క్రికెట్ జట్టు అద్భుత విజయం సాధించింది.",
        "న్యూయార్క్ లో టెన్నిస్ ఫైనల్ జరిగింది."
    ],
    "technology": [
        "కృత్రిమ మేధస్సు ఆధారిత కొత్త యాప్ విడుదలైంది.",
        "స్మార్ట్‌ఫోన్ మార్కెట్‌లో కొత్త ఫీచర్లు వచ్చాయి."
    ],
    "weather": [
        "రేపు తెలంగాణ లో భారీ వర్షాలు కురిసే అవకాశం ఉంది.",
        "ఉత్తర భారతదేశంలో చలి తీవ్రత పెరుగుతోంది."
    ]
}

# ---------------- STABLE DIFFUSION INIT ----------------
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = None
try:
    print("[SD] Loading Stable Diffusion...")
    pipe = DiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float16 if device=="cuda" else torch.float32
    ).to(device)
    pipe.safety_checker = None
    print("[SD] Ready.")
except Exception as e:
    print("[SD] Fallback mode:", e)
    pipe = None

# ---------------- UTILITIES ----------------
def cleanup_temp():
    if os.path.exists(TEMP_DIR): shutil.rmtree(TEMP_DIR)
    os.makedirs(TEMP_DIR, exist_ok=True)

def write_silent_wav(path, duration_s=1.0, sr=22050):
    import wave, struct
    nframes = int(duration_s * sr)
    with wave.open(path,'w') as wf:
        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
        wf.writeframes(struct.pack('<h',0)*nframes)
    return path

def file_ok(path): return os.path.exists(path) and os.path.getsize(path)>200

# ---------------- TTS (gTTS + pitch lower) ----------------
def synthesize_male_tts(text, out_wav):
    cleanup_temp()
    tmp = os.path.join(TEMP_DIR, "tts.mp3")
    try:
        tts = gTTS(text=text, lang="te")
        tts.save(tmp)
        (
            ffmpeg
            .input(tmp)
            .filter("asetrate", int(22050*PITCH_FACTOR))
            .output(out_wav, ar=22050, ac=1)
            .overwrite_output()
            .run(quiet=True)
        )
        if file_ok(out_wav):
            print("[TTS] Saved male-like audio:", out_wav)
            return out_wav
    except Exception as e:
        print("[TTS] Failed:", e)
    write_silent_wav(out_wav, 3.0)
    return out_wav

# ---------------- IMAGE GENERATION ----------------
def theme_prompt(theme, text):
    prompts = {
        "political": "politician speaking, parliament, press conference, crowd, flags",
        "sports": "stadium, cricket players, trophies, cheering fans",
        "technology": "AI, robots, innovation, smartphone, tech background",
        "weather": "rain, sky, clouds, forecast, sunlight"
    }
    return f"{text}, {prompts.get(theme,'news scene')}, professional news photography, realistic lighting"

def generate_image(theme, text, path):
    if pipe is None:
        Image.new("RGB",(720,480),(80,80,80)).save(path)
        return path
    try:
        res = pipe(theme_prompt(theme, text), negative_prompt="text, watermark, lowres", width=720, height=480)
        res.images[0].save(path)
    except Exception as e:
        print("[SD] img fail:", e)
        Image.new("RGB",(720,480),(80,80,80)).save(path)
    return path

# ---------------- ANCHOR HANDLING ----------------
def prepare_anchor_frames():
    if not os.path.exists(ANCHOR_VIDEO):
        print("[Anchor] No file found.")
        return []
    clip = VideoFileClip(ANCHOR_VIDEO)
    os.makedirs("anchor_frames", exist_ok=True)
    os.makedirs("anchor_clean_frames", exist_ok=True)
    total_frames = int(clip.duration * FPS)
    for i in range(total_frames):
        frame = clip.get_frame(i/FPS)
        fp = f"anchor_frames/frame_{i:04d}.png"
        Image.fromarray(frame).save(fp)
        try:
            out = remove(Image.open(fp).convert("RGBA"))
            out.save(f"anchor_clean_frames/frame_{i:04d}.png")
        except:
            Image.fromarray(frame).save(f"anchor_clean_frames/frame_{i:04d}.png")
    clip.close()
    print(f"[Anchor] Frames prepared: {total_frames}")
    return sorted([os.path.join("anchor_clean_frames",f) for f in os.listdir("anchor_clean_frames")])

# ---------------- VIDEO BUILD ----------------
def build_theme_video(theme, lines, anchor_frames):
    out_dir = os.path.join(OUT_ROOT, theme)
    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(os.path.join(out_dir,"images"), exist_ok=True)

    # Generate themed images
    img_paths=[]
    for i, line in enumerate(lines):
        ip=os.path.join(out_dir,"images",f"{i+1}.png")
        generate_image(theme, line, ip)
        img_paths.append(ip)
    print(f"[{theme}] {len(img_paths)} images ready.")

    # TTS
    text_joined=" ".join(lines)
    audio_out=os.path.join(out_dir,"audio.wav")
    synthesize_male_tts(text_joined,audio_out)
    audio_clip=AudioFileClip(audio_out)
    dur=audio_clip.duration
    per_img=dur/len(img_paths)
    img_clips=[ImageClip(p).set_duration(per_img) for p in img_paths]
    news_clip=concatenate_videoclips(img_clips,method="compose").set_audio(audio_clip)

    # Anchor overlay — now loops for FULL video duration
    if anchor_frames:
        frame_clips=[ImageClip(f).set_duration(1/FPS) for f in anchor_frames]
        anchor_seq=concatenate_videoclips(frame_clips,method="compose").loop(duration=news_clip.duration)
        anchor_clip=anchor_seq.resize(newsize=ANCHOR_SIZE).set_pos(ANCHOR_POSITION)
        final=CompositeVideoClip([news_clip,anchor_clip])
    else:
        final=news_clip

    out_video=os.path.join(out_dir,f"{theme}.mp4")
    final.write_videofile(out_video,fps=FPS,codec="libx264",audio_codec="aac")
    print(f"[{theme}] Saved: {out_video}")
    return out_video

# ---------------- MAIN RUN ----------------
def run_all():
    print("🚀 Generating Telugu AI News Videos...")
    anchor_frames=prepare_anchor_frames()
    outputs={}
    for theme,lines in themes.items():
        print("\n🎬 Theme:",theme)
        outputs[theme]=build_theme_video(theme,lines,anchor_frames)
    print("\n✅ All Done! Files saved in:",OUT_ROOT)
    return outputs

outputs=run_all()

from IPython.display import Video, display
for t,p in outputs.items():
    if os.path.exists(p):
        print("Preview:",p)
        display(Video(p,embed=True,width=640,height=360))
        break


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
[SD] Loading Stable Diffusion...


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

[SD] Ready.
🚀 Generating Telugu AI News Videos...


Token indices sequence length is longer than the specified maximum sequence length for this model (82 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['news photography , realistic lighting']


[Anchor] Frames prepared: 229

🎬 Theme: political


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['., politician speaking , parliament , press conference , crowd , flags , professional news photography , realistic lighting']


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['�., politician speaking , parliament , press conference , crowd , flags , professional news photography , realistic lighting']


  0%|          | 0/50 [00:00<?, ?it/s]

[political] 3 images ready.
[TTS] Saved male-like audio: /content/output/political/audio.wav
Moviepy - Building video /content/output/political/political.mp4.
MoviePy - Writing audio in politicalTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/political/political.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['cricket players , trophies , cheering fans , professional news photography , realistic lighting']


Moviepy - Done !
Moviepy - video ready /content/output/political/political.mp4
[political] Saved: /content/output/political/political.mp4

🎬 Theme: sports


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['realistic lighting']


  0%|          | 0/50 [00:00<?, ?it/s]

[sports] 2 images ready.
[TTS] Saved male-like audio: /content/output/sports/audio.wav
Moviepy - Building video /content/output/sports/sports.mp4.
MoviePy - Writing audio in sportsTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/sports/sports.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ai , robots , innovation , smartphone , tech background , professional news photography , realistic lighting']


Moviepy - Done !
Moviepy - video ready /content/output/sports/sports.mp4
[sports] Saved: /content/output/sports/sports.mp4

🎬 Theme: technology


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', robots , innovation , smartphone , tech background , professional news photography , realistic lighting']


  0%|          | 0/50 [00:00<?, ?it/s]

[technology] 2 images ready.
[TTS] Saved male-like audio: /content/output/technology/audio.wav
Moviepy - Building video /content/output/technology/technology.mp4.
MoviePy - Writing audio in technologyTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/technology/technology.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ద ి., rain , sky , clouds , forecast , sunlight , professional news photography , realistic lighting']


Moviepy - Done !
Moviepy - video ready /content/output/technology/technology.mp4
[technology] Saved: /content/output/technology/technology.mp4

🎬 Theme: weather


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['forecast , sunlight , professional news photography , realistic lighting']


  0%|          | 0/50 [00:00<?, ?it/s]

[weather] 2 images ready.
[TTS] Saved male-like audio: /content/output/weather/audio.wav
Moviepy - Building video /content/output/weather/weather.mp4.
MoviePy - Writing audio in weatherTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/weather/weather.mp4





Moviepy - Done !
Moviepy - video ready /content/output/weather/weather.mp4
[weather] Saved: /content/output/weather/weather.mp4

✅ All Done! Files saved in: /content/output
Preview: /content/output/political/political.mp4


In [10]:
# ============================================================
# FINAL: Telugu AI News — Anchor (BG removed) bottom-right, SD images, male TTS
# Anchor source (uploaded): /mnt/data/552209c4-17d4-44bb-856e-3af139138d23.mp4
# ============================================================

# 1) Installs (run once)
!apt-get update -qq
!apt-get install -y -qq ffmpeg espeak
!pip install -q gTTS ffmpeg-python moviepy diffusers transformers torch torchvision torchaudio pillow scipy requests nest_asyncio rembg opencv-python onnxruntime

# 2) Imports
import os, shutil, time, math, asyncio, nest_asyncio
from pathlib import Path
from gtts import gTTS
import ffmpeg
from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips, CompositeVideoClip, VideoFileClip
from PIL import Image
import torch
from diffusers import DiffusionPipeline
from rembg import remove
nest_asyncio.apply()

# 3) Config
FPS = 24
TEMP_DIR = "/content/_temp"
OUT_ROOT = "/content/output"
os.makedirs(TEMP_DIR, exist_ok=True)
os.makedirs(OUT_ROOT, exist_ok=True)

# Use the uploaded anchor file (already on Colab drive)
ANCHOR_SOURCE = "/mnt/data/552209c4-17d4-44bb-856e-3af139138d23.mp4"
ANCHOR_MAX_SAMPLE_SECONDS = 15      # sample up to this many seconds from anchor to produce frames
ANCHOR_SIZE = (320, 320)
ANCHOR_POSITION = ("right", "bottom")   # bottom-right
PITCH_FACTOR = 0.82                     # lower for deeper voice (0.82~male-ish)

# 4) Themes (Telugu lines) — replace/extend as needed
themes = {
    "political": [
        "ప్రధాని నూతన విధానాన్ని ప్రకటించారు.",
        "ఆంధ్రప్రదేశ్ లో కొత్త ప్రాజెక్టు ప్రారంభమైంది.",
        "హైదరాబాద్ లో రాజకీయ సమావేశం నిర్వహించబడింది."
    ],
    "sports": [
        "భారత క్రికెట్ జట్టు అద్భుత విజయం సాధించింది.",
        "టోర్నమెంట్ ఫైనల్ ఉత్సాహభరితంగా ముగిసింది."
    ],
    "technology": [
        "ఒక స్టార్టప్ కొత్త AI సాధనాన్ని విడుదల చేసింది.",
        "స్మార్ట్‌ఫోన్ మార్కెట్లో కొత్త ఫీచర్లు వచ్చాయి."
    ],
    "weather": [
        "రేపు భారీ వర్షాల అవకాశం ఉంది.",
        "ఉత్తర ప్రాంతాల్లో చలికాలం మొదలవుతోంది."
    ]
}

# 5) Stable Diffusion init (optional, falls back to placeholders)
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = None
try:
    print("[SD] Loading Stable Diffusion pipeline (may take time)...")
    pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5",
                                             torch_dtype=torch.float16 if device=="cuda" else torch.float32).to(device)
    pipe.safety_checker = None
    print("[SD] pipeline ready on", device)
except Exception as e:
    print("[SD] Not available - will use placeholder images.", e)
    pipe = None

# ---------- Utility functions ----------
import wave, struct
def cleanup_temp():
    if os.path.exists(TEMP_DIR): shutil.rmtree(TEMP_DIR)
    os.makedirs(TEMP_DIR, exist_ok=True)

def write_silent_wav(path, duration_s=1.0, sr=22050):
    nframes = int(duration_s * sr)
    with wave.open(path,'w') as wf:
        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
        wf.writeframes(struct.pack('<h',0) * nframes)
    return path

def file_ok(p, min_size=200):
    return os.path.exists(p) and os.path.getsize(p) > min_size

# ---------- Male-like Telugu TTS (gTTS + pitch lowering) ----------
def synthesize_male_gtts(text, out_wav, pitch_factor=PITCH_FACTOR):
    cleanup_temp()
    tmp_mp3 = os.path.join(TEMP_DIR, "gtts_tmp.mp3")
    try:
        print("[TTS] generating gTTS mp3...")
        tts = gTTS(text=text, lang="te")
        tts.save(tmp_mp3)
        print(f"[TTS] lowering pitch with factor {pitch_factor} and converting to WAV...")
        (
            ffmpeg
            .input(tmp_mp3)
            .filter("asetrate", int(22050 * pitch_factor))
            .output(out_wav, ar=22050, ac=1)
            .overwrite_output()
            .run(quiet=True)
        )
        if os.path.exists(tmp_mp3):
            os.remove(tmp_mp3)
        if file_ok(out_wav):
            return out_wav
    except Exception as e:
        print("[TTS] gTTS failed:", e)
    write_silent_wav(out_wav, duration_s=max(1.0, len(text.split())*0.08))
    return out_wav

# ---------- Theme-aware SD prompts ----------
def theme_prompts(theme, text):
    keywords = {
        "political": "politician speaking at podium, parliament building, flags, press conference, crowd",
        "sports": "stadium action, athletes, trophies, cheering crowd, sports photography",
        "technology": "AI robot, circuit board, futuristic lab, startup office, product launch",
        "weather": "storm clouds, weather radar map, heavy rain, sunlight breaking clouds"
    }
    kw = keywords.get(theme, "")
    # include the Telugu sentence first (helps multilingual SD models) and English keywords
    return f"'{text}' — {kw}, editorial news photography, realistic, high detail, cinematic lighting"

def generate_sd_image(theme, text, out_path):
    prompt = theme_prompts(theme, text)
    if pipe is None:
        Image.new("RGB",(720,480),(80,80,80)).save(out_path)
        return out_path
    try:
        neg = "text, watermark, logo, signature, lowres"
        res = pipe(prompt, negative_prompt=neg, width=720, height=480, guidance_scale=7.5)
        img = res.images[0]
        img.save(out_path)
        return out_path
    except Exception as e:
        print("[SD] image gen error:", e)
        Image.new("RGB",(720,480),(80,80,80)).save(out_path)
        return out_path

# ---------- Anchor: extract frames and remove background ----------
def prepare_anchor_clean_frames(anchor_source=ANCHOR_SOURCE, out_dir="anchor_clean_frames"):
    if not os.path.exists(anchor_source):
        print("[Anchor] No anchor source at", anchor_source)
        return []
    os.makedirs("anchor_frames", exist_ok=True)
    os.makedirs(out_dir, exist_ok=True)

    clip = VideoFileClip(anchor_source)
    # sample up to ANCHOR_MAX_SAMPLE_SECONDS or full length if shorter
    sample_dur = min(ANCHOR_MAX_SAMPLE_SECONDS, clip.duration)
    # we'll extract frames at FPS from the sampled segment (start from 0)
    frame_times = [i/FPS for i in range(int(sample_dur * FPS))]
    idx = 0
    for t in frame_times:
        frame = clip.get_frame(t)
        fp = f"anchor_frames/frame_{idx:04d}.png"
        Image.fromarray(frame).save(fp)
        # try rembg -> produce transparent png with alpha
        try:
            img = Image.open(fp).convert("RGBA")
            out = remove(img)  # rembg returns PIL Image or raises
            out.save(os.path.join(out_dir, f"frame_{idx:04d}.png"))
        except Exception as e:
            # fallback: save original (no alpha)
            Image.fromarray(frame).save(os.path.join(out_dir, f"frame_{idx:04d}.png"))
        idx += 1
    clip.close()
    frames = sorted([os.path.join(out_dir, f) for f in os.listdir(out_dir)])
    print(f"[Anchor] prepared {len(frames)} cleaned frames (sampled {sample_dur}s from anchor).")
    return frames

# ---------- Build single theme video (images + audio + anchor overlay looped full duration) ----------
def build_theme_video(theme_name, lines, anchor_frames):
    out_dir = os.path.join(OUT_ROOT, theme_name)
    imgs_dir = os.path.join(out_dir, "images"); os.makedirs(imgs_dir, exist_ok=True)
    aud_dir = os.path.join(out_dir, "audio"); os.makedirs(aud_dir, exist_ok=True)

    # 1) Generate per-line images (theme-aware)
    image_paths = []
    for i, txt in enumerate(lines):
        ip = os.path.join(imgs_dir, f"{i+1}.png")
        generate_sd_image(theme_name, txt, ip)
        image_paths.append(ip)
        print(f"[{theme_name}] image created: {ip}")

    # 2) Generate male-like TTS for full theme text
    joined = " । ".join(lines)
    audio_out = os.path.join(aud_dir, f"{theme_name}.wav")
    synthesize_male_gtts(joined, audio_out)
    if not file_ok(audio_out):
        write_silent_wav(audio_out, duration_s=max(2.0, len(lines)*1.0))

    audio_clip = AudioFileClip(audio_out)
    total_dur = audio_clip.duration
    print(f"[{theme_name}] audio duration: {total_dur:.2f}s")

    # 3) Create image clips timed to audio (split evenly)
    per_img = max(0.8, total_dur / max(1, len(image_paths)))
    img_clips = [ImageClip(p).set_duration(per_img).set_fps(FPS) for p in image_paths]
    news_clip = concatenate_videoclips(img_clips, method="compose").set_audio(audio_clip)

    # 4) Build anchor clip that loops to cover the entire news duration
    final_clip = news_clip
    if anchor_frames:
        # Anchor frames -> short clip that we loop for full duration
        anchor_frame_clips = [ImageClip(f).set_duration(1.0/FPS) for f in anchor_frames]
        anchor_seq = concatenate_videoclips(anchor_frame_clips, method="compose")
        anchor_loop = anchor_seq.loop(duration=news_clip.duration)
        anchor_loop = anchor_loop.resize(newsize=ANCHOR_SIZE).set_pos(ANCHOR_POSITION)
        final_clip = CompositeVideoClip([news_clip, anchor_loop])
        print(f"[{theme_name}] anchor composited and looped for full duration.")
    else:
        print(f"[{theme_name}] no anchor frames; skipping anchor overlay.")

    # 5) Export
    out_video = os.path.join(out_dir, f"{theme_name}.mp4")
    final_clip.write_videofile(out_video, fps=FPS, codec="libx264", audio_codec="aac", threads=4)
    # cleanup
    try: final_clip.close()
    except: pass
    try: news_clip.close()
    except: pass
    audio_clip.close()
    print(f"[{theme_name}] saved -> {out_video}")
    return out_video

# ---------- Main driver: prepare anchor frames, generate videos for each theme ----------
def run_all():
    print("Starting news generation...")
    anchor_frames = prepare_anchor_clean_frames(ANCHOR_SOURCE)
    outputs = {}
    for theme, lines in themes.items():
        print("\n--- Building theme:", theme)
        try:
            out = build_theme_video(theme, lines, anchor_frames)
            outputs[theme] = out
        except Exception as e:
            print(f"[Error] building {theme}:", e)
    print("\nAll done. Outputs saved under:", OUT_ROOT)
    return outputs

# Run the pipeline
outputs = run_all()

# Preview first produced video (if any) and create zip for download
from IPython.display import Video, display
for t, path in outputs.items():
    if os.path.exists(path):
        print("Previewing:", path)
        display(Video(path, embed=True, width=640, height=360))
        break

# Zip outputs for download
zip_file = "/content/news_videos.zip"
if os.path.exists(OUT_ROOT):
    shutil.make_archive(zip_file.replace(".zip",""), 'zip', OUT_ROOT)
    print("Zipped outputs ->", zip_file)
else:
    print("No outputs found to zip.")

# End
print("Pipeline finished. Check /content/output for each theme's .mp4 files.")


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
[SD] Loading Stable Diffusion pipeline (may take time)...


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (92 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['conference , crowd , editorial news photography , realistic , high detail , cinematic lighting']


[SD] pipeline ready on cuda
Starting news generation...
[Anchor] No anchor source at /mnt/data/552209c4-17d4-44bb-856e-3af139138d23.mp4

--- Building theme: political


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ["�.' — politician speaking at podium , parliament building , flags , press conference , crowd , editorial news photography , realistic , high detail , cinematic lighting"]


[political] image created: /content/output/political/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ["ి.' — politician speaking at podium , parliament building , flags , press conference , crowd , editorial news photography , realistic , high detail , cinematic lighting"]


[political] image created: /content/output/political/images/2.png


  0%|          | 0/50 [00:00<?, ?it/s]

[political] image created: /content/output/political/images/3.png
[TTS] generating gTTS mp3...
[TTS] lowering pitch with factor 0.82 and converting to WAV...
[political] audio duration: 15.61s
[political] no anchor frames; skipping anchor overlay.
Moviepy - Building video /content/output/political/political.mp4.
MoviePy - Writing audio in politicalTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/political/political.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['stadium action , athletes , trophies , cheering crowd , sports photography , editorial news photography , realistic , high detail , cinematic lighting']


Moviepy - Done !
Moviepy - video ready /content/output/political/political.mp4
[political] saved -> /content/output/political/political.mp4

--- Building theme: sports


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', athletes , trophies , cheering crowd , sports photography , editorial news photography , realistic , high detail , cinematic lighting']


[sports] image created: /content/output/sports/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

[sports] image created: /content/output/sports/images/2.png
[TTS] generating gTTS mp3...
[TTS] lowering pitch with factor 0.82 and converting to WAV...
[sports] audio duration: 10.35s
[sports] no anchor frames; skipping anchor overlay.
Moviepy - Building video /content/output/sports/sports.mp4.
MoviePy - Writing audio in sportsTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/sports/sports.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['ai robot , circuit board , futuristic lab , startup office , product launch , editorial news photography , realistic , high detail , cinematic lighting']


Moviepy - Done !
Moviepy - video ready /content/output/sports/sports.mp4
[sports] saved -> /content/output/sports/sports.mp4

--- Building theme: technology


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['— ai robot , circuit board , futuristic lab , startup office , product launch , editorial news photography , realistic , high detail , cinematic lighting']


[technology] image created: /content/output/technology/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

[technology] image created: /content/output/technology/images/2.png
[TTS] generating gTTS mp3...
[TTS] lowering pitch with factor 0.82 and converting to WAV...
[technology] audio duration: 11.34s
[technology] no anchor frames; skipping anchor overlay.
Moviepy - Building video /content/output/technology/technology.mp4.
MoviePy - Writing audio in technologyTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/technology/technology.mp4





Moviepy - Done !
Moviepy - video ready /content/output/technology/technology.mp4
[technology] saved -> /content/output/technology/technology.mp4

--- Building theme: weather


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['heavy rain , sunlight breaking clouds , editorial news photography , realistic , high detail , cinematic lighting']


[weather] image created: /content/output/weather/images/1.png


  0%|          | 0/50 [00:00<?, ?it/s]

[weather] image created: /content/output/weather/images/2.png
[TTS] generating gTTS mp3...
[TTS] lowering pitch with factor 0.82 and converting to WAV...
[weather] audio duration: 8.98s
[weather] no anchor frames; skipping anchor overlay.
Moviepy - Building video /content/output/weather/weather.mp4.
MoviePy - Writing audio in weatherTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/weather/weather.mp4





Moviepy - Done !
Moviepy - video ready /content/output/weather/weather.mp4
[weather] saved -> /content/output/weather/weather.mp4

All done. Outputs saved under: /content/output
Previewing: /content/output/political/political.mp4


Zipped outputs -> /content/news_videos.zip
Pipeline finished. Check /content/output for each theme's .mp4 files.


In [11]:
# ============================================================
# TELUGU AI NEWS GENERATOR (Final Stable Diffusion + Anchor BG Removed)
# ============================================================

!pip install edge-tts moviepy diffusers transformers torch torchvision torchaudio pillow scipy requests nest_asyncio rembg opencv-python onnxruntime

import os, torch, asyncio, edge_tts, nest_asyncio, cv2
from rembg import remove
from PIL import Image
from diffusers import DiffusionPipeline
from moviepy.editor import *
from moviepy.video.fx import all as vfx

nest_asyncio.apply()

# ============================================================
# NEWS THEMES (Telugu Headlines)
# ============================================================

themes = {
    "political": [
        "హైదరాబాద్ మెట్రో విస్తరణ పనులు వేగంగా కొనసాగుతున్నాయి.",
        "ఆంధ్రప్రదేశ్ ప్రభుత్వం కొత్త పారిశ్రామిక విధానం ప్రకటించింది.",
        "తెలంగాణ హరితహారం కార్యక్రమం మరింత ఉత్సాహంగా సాగుతోంది.",
        "విజయవాడ నగరాభివృద్ధి ప్రాజెక్టులకు ఆమోదం లభించింది.",
        "అమరావతి లో నూతన విద్యా సంస్థలు ప్రారంభమయ్యాయి.",
    ],
    "technology": [
        "ఇస్రో చంద్రయాన్-4 మిషన్ విజయవంతంగా పూర్తయింది.",
        "భారత్ 6జి సాంకేతికతపై పరీక్షలు ప్రారంభించింది.",
        "హైదరాబాద్ లో కొత్త డేటా సెంటర్ ప్రారంభమైంది.",
        "డ్రోన్లు వ్యవసాయ రంగంలో విస్తరిస్తున్నాయి.",
        "సైబర్ సెక్యూరిటీ పై ప్రభుత్వ నూతన చట్టం ప్రవేశపెట్టింది.",
    ],
    "sports": [
        "భారత్ క్రికెట్ జట్టు సిరీస్‌లో ఘన విజయం సాధించింది.",
        "హైదరాబాద్‌లో అంతర్జాతీయ కబడ్డీ టోర్నమెంట్ విజయవంతమైంది.",
        "పీవీ సింధు ఆసియా బ్యాడ్మింటన్‌లో రజత పతకం గెలుచుకుంది.",
        "భారత హాకీ జట్టు అద్భుత ప్రదర్శన చేసింది.",
        "కబడ్డీ లీగ్‌లో హైదరాబాద్ జట్టు దూసుకుపోతోంది.",
    ],
    "health": [
        "హైదరాబాద్‌లో గాలి కాలుష్యం పెరిగింది.",
        "ఆంధ్రప్రదేశ్ ప్రభుత్వం ఆరోగ్య యాప్ ప్రారంభించింది.",
        "వైద్య రంగంలో టెలీమెడిసిన్ సేవలు విస్తరిస్తున్నాయి.",
        "ప్రభుత్వ ఆసుపత్రుల్లో కొత్త యంత్రాలు ఏర్పాటు చేశారు.",
        "డెంగీ కేసులు తగ్గుముఖం పడుతున్నాయి.",
    ]
}

# AI PROMPTS (Better realism per theme)
prompt_style = {
    "political": "realistic photo of indian parliament, ministers, and public gatherings",
    "technology": "modern futuristic AI, robots, data center, and innovation visuals",
    "sports": "realistic cricket stadium, cheering crowd, indian players action shot",
    "health": "hospital interior, doctors, patients, and medical scenes in india"
}

# ============================================================
# SETTINGS
# ============================================================

anchor_video_path = "/content/anchor.mp4"  # upload your anchor video
anchor_clean_path = "/content/anchor_clean.mp4"
fps = 24
anchor_size = (320, 320)
anchor_position = ("right", "bottom")
device = "cuda" if torch.cuda.is_available() else "cpu"

# ============================================================
# LOAD STABLE DIFFUSION (AI Image Generator)
# ============================================================

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)

# ============================================================
# TTS: Telugu Male Voice (Edge-TTS)
# ============================================================

async def generate_tts(news_texts, output_audio):
    text = " ".join(news_texts)
    communicator = edge_tts.Communicate(text, voice="te-IN-MohanNeural", rate="-10%")
    await communicator.save(output_audio)

# ============================================================
# AI IMAGE GENERATION
# ============================================================

def generate_image(prompt, path):
    print(f"🖼️ Generating image for: {prompt}")
    image = pipe(prompt, width=720, height=480).images[0]
    image.save(path)

# ============================================================
# ANCHOR BACKGROUND REMOVAL
# ============================================================

def clean_anchor():
    os.makedirs("anchor_frames", exist_ok=True)
    os.makedirs("anchor_clean_frames", exist_ok=True)
    cap = cv2.VideoCapture(anchor_video_path)
    count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(frame_rgb)
        output = remove(img)  # remove background
        output.save(f"anchor_clean_frames/frame_{count:04d}.png")
        count += 1
    cap.release()
    print(f"✅ Removed background for {count} frames")

    # Recombine frames into video
    frames = sorted(os.listdir("anchor_clean_frames"))
    clips = [ImageClip(f"anchor_clean_frames/{f}").set_duration(1/fps) for f in frames]
    video = concatenate_videoclips(clips, method="compose")
    video.write_videofile(anchor_clean_path, fps=fps, codec="libx264", audio_codec="aac")
    print("🎬 Clean anchor video saved.")

# ============================================================
# VIDEO CREATION PER THEME
# ============================================================

async def generate_video(theme, news_items):
    print(f"🎥 Generating video for theme: {theme}")
    os.makedirs(f"output/{theme}/images", exist_ok=True)
    os.makedirs(f"output/{theme}/audio", exist_ok=True)

    # Generate images
    img_paths = []
    for i in range(len(news_items)):
        img_path = f"output/{theme}/images/{i+1}.png"
        generate_image(prompt_style[theme], img_path)
        img_paths.append(img_path)

    # Generate Telugu voice
    audio_path = f"output/{theme}/audio/news.wav"
    await generate_tts(news_items, audio_path)
    audio_clip = AudioFileClip(audio_path)
    per_image_duration = audio_clip.duration / len(img_paths)

    # Create video sequence
    news_clips = [ImageClip(img).set_duration(per_image_duration) for img in img_paths]
    news_video = concatenate_videoclips(news_clips, method="compose").set_audio(audio_clip)

    # Add anchor (loop full duration)
    anchor_clip = (
        VideoFileClip(anchor_clean_path)
        .without_audio()
        .resize(anchor_size)
        .loop(duration=news_video.duration)
        .set_pos(anchor_position)
    )

    final = CompositeVideoClip([news_video, anchor_clip])
    output_path = f"/content/output/telugu_news_{theme}.mp4"
    final.write_videofile(output_path, fps=fps, codec="libx264", audio_codec="aac")
    print(f"✅ Done: {output_path}")

# ============================================================
# FULL PIPELINE
# ============================================================

async def full_pipeline():
    os.makedirs("/content/output", exist_ok=True)
    print("🧹 Removing background from anchor video...")
    clean_anchor()
    for theme in themes:
        await generate_video(theme, themes[theme])
    print("🏁 All videos generated successfully!")

await full_pipeline()




Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

🧹 Removing background from anchor video...
✅ Removed background for 239 frames
Moviepy - Building video /content/anchor_clean.mp4.
Moviepy - Writing video /content/anchor_clean.mp4





Moviepy - Done !
Moviepy - video ready /content/anchor_clean.mp4
🎬 Clean anchor video saved.
🎥 Generating video for theme: political
🖼️ Generating image for: realistic photo of indian parliament, ministers, and public gatherings


  0%|          | 0/50 [00:00<?, ?it/s]

🖼️ Generating image for: realistic photo of indian parliament, ministers, and public gatherings


  0%|          | 0/50 [00:00<?, ?it/s]

🖼️ Generating image for: realistic photo of indian parliament, ministers, and public gatherings


  0%|          | 0/50 [00:00<?, ?it/s]

🖼️ Generating image for: realistic photo of indian parliament, ministers, and public gatherings


  0%|          | 0/50 [00:00<?, ?it/s]

🖼️ Generating image for: realistic photo of indian parliament, ministers, and public gatherings


  0%|          | 0/50 [00:00<?, ?it/s]

NoAudioReceived: No audio was received. Please verify that your parameters are correct.

In [12]:
# ============================================================
# TELUGU AI NEWS GENERATOR (Stable Diffusion + Background Removed Anchor + Fixed TTS)
# ============================================================

!pip install edge-tts moviepy diffusers transformers torch torchvision torchaudio pillow scipy requests nest_asyncio rembg opencv-python onnxruntime

import os, torch, asyncio, edge_tts, nest_asyncio, cv2, random, time
from rembg import remove
from PIL import Image
from diffusers import DiffusionPipeline
from moviepy.editor import *
from moviepy.video.fx import all as vfx

nest_asyncio.apply()

# ============================================================
# NEWS THEMES
# ============================================================

themes = {
    "political": [
        "హైదరాబాద్ మెట్రో విస్తరణ పనులు వేగంగా కొనసాగుతున్నాయి.",
        "ఆంధ్రప్రదేశ్ ప్రభుత్వం కొత్త పారిశ్రామిక విధానం ప్రకటించింది.",
        "తెలంగాణ హరితహారం కార్యక్రమం మరింత ఉత్సాహంగా సాగుతోంది.",
        "విజయవాడ నగరాభివృద్ధి ప్రాజెక్టులకు ఆమోదం లభించింది.",
        "అమరావతి లో నూతన విద్యా సంస్థలు ప్రారంభమయ్యాయి.",
    ],
    "technology": [
        "ఇస్రో చంద్రయాన్-4 మిషన్ విజయవంతంగా పూర్తయింది.",
        "భారత్ 6జి సాంకేతికతపై పరీక్షలు ప్రారంభించింది.",
        "హైదరాబాద్ లో కొత్త డేటా సెంటర్ ప్రారంభమైంది.",
        "డ్రోన్లు వ్యవసాయ రంగంలో విస్తరిస్తున్నాయి.",
        "సైబర్ సెక్యూరిటీ పై ప్రభుత్వ నూతన చట్టం ప్రవేశపెట్టింది.",
    ],
    "sports": [
        "భారత్ క్రికెట్ జట్టు సిరీస్‌లో ఘన విజయం సాధించింది.",
        "హైదరాబాద్‌లో అంతర్జాతీయ కబడ్డీ టోర్నమెంట్ విజయవంతమైంది.",
        "పీవీ సింధు ఆసియా బ్యాడ్మింటన్‌లో రజత పతకం గెలుచుకుంది.",
        "భారత హాకీ జట్టు అద్భుత ప్రదర్శన చేసింది.",
        "కబడ్డీ లీగ్‌లో హైదరాబాద్ జట్టు దూసుకుపోతోంది.",
    ],
    "health": [
        "హైదరాబాద్‌లో గాలి కాలుష్యం పెరిగింది.",
        "ఆంధ్రప్రదేశ్ ప్రభుత్వం ఆరోగ్య యాప్ ప్రారంభించింది.",
        "వైద్య రంగంలో టెలీమెడిసిన్ సేవలు విస్తరిస్తున్నాయి.",
        "ప్రభుత్వ ఆసుపత్రుల్లో కొత్త యంత్రాలు ఏర్పాటు చేశారు.",
        "డెంగీ కేసులు తగ్గుముఖం పడుతున్నాయి.",
    ]
}

prompt_style = {
    "political": "realistic photo of indian parliament, ministers, and public gatherings",
    "technology": "modern futuristic AI, robots, data center, and innovation visuals",
    "sports": "realistic cricket stadium, cheering crowd, indian players action shot",
    "health": "hospital interior, doctors, patients, and medical scenes in india"
}

# ============================================================
# SETTINGS
# ============================================================

anchor_video_path = "/content/anchor.mp4"
anchor_clean_path = "/content/anchor_clean.mp4"
fps = 24
anchor_size = (320, 320)
anchor_position = ("right", "bottom")
device = "cuda" if torch.cuda.is_available() else "cpu"

# ============================================================
# LOAD STABLE DIFFUSION
# ============================================================

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)

# ============================================================
# FIXED EDGE-TTS FUNCTION (with fallback and retry)
# ============================================================

async def safe_tts(text, output_audio):
    voices = ["te-IN-MohanNeural", "te-IN-PrabhatNeural", "te-IN-ShrutiNeural"]
    sentences = text.split(".")  # Split long text
    tmp_files = []

    for idx, line in enumerate(sentences):
        if not line.strip():
            continue
        chunk_audio = f"/content/tmp_{idx}.mp3"
        success = False

        for attempt in range(3):
            try:
                voice = random.choice(voices)
                communicator = edge_tts.Communicate(line.strip(), voice=voice, rate="-10%")
                await communicator.save(chunk_audio)
                tmp_files.append(chunk_audio)
                success = True
                break
            except Exception as e:
                print(f"⚠️ TTS retry {attempt+1} for '{line[:15]}...' -> {e}")
                time.sleep(2)
        if not success:
            print(f"❌ Skipped line due to TTS failure: {line[:30]}")

    if not tmp_files:
        raise RuntimeError("No audio generated for any text chunk.")

    # Merge all parts
    final_audio = concatenate_audioclips([AudioFileClip(f) for f in tmp_files])
    final_audio.write_audiofile(output_audio, codec="aac")
    for f in tmp_files: os.remove(f)
    print(f"✅ Audio generated and saved: {output_audio}")

# ============================================================
# IMAGE GENERATION
# ============================================================

def generate_image(prompt, path):
    print(f"🖼️ Generating image for: {prompt}")
    image = pipe(prompt, width=720, height=480).images[0]
    image.save(path)

# ============================================================
# ANCHOR BACKGROUND REMOVAL
# ============================================================

def clean_anchor():
    os.makedirs("anchor_clean_frames", exist_ok=True)
    cap = cv2.VideoCapture(anchor_video_path)
    count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(frame_rgb)
        output = remove(img)
        output.save(f"anchor_clean_frames/frame_{count:04d}.png")
        count += 1
    cap.release()
    print(f"✅ Background removed from {count} frames")

    frames = sorted(os.listdir("anchor_clean_frames"))
    clips = [ImageClip(f"anchor_clean_frames/{f}").set_duration(1/fps) for f in frames]
    video = concatenate_videoclips(clips, method="compose")
    video.write_videofile(anchor_clean_path, fps=fps, codec="libx264", audio_codec="aac")
    print("🎬 Clean anchor video ready.")

# ============================================================
# VIDEO CREATION
# ============================================================

async def generate_video(theme, news_items):
    print(f"🎥 Generating video for: {theme}")
    os.makedirs(f"output/{theme}/images", exist_ok=True)
    os.makedirs(f"output/{theme}/audio", exist_ok=True)

    # Generate images
    img_paths = []
    for i in range(len(news_items)):
        img_path = f"output/{theme}/images/{i+1}.png"
        generate_image(prompt_style[theme], img_path)
        img_paths.append(img_path)

    # Generate voice
    audio_path = f"output/{theme}/audio/news.wav"
    await safe_tts(" ".join(news_items), audio_path)
    audio_clip = AudioFileClip(audio_path)
    per_image_duration = audio_clip.duration / len(img_paths)

    news_clips = [ImageClip(img).set_duration(per_image_duration) for img in img_paths]
    news_video = concatenate_videoclips(news_clips, method="compose").set_audio(audio_clip)

    anchor_clip = (
        VideoFileClip(anchor_clean_path)
        .without_audio()
        .resize(anchor_size)
        .loop(duration=news_video.duration)
        .set_pos(anchor_position)
    )

    final = CompositeVideoClip([news_video, anchor_clip])
    output_path = f"/content/output/telugu_news_{theme}.mp4"
    final.write_videofile(output_path, fps=fps, codec="libx264", audio_codec="aac")
    print(f"✅ Done: {output_path}")

# ============================================================
# FULL PIPELINE
# ============================================================

async def full_pipeline():
    os.makedirs("/content/output", exist_ok=True)
    print("🧹 Cleaning anchor video (removing background)...")
    clean_anchor()
    for theme in themes:
        await generate_video(theme, themes[theme])
    print("🏁 All videos generated successfully!")

await full_pipeline()




Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

🧹 Cleaning anchor video (removing background)...
✅ Background removed from 239 frames
Moviepy - Building video /content/anchor_clean.mp4.
Moviepy - Writing video /content/anchor_clean.mp4





Moviepy - Done !
Moviepy - video ready /content/anchor_clean.mp4
🎬 Clean anchor video ready.
🎥 Generating video for: political
🖼️ Generating image for: realistic photo of indian parliament, ministers, and public gatherings


  0%|          | 0/50 [00:00<?, ?it/s]

🖼️ Generating image for: realistic photo of indian parliament, ministers, and public gatherings


  0%|          | 0/50 [00:00<?, ?it/s]

🖼️ Generating image for: realistic photo of indian parliament, ministers, and public gatherings


  0%|          | 0/50 [00:00<?, ?it/s]

🖼️ Generating image for: realistic photo of indian parliament, ministers, and public gatherings


  0%|          | 0/50 [00:00<?, ?it/s]

🖼️ Generating image for: realistic photo of indian parliament, ministers, and public gatherings


  0%|          | 0/50 [00:00<?, ?it/s]

⚠️ TTS retry 1 for 'హైదరాబాద్ మెట్ర...' -> No audio was received. Please verify that your parameters are correct.
⚠️ TTS retry 2 for 'హైదరాబాద్ మెట్ర...' -> No audio was received. Please verify that your parameters are correct.
⚠️ TTS retry 3 for 'హైదరాబాద్ మెట్ర...' -> No audio was received. Please verify that your parameters are correct.
❌ Skipped line due to TTS failure: హైదరాబాద్ మెట్రో విస్తరణ పనులు
⚠️ TTS retry 1 for ' ఆంధ్రప్రదేశ్ ప...' -> No audio was received. Please verify that your parameters are correct.
⚠️ TTS retry 2 for ' ఆంధ్రప్రదేశ్ ప...' -> No audio was received. Please verify that your parameters are correct.
⚠️ TTS retry 3 for ' ఆంధ్రప్రదేశ్ ప...' -> No audio was received. Please verify that your parameters are correct.
❌ Skipped line due to TTS failure:  ఆంధ్రప్రదేశ్ ప్రభుత్వం కొత్త 
⚠️ TTS retry 1 for ' తెలంగాణ హరితహా...' -> No audio was received. Please verify that your parameters are correct.
⚠️ TTS retry 2 for ' తెలంగాణ హరితహా...' -> No audio was received. Plea

RuntimeError: No audio generated for any text chunk.

In [13]:
# FINAL FIXED: Telugu AI News pipeline with robust TTS (Edge-TTS -> fallback gTTS), SD images, anchor BG removal
# Paste entire cell into Colab and run.

# Installs (run once)
!apt-get update -qq
!apt-get install -y -qq ffmpeg espeak
!pip install -q edge-tts gTTS ffmpeg-python moviepy diffusers transformers torch torchvision torchaudio pillow scipy requests nest_asyncio rembg opencv-python onnxruntime

# Imports
import os, shutil, time, asyncio, nest_asyncio, random
from pathlib import Path
from gtts import gTTS
import edge_tts
import ffmpeg
from moviepy.editor import VideoFileClip, ImageClip, AudioFileClip, concatenate_videoclips, CompositeVideoClip
from PIL import Image
from rembg import remove
import torch
from diffusers import DiffusionPipeline

nest_asyncio.apply()

# ---------------- CONFIG ----------------
FPS = 24
TEMP_DIR = "/content/_temp"
OUT_ROOT = "/content/output"
os.makedirs(TEMP_DIR, exist_ok=True)
os.makedirs(OUT_ROOT, exist_ok=True)

ANCHOR_SOURCE = "/content/anchor.mp4"   # put your anchor here
ANCHOR_MAX_SAMPLE_SECONDS = 8
ANCHOR_SIZE = (320,320)
ANCHOR_POSITION = ("right","bottom")

PITCH_FACTOR = 0.82  # for gTTS (not used for edge chunks)

# Themes (sample)
themes = {
    "political": [
        "హైదరాబాద్ మెట్రో విస్తరణ పనులు వేగంగా కొనసాగుతున్నాయి.",
        "ఆంధ్రప్రదేశ్ ప్రభుత్వం కొత్త పారిశ్రామిక విధానం ప్రకటించింది.",
        "తెలంగాణ హరితహారం కార్యక్రమం మరింత ఉత్సాహంగా సాగುತ್ತಿದೆ."
    ],
    "sports": [
        "భారత్ క్రికెట్ జట్టు సిరీస్‌లో ఘన విజయం సాధించింది.",
        "హైదరాబాద్‌లో అంతర్జాతీయ కబడ్డీ టోర్నమెంట్ విజయవంతమైంది."
    ]
}

prompt_style = {
    "political": "realistic photo of indian parliament, ministers, and public gatherings",
    "sports": "stadium action, athletes, cheering crowd, trophies"
}

# Stable Diffusion init (optional)
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = None
try:
    print("[SD] loading pipeline...")
    pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5",
                                             torch_dtype=torch.float16 if device=="cuda" else torch.float32).to(device)
    pipe.safety_checker = None
    print("[SD] ready on", device)
except Exception as e:
    print("[SD] not loaded, will use placeholders:", e)
    pipe = None

# ---------------- util ----------------
def cleanup_temp():
    if os.path.exists(TEMP_DIR):
        shutil.rmtree(TEMP_DIR)
    os.makedirs(TEMP_DIR, exist_ok=True)

def file_ok(p, min_size=200):
    return os.path.exists(p) and os.path.getsize(p) > min_size

def write_silence(path, duration=1.0, sr=22050):
    import wave, struct
    n = int(duration*sr)
    with wave.open(path,'w') as wf:
        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(sr)
        wf.writeframes(struct.pack('<h',0)*n)
    return path

# ---------------- TTS helpers ----------------
async def try_edge_chunk(text, out_wav, voice="te-IN-MohanNeural", attempts=2, wait=1.0):
    """Try to synthesize with edge-tts; if created file convert to WAV PCM16 22050"""
    tmp_out = out_wav + ".tmp"  # edge may write mp3/wav; normalize later
    for i in range(attempts):
        try:
            await edge_tts.Communicate(text, voice=voice, rate="-10%").save(tmp_out)
            # convert to WAV PCM16 22050
            standardized = out_wav
            (
                ffmpeg
                .input(tmp_out)
                .output(standardized, ar=22050, ac=1, sample_fmt='s16')
                .overwrite_output()
                .run(quiet=True)
            )
            if file_ok(standardized):
                if os.path.exists(tmp_out): os.remove(tmp_out)
                return True
        except Exception as e:
            # small backoff
            time.sleep(wait)
    # cleanup any tmp
    if os.path.exists(tmp_out): os.remove(tmp_out)
    return False

def gtts_chunk(text, out_wav, pitch_factor=None):
    """Synthesize with gTTS, then convert to WAV PCM16 22050; if pitch_factor provided, lower pitch via asetrate"""
    tmp_mp3 = out_wav + ".mp3"
    try:
        gTTS(text=text, lang="te").save(tmp_mp3)
        # convert mp3 -> wav (and optionally lower pitch)
        if pitch_factor and pitch_factor < 1.0:
            (
                ffmpeg
                .input(tmp_mp3)
                .filter("asetrate", int(22050 * pitch_factor))
                .output(out_wav, ar=22050, ac=1, sample_fmt='s16')
                .overwrite_output()
                .run(quiet=True)
            )
        else:
            (
                ffmpeg
                .input(tmp_mp3)
                .output(out_wav, ar=22050, ac=1, sample_fmt='s16')
                .overwrite_output()
                .run(quiet=True)
            )
        if os.path.exists(tmp_mp3): os.remove(tmp_mp3)
        return file_ok(out_wav)
    except Exception as e:
        if os.path.exists(tmp_mp3): os.remove(tmp_mp3)
        return False

async def generate_tts_for_lines(lines, out_audio):
    """
    For each line in lines (list), try Edge-TTS; if fails, fallback to gTTS.
    Concatenate resulting .wav chunks into out_audio WAV.
    """
    cleanup_temp()
    parts = []
    for idx, line in enumerate(lines):
        safe_line = line.strip()
        if not safe_line:
            continue
        part_wav = os.path.join(TEMP_DIR, f"part_{idx}.wav")
        ok = await try_edge_chunk(safe_line, part_wav, attempts=2, wait=1.0)
        if not ok:
            # fallback to gTTS
            ok = gtts_chunk(safe_line, part_wav, pitch_factor=PITCH_FACTOR)
        if not ok:
            # fallback silent short
            write_silence(part_wav, duration=1.2)
        parts.append(part_wav)

    if not parts:
        raise RuntimeError("No parts produced for TTS.")

    # Ensure all parts are PCM16 WAV 22050 and concat using ffmpeg concat demuxer
    list_txt = os.path.join(TEMP_DIR, "parts.txt")
    with open(list_txt, "w") as f:
        for p in parts:
            f.write(f"file '{p}'\n")

    # Merge
    try:
        cmd = (
            ffmpeg
            .input(list_txt, format='concat', safe=0)
            .output(out_audio, acodec='pcm_s16le', ar=22050, ac=1)
            .overwrite_output()
        )
        cmd.run(quiet=True)
    except Exception as e:
        # second fallback: use moviepy to concatenate
        clips = [AudioFileClip(p) for p in parts]
        final = concatenate_videoclips([])  # dummy to keep namespace
        from moviepy.editor import concatenate_audioclips
        a = concatenate_audioclips(clips)
        a.write_audiofile(out_audio, fps=22050, verbose=False, logger=None)
        a.close()
        for c in clips:
            try: c.close()
            except: pass

    if not file_ok(out_audio):
        # final last-resort: combine with silent filler
        write_silence(out_audio, duration= max(2.0, len(parts)*1.0))
    return out_audio

# ---------------- Image generation ----------------
def generate_image_for(theme, text, out_path):
    prompt = f"'{text}' — {prompt_style.get(theme,'news scene')}, editorial news photography, high detail, realistic"
    if pipe is None:
        Image.new("RGB",(720,480),(80,80,80)).save(out_path)
        return out_path
    try:
        res = pipe(prompt, negative_prompt="text, watermark, logo, lowres", width=720, height=480, guidance_scale=7.5)
        img = res.images[0]
        img.save(out_path)
        return out_path
    except Exception as e:
        Image.new("RGB",(720,480),(80,80,80)).save(out_path)
        return out_path

# ------------- Anchor background removal -------------
def prepare_anchor_clean_frames(anchor_source=ANCHOR_SOURCE, out_dir="anchor_clean_frames"):
    if not os.path.exists(anchor_source):
        print("[anchor] source not found:", anchor_source)
        return []
    os.makedirs("anchor_frames", exist_ok=True)
    os.makedirs(out_dir, exist_ok=True)
    clip = VideoFileClip(anchor_source)
    sample_dur = min(ANCHOR_MAX_SAMPLE_SECONDS, clip.duration)
    idx = 0
    for t in [i/FPS for i in range(int(sample_dur*FPS))]:
        frame = clip.get_frame(t)
        fp = f"anchor_frames/frame_{idx:04d}.png"
        Image.fromarray(frame).save(fp)
        try:
            img = Image.open(fp).convert("RGBA")
            out = remove(img)
            out.save(os.path.join(out_dir, f"frame_{idx:04d}.png"))
        except Exception:
            Image.fromarray(frame).save(os.path.join(out_dir, f"frame_{idx:04d}.png"))
        idx += 1
    clip.close()
    frames = sorted([os.path.join(out_dir,f) for f in os.listdir(out_dir)])
    print(f"[anchor] prepared {len(frames)} frames")
    return frames

# ------------- Build theme video -------------
def build_theme_video(theme, lines, anchor_frames):
    out_dir = os.path.join(OUT_ROOT, theme)
    os.makedirs(out_dir, exist_ok=True)
    imgs_dir = os.path.join(out_dir, "images"); os.makedirs(imgs_dir, exist_ok=True)
    aud_dir = os.path.join(out_dir, "audio"); os.makedirs(aud_dir, exist_ok=True)

    # images
    image_paths = []
    for i, line in enumerate(lines):
        ip = os.path.join(imgs_dir, f"{i+1}.png")
        generate_image_for(theme, line, ip)
        image_paths.append(ip)

    # audio (robust)
    audio_out = os.path.join(aud_dir, f"{theme}.wav")
    asyncio.get_event_loop().run_until_complete(generate_tts_for_lines(lines, audio_out))

    audio_clip = AudioFileClip(audio_out)
    total_dur = audio_clip.duration
    per_img = max(0.8, total_dur / max(1, len(image_paths)))
    clips = [ImageClip(p).set_duration(per_img).set_fps(FPS) for p in image_paths]
    news_clip = concatenate_videoclips(clips, method="compose").set_audio(audio_clip)

    # anchor looped and composited full duration
    final = news_clip
    if anchor_frames:
        anchor_seq = [ImageClip(f).set_duration(1.0/FPS) for f in anchor_frames]
        anchor_clip = concatenate_videoclips(anchor_seq, method="compose").loop(duration=news_clip.duration)
        anchor_clip = anchor_clip.resize(newsize=ANCHOR_SIZE).set_pos(ANCHOR_POSITION)
        final = CompositeVideoClip([news_clip, anchor_clip])
    # export
    out_video = os.path.join(out_dir, f"{theme}.mp4")
    final.write_videofile(out_video, fps=FPS, codec="libx264", audio_codec="aac", threads=4)
    try: final.close()
    except: pass
    try: news_clip.close()
    except: pass
    audio_clip.close()
    print(f"[done] {out_video}")
    return out_video

# ------------- Full pipeline -------------
def run_all():
    print("Preparing anchor frames (BG removal)...")
    anchor_frames = prepare_anchor_clean_frames()
    outputs = {}
    for theme, lines in themes.items():
        print("\n=== building", theme)
        out = build_theme_video(theme, lines, anchor_frames)
        outputs[theme] = out
    print("\nAll done. outputs:", outputs)
    return outputs

# Execute
outputs = run_all()
print(outputs)

# Optional: preview first video (if in notebook)
from IPython.display import Video, display
for t,p in outputs.items():
    if os.path.exists(p):
        display(Video(p, embed=True, width=640, height=360))
        break


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
[SD] loading pipeline...


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

[SD] ready on cuda
Preparing anchor frames (BG removal)...


Token indices sequence length is longer than the specified maximum sequence length for this model (117 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ["� ు త ు న ్ న ా య ి.' — realistic photo of indian parliament , ministers , and public gatherings , editorial news photography , high detail , realistic"]


[anchor] prepared 239 frames

=== building political


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ["� న ం ప ్ రకట ిం చ ిం ద ి.' — realistic photo of indian parliament , ministers , and public gatherings , editorial news photography , high detail , realistic"]


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ["� స ా గ ು ತ ್ ತ ಿ ದ ೆ.' — realistic photo of indian parliament , ministers , and public gatherings , editorial news photography , high detail , realistic"]


  0%|          | 0/50 [00:00<?, ?it/s]

Moviepy - Building video /content/output/political/political.mp4.
MoviePy - Writing audio in politicalTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/political/political.mp4



The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ["�ం ద ి.' — stadium action , athletes , cheering crowd , trophies , editorial news photography , high detail , realistic"]


Moviepy - Done !
Moviepy - video ready /content/output/political/political.mp4
[done] /content/output/political/political.mp4

=== building sports


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ["�యవ ం తమ ైం ద ి.' — stadium action , athletes , cheering crowd , trophies , editorial news photography , high detail , realistic"]


  0%|          | 0/50 [00:00<?, ?it/s]

Moviepy - Building video /content/output/sports/sports.mp4.
MoviePy - Writing audio in sportsTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/output/sports/sports.mp4





Moviepy - Done !
Moviepy - video ready /content/output/sports/sports.mp4
[done] /content/output/sports/sports.mp4

All done. outputs: {'political': '/content/output/political/political.mp4', 'sports': '/content/output/sports/sports.mp4'}
{'political': '/content/output/political/political.mp4', 'sports': '/content/output/sports/sports.mp4'}
