In [26]:
from langchain_google_genai import ChatGoogleGenerativeAI 
from langchain_groq import ChatGroq
from dotenv import load_dotenv
import os


In [27]:
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
llm = ChatGoogleGenerativeAI(model = "gemini-1.5-pro"	 , temperature = 0.7 , max_tokens=200)


In [28]:
GROQ_API_KEY  = os.getenv("GROQ_API_KEY")
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
llm = ChatGroq(model="Gemma2-9b-It")


In [29]:
from langchain.prompts import ChatPromptTemplate

prompt_template = ChatPromptTemplate.from_template(
    "Write 5 short captions (max 8 words each) for a social video ad. Use a hook and storytelling style. Topic: {topic}"
)

def script_writer(state: dict) -> dict:
    prompt = prompt_template.format_messages(topic=state["prompt"])
    response = llm(prompt)
    text = response.content.strip()
    print(response)
    captions = [line.strip("-• ").strip() for line in text.split("\n") if line.strip()]
    return {**state, "captions": captions}


In [30]:
state = {"prompt": "Advertisement for Car"}
state = script_writer(state)
state["captions"]


content="Here are 5 short captions (max 8 words each) for a car social video ad, using a hook and storytelling style:\n\n1. **Craving adventure? This car is ready.**\n2. **Escape the ordinary.  Hit the open road.**\n3. **One drive.  Endless possibilities.**\n4. **Where will your journey take you?**\n5. **Built for life.  Live it to the fullest.** \n\n\nLet me know if you'd like more options! \n" additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 109, 'prompt_tokens': 39, 'total_tokens': 148, 'completion_time': 0.198181818, 'prompt_time': 0.002275503, 'queue_time': 0.24867157699999998, 'total_time': 0.200457321}, 'model_name': 'Gemma2-9b-It', 'system_fingerprint': 'fp_10c08bf97d', 'finish_reason': 'stop', 'logprobs': None} id='run--02c521bd-9d01-4fda-adc1-29a018bfab56-0' usage_metadata={'input_tokens': 39, 'output_tokens': 109, 'total_tokens': 148}


['Here are 5 short captions (max 8 words each) for a car social video ad, using a hook and storytelling style:',
 '1. **Craving adventure? This car is ready.**',
 '2. **Escape the ordinary.  Hit the open road.**',
 '3. **One drive.  Endless possibilities.**',
 '4. **Where will your journey take you?**',
 '5. **Built for life.  Live it to the fullest.**',
 "Let me know if you'd like more options!"]

In [31]:
from gtts import gTTS

def voice_actor(state: dict) -> dict:
    captions = state["captions"]
    output_dir = "outputs/audio"
    os.makedirs(output_dir, exist_ok=True)
    audio_paths = []

    for i, caption in enumerate(captions):
        file_path = os.path.join(output_dir, f"caption_{i}.mp3")
        gTTS(text=caption, lang='en').save(file_path)
        audio_paths.append(file_path)

    return {**state, "audio_paths": audio_paths}


In [32]:
state = voice_actor(state)
state["audio_paths"]


['outputs/audio/caption_0.mp3',
 'outputs/audio/caption_1.mp3',
 'outputs/audio/caption_2.mp3',
 'outputs/audio/caption_3.mp3',
 'outputs/audio/caption_4.mp3',
 'outputs/audio/caption_5.mp3',
 'outputs/audio/caption_6.mp3']

In [None]:
import os
import subprocess


image_path = "/Users/akashjain/Desktop/AI_Video/Backend/Notebook/outputs/images/image_6.png"
audio_mp3_path = "/Users/akashjain/Desktop/AI_Video/Backend/Notebook/outputs/audio/caption_0.mp3"
audio_wav_path = "audio.wav"  # Temp file
model_path = "Wav2Lip.pth"


In [None]:

# Step 1: Convert MP3 to 16kHz mono WAV
print("🔁 Converting MP3 to WAV...")
subprocess.run([
    "ffmpeg", "-y", "-i", audio_mp3_path,
    "-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le", audio_wav_path
], check=True)

# Step 2: Run Wav2Lip
print("🎬 Running Wav2Lip inference...")
subprocess.run([
    "python3", "inference.py",
    "--checkpoint_path", model_path,
    "--face", image_path,
    "--audio", audio_wav_path
], check=True)

print("\n✅ Done! Output video saved to: results/result_voice.mp4")
