In [None]:
# --- Environment setup (IPython shell magics; run once per runtime) ---
# Install the ImageMagick system package.
!apt-get update && apt-get install -y imagemagick
# Comment out (wrap in <!-- -->) the ImageMagick-6 policy rule that denies the
# "@*" path pattern. Presumably needed so moviepy's TextClip can call
# ImageMagick -- TODO confirm.
!sed -i 's/<policy domain="path" rights="none" pattern="@\*" \/>/<!-- <policy domain="path" rights="none" pattern="@\*" \/> -->/g' /etc/ImageMagick-6/policy.xml
# Build requirements.txt one line at a time ('>' truncates, '>>' appends).
!echo 'faster_whisper==1.0.1' > requirements.txt
# NOTE(review): 'ffmpeg' and 'ffmpeg_python' are both listed; they look like
# two different distributions for the same `import ffmpeg` name -- verify that
# both are really wanted.
!echo 'ffmpeg==1.4' >> requirements.txt
!echo 'ffmpeg_python==0.2.0' >> requirements.txt
!echo 'moviepy==1.0.3' >> requirements.txt
!echo 'numpy<2.0' >> requirements.txt
# NOTE(review): opencv_python and opencv_python_headless are both pinned, at
# different versions; presumably they conflict over the `cv2` module -- confirm
# and keep only one.
!echo 'opencv_python==4.7.0.72' >> requirements.txt
!echo 'opencv_python_headless==4.9.0.80' >> requirements.txt
!echo 'python-dotenv==1.0.1' >> requirements.txt
!echo 'pytubefix' >> requirements.txt
!echo 'torch' >> requirements.txt
!echo 'webrtcvad-wheels' >> requirements.txt
!echo 'pydub' >> requirements.txt
# Pinned to the 0.28 series; the notebook calls openai.ChatCompletion.create.
!echo 'openai==0.28.1' >> requirements.txt
!echo 'google-generativeai' >> requirements.txt
!echo 'pysrt' >> requirements.txt
# Extra package index for PyTorch wheels (cu121 channel).
!echo '--extra-index-url https://download.pytorch.org/whl/cu121' >> requirements.txt
# gradio is installed separately and unpinned.
!pip install gradio
!pip install -r requirements.txt
# Working directories plus three downloaded model files. The file names suggest
# OpenCV face-detection models; nothing in the cells shown here loads them.
!mkdir -p videos
!mkdir -p models
!wget -O models/deploy.prototxt https://raw.githubusercontent.com/faisu223/Faisal/feature/gradio-interface/models/deploy.prototxt
!wget -O models/res10_300x300_ssd_iter_140000_fp16.caffemodel https://raw.githubusercontent.com/faisu223/Faisal/feature/gradio-interface/models/res10_300x300_ssd_iter_140000_fp16.caffemodel
!wget -O haarcascade_frontalface_default.xml https://raw.githubusercontent.com/faisu223/Faisal/feature/gradio-interface/haarcascade_frontalface_default.xml

In [None]:
import os
import cv2
import numpy as np
import webrtcvad
import wave
import contextlib
from pydub import AudioSegment
from pytubefix import YouTube
import ffmpeg
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip, ImageClip
from moviepy.video.tools.subtitles import SubtitlesClip
from faster_whisper import WhisperModel
import torch
import openai
import json
import zipfile
import gradio as gr
import shutil
import google.generativeai as genai
import re

# --- Helper Functions ---
def parse_srt(srt_file_path):
    """Return the dialogue of an SRT file as one space-joined string.

    Cue index lines (lines consisting only of digits), timing lines (anything
    containing ``-->``) and blank lines are dropped; every other line is kept
    as dialogue. Timing information is discarded.

    Args:
        srt_file_path: Path to a UTF-8 encoded ``.srt`` file. A leading
            byte-order mark is tolerated.

    Returns:
        The remaining lines, stripped and joined with single spaces.

    Note:
        A dialogue line that consists solely of digits (e.g. ``"42"``) cannot
        be told apart from a cue index and is still dropped.
    """
    dialogue_lines = []
    # 'utf-8-sig' strips a BOM if present; otherwise the BOM sticks to the
    # first cue index ("\ufeff1"), which then no longer looks numeric.
    with open(srt_file_path, 'r', encoding='utf-8-sig') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or '-->' in line:
                continue
            # fullmatch, not match: only an all-digit line is a cue index.
            # Dialogue such as "3 years ago ..." must be kept.
            if re.fullmatch(r'\d+', line):
                continue
            dialogue_lines.append(line)
    return ' '.join(dialogue_lines)

# --- YouTubeDownloader --- 
def download_youtube_video(url, logger, progress):
    """Download the highest-resolution MP4 stream of a YouTube video into ``videos/``.

    Progressive MP4 streams are preferred; if there is none, any MP4 stream is
    used instead.

    Args:
        url: URL of the YouTube video.
        logger: Callable that accepts a single message string.
        progress: Progress callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        The path returned by the stream's ``download`` call, or ``None`` when
        the download failed or no MP4 stream exists (the reason is logged).
    """
    try:
        yt = YouTube(url)
        logger(f'Downloading video: {yt.title}')
        progress(0.1, desc=f'Downloading video: {yt.title}')

        video_stream = (
            yt.streams.filter(progressive=True, file_extension='mp4')
            .order_by('resolution')
            .desc()
            .first()
        )
        if not video_stream:
            # NOTE(review): non-progressive streams may be video-only (no
            # audio) -- verify before relying on this fallback.
            video_stream = (
                yt.streams.filter(file_extension='mp4')
                .order_by('resolution')
                .desc()
                .first()
            )
        if not video_stream:
            # Report the real cause instead of an AttributeError on None.
            logger('An error occurred during download: no MP4 stream is available for this video')
            return None

        os.makedirs('videos', exist_ok=True)

        video_file = video_stream.download(output_path='videos')
        logger(f'Downloaded: {yt.title} to videos folder')
        return video_file
    except Exception as e:
        # Top-level boundary for the download step: report and signal failure.
        logger(f'An error occurred during download: {str(e)}')
        return None

# --- Edit ---
def extractAudio(video_path, logger, progress):
    """Write the audio track of a video to ``audio.wav`` in the working directory.

    Args:
        video_path: Path of the source video.
        logger: Callable that accepts a single message string.
        progress: Progress callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        ``'audio.wav'`` on success, or ``None`` if the video could not be
        opened, has no audio track, or the export failed (the reason is
        logged).
    """
    try:
        logger('Extracting audio...')
        progress(0.2, desc='Extracting audio...')
        audio_path = 'audio.wav'
        # Context manager so the clip is closed even if the export raises.
        with VideoFileClip(video_path) as video_clip:
            if video_clip.audio is None:
                logger('An error occurred while extracting audio: the video has no audio track')
                return None
            video_clip.audio.write_audiofile(audio_path)
        logger(f'Extracted audio to: {audio_path}')
        return audio_path
    except Exception as e:
        # Top-level boundary for this step: report and signal failure.
        logger(f'An error occurred while extracting audio: {e}')
        return None

def crop_video(input_file, output_file, start_time, end_time, logger):
    """Cut ``input_file`` between two timestamps and encode the result with libx264.

    Args:
        input_file: Path of the source video.
        output_file: Path the trimmed video is written to.
        start_time: Start of the segment, passed straight to ``subclip``.
        end_time: End of the segment, passed straight to ``subclip``.
        logger: Callable that accepts a single message string.
    """
    logger(f'Cropping video from {start_time} to {end_time}')
    with VideoFileClip(input_file) as source_clip:
        segment = source_clip.subclip(start_time, end_time)
        segment.write_videofile(output_file, codec='libx264')
    logger(f'Video {output_file} cropped successfully.')

# --- Transcription ---
def transcribeAudio(audio_path, logger, progress):
    """Transcribe an audio file with the ``base.en`` Whisper model.

    Args:
        audio_path: Path of the audio file to transcribe.
        logger: Callable that accepts a single message string.
        progress: Progress callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        One line per segment in the form ``"<start> - <end>: <text>\\n"``,
        concatenated into a single string, or ``None`` if anything failed
        (the error is logged).
    """
    try:
        logger('Transcribing audio...')
        progress(0.3, desc='Transcribing audio...')

        # Use the GPU when torch reports one, otherwise the CPU.
        if torch.cuda.is_available():
            device = 'cuda'
        else:
            device = 'cpu'
        logger(f'Using device: {device}')

        whisper = WhisperModel('base.en', device=device, local_files_only=False)
        logger('Whisper model loaded.')

        segment_iter, _info = whisper.transcribe(audio=audio_path, beam_size=5, language='en')
        # Materialise every segment before announcing completion.
        transcript_segments = list(segment_iter)
        logger('Transcription complete.')
        progress(0.4, desc='Transcription complete.')

        return ''.join(
            f'{piece.start} - {piece.end}: {piece.text.strip()}\n'
            for piece in transcript_segments
        )
    except Exception as err:
        logger(f'Transcription Error: {err}')
        return None

# --- LanguageTasks ---
def _extract_json_payload(raw_text):
    """Return the JSON part of a model reply, without Markdown fences or surrounding prose."""
    text = raw_text.strip()
    # Prefer the contents of a ``` / ```json fenced block when there is one.
    fenced = re.search(r'```(?:json)?\s*(.*?)```', text, re.DOTALL | re.IGNORECASE)
    if fenced:
        return fenced.group(1).strip()
    # Otherwise, if the reply wraps the array in prose, keep only the
    # outermost [...] span.
    if not text.startswith('['):
        first, last = text.find('['), text.rfind(']')
        if first != -1 and last > first:
            return text[first:last + 1]
    return text


def get_highlights(transcription, api_key, logger, progress, provider, max_clips, min_length, max_length):
    """Ask an LLM for highlight segments of a transcription.

    Args:
        transcription: Transcript text sent to the model as the user content.
        api_key: API key for the selected provider.
        logger: Callable that accepts a single message string.
        progress: Progress callable invoked as ``progress(fraction, desc=...)``.
        provider: ``'OpenAI'`` or ``'Gemini'``; anything else is an error.
        max_clips: Maximum number of highlights to return.
        min_length: Minimum segment length in seconds (prompt instruction only).
        max_length: Maximum segment length in seconds (prompt instruction only).

    Returns:
        A list of at most ``max_clips`` dicts, each containing at least the
        keys ``'start'`` and ``'end'``. An empty list is returned when the
        request fails, the reply is not a JSON array, or no usable entry is
        found; the reason is logged.
    """
    logger(f'Getting up to {max_clips} highlights from transcription using {provider}...')
    progress(0.5, desc=f'Getting highlights from transcription using {provider}...')

    system_prompt = f'''
    You are an expert viral video clip finder. Based on the provided transcription, identify up to {max_clips} of the most interesting, engaging, or viral-worthy segments. 
    Each segment must be between {min_length} and {max_length} seconds long.
    Return the results as a valid JSON array of objects. Each object must have 'start' and 'end' keys with the timestamps in seconds, and a 'content' key with the summary of the highlight.
    
    Example format:
    [
      {{
        "start": 42.5,
        "end": 88.0,
        "content": "A summary of the first highlight."
      }},
      {{
        "start": 123.1,
        "end": 160.7,
        "content": "A summary of the second highlight."
      }}
    ]
    
    Do not say anything else, just return the proper JSON. If you cannot find any highlights that fit the criteria, return an empty array [].
    '''

    try:
        if provider == 'OpenAI':
            openai.api_key = api_key
            response = openai.ChatCompletion.create(
                model='gpt-4o-2024-05-13',
                temperature=0.7,
                messages=[
                    {'role': 'system', 'content': system_prompt},
                    {'role': 'user', 'content': transcription},
                ],
            )
            json_string = response.choices[0].message.content
        elif provider == 'Gemini':
            genai.configure(api_key=api_key)
            model = genai.GenerativeModel('gemini-1.5-flash')
            # Instructions and transcript are sent as one combined prompt.
            response = model.generate_content(system_prompt + '\n\n' + transcription)
            json_string = response.text
        else:
            raise ValueError('Invalid AI provider selected')

        # Only strip the fence/prose around the payload; a blanket
        # replace('json', '') would also mangle the highlight summaries.
        parsed = json.loads(_extract_json_payload(json_string))
        if not isinstance(parsed, list):
            logger('Error in GetHighlight: the model reply was not a JSON array')
            return []

        # Callers index each entry with ['start'] and ['end'].
        highlights = [
            item for item in parsed
            if isinstance(item, dict) and 'start' in item and 'end' in item
        ]
        skipped = len(parsed) - len(highlights)
        if skipped:
            logger(f'Ignored {skipped} highlight entries without start/end.')

        # The prompt only *asks* for at most max_clips; enforce it here.
        limit = max(int(max_clips), 0)
        if len(highlights) > limit:
            highlights = highlights[:limit]

        logger(f'Found {len(highlights)} highlights.')
        return highlights
    except Exception as e:
        # Boundary for the whole LLM round-trip (network, auth, parsing).
        logger(f'Error in GetHighlight: {e}')
        return []

# --- Finalization ---
def burn_srt_to_video(video_path, srt_path, output_path, logger):
    """Render the subtitles of an SRT file onto a video and write the result.

    Args:
        video_path: Path of the video to subtitle.
        srt_path: Path of the ``.srt`` file whose cues are drawn.
        output_path: Path of the subtitled video (encoded with libx264).
        logger: Callable that accepts a single message string.

    Note:
        Cue times are used exactly as they appear in ``srt_path``. When
        ``video_path`` is a segment cut from a longer video and the SRT covers
        the whole original, the cues are not shifted to the segment's start;
        a per-clip SRT would be needed for that.
    """
    logger(f'Burning SRT to video: {output_path}')

    def make_caption(txt):
        # White text on a black box, one TextClip per subtitle cue.
        return TextClip(txt, font='Arial', fontsize=24, color='white', bg_color='black')

    # Context manager so the source clip is closed once the export is done
    # (or has failed), matching crop_video.
    with VideoFileClip(video_path) as video:
        subtitles = SubtitlesClip(srt_path, make_caption).set_pos(('center', 'bottom'))
        result = CompositeVideoClip([video, subtitles])
        result.write_videofile(output_path, codec='libx264')
    logger('SRT burned to video.')

def add_watermark(video_path, watermark_path, output_path, logger):
    """Overlay an image in the top-right corner of a video and write the result.

    Args:
        video_path: Path of the video to watermark.
        watermark_path: Path of the watermark image.
        output_path: Path of the watermarked video (encoded with libx264).
        logger: Callable that accepts a single message string.
    """
    logger(f'Adding watermark to: {output_path}')
    # Context manager so the source clip is closed once the export is done
    # (or has failed), matching crop_video.
    with VideoFileClip(video_path) as video:
        watermark = (
            ImageClip(watermark_path)
            .set_duration(video.duration)       # visible for the whole video
            .resize(height=50)                  # tweak size as needed
            .margin(right=8, top=8, opacity=0)  # transparent gap to the edges
            .set_pos(('right', 'top'))
        )
        result = CompositeVideoClip([video, watermark])
        result.write_videofile(output_path, codec='libx264')
    logger('Watermark added.')

# --- Main Processing Function ---
# Matches an SRT timing line such as "00:01:02,500 --> 00:01:05,000".
_SRT_CUE_TIMING = re.compile(
    r'(\d+):(\d{1,2}):(\d{1,2})[,.](\d{1,3})\s*-->\s*'
    r'(\d+):(\d{1,2}):(\d{1,2})[,.](\d{1,3})'
)


def _uploaded_path(upload):
    """Return the filesystem path behind an upload value, or None if there is none."""
    if upload is None:
        return None
    if isinstance(upload, str):
        return upload or None
    return getattr(upload, 'name', None) or None


def _srt_seconds(hours, minutes, seconds, millis):
    """Convert the four captured fields of an SRT timestamp to seconds."""
    return int(hours) * 3600 + int(minutes) * 60 + int(seconds) + int(millis.ljust(3, '0')) / 1000


def _timed_transcript_from_srt(srt_path):
    """Render an SRT file as '<start> - <end>: <text>' lines, times in seconds."""
    cues = []
    current = None  # (start, end, text_lines) of the cue being read
    with open(srt_path, 'r', encoding='utf-8-sig') as f:
        for raw_line in f:
            line = raw_line.strip()
            timing = _SRT_CUE_TIMING.search(line)
            if timing:
                fields = timing.groups()
                current = (_srt_seconds(*fields[:4]), _srt_seconds(*fields[4:]), [])
                cues.append(current)
            elif not line:
                current = None  # a blank line ends the cue
            elif current is not None:
                current[2].append(line)
            # Lines seen outside a cue (the numeric cue index) are skipped.
    rendered = []
    for start, end, text_lines in cues:
        if text_lines:
            text = ' '.join(text_lines)
            rendered.append(f'{start} - {end}: {text}\n')
    return ''.join(rendered)


def generate_shorts(video_file, srt_file, provider, openai_api_key, gemini_api_key, min_length, max_length, max_clips, aspect_ratio, burn_srt, export_srt, output_name, watermark_file, progress=gr.Progress()):
    """Cut AI-selected highlight clips out of an uploaded video and zip them.

    Pipeline: obtain a transcript (from the SRT upload if given, otherwise by
    extracting and transcribing the audio), ask the selected provider for
    highlights, cut one clip per highlight, optionally burn subtitles and add
    a watermark, then bundle the clips and the transcript into a ZIP file.

    Args:
        video_file: Uploaded video (object with a ``.name`` path, or a path string).
        srt_file: Optional uploaded SRT file, same conventions as ``video_file``.
        provider: ``'OpenAI'`` or ``'Gemini'``.
        openai_api_key: Key used when ``provider`` is ``'OpenAI'``.
        gemini_api_key: Key used for any other provider value.
        min_length: Minimum highlight length in seconds.
        max_length: Maximum highlight length in seconds.
        max_clips: Maximum number of highlights requested.
        aspect_ratio: Accepted for interface compatibility; not applied to the
            output yet.
        burn_srt: Burn the uploaded SRT into each clip (only if an SRT was uploaded).
        export_srt: Also add the uploaded SRT to the ZIP.
        output_name: Prefix for generated file names; defaults to ``'short'``.
        watermark_file: Optional uploaded watermark image.
        progress: Gradio progress tracker.

    Returns:
        ``(zip_path, log_text)``; ``zip_path`` is ``None`` when no clip could
        be produced.
    """
    logs = []

    def logger(message):
        logs.append(message)
        print(message)

    logger('Starting short generation process...')
    progress(0, desc='Starting...')

    # Resolve the uploads first: reading `.name` from a missing upload would
    # raise before the failure could be reported.
    video_path = _uploaded_path(video_file)
    if not video_path:
        logger('Video upload failed.')
        return None, '\n'.join(logs)
    srt_path = _uploaded_path(srt_file)
    watermark_path = _uploaded_path(watermark_file)

    if min_length > max_length:
        logger('Minimum length is greater than maximum length; swapping them.')
        min_length, max_length = max_length, min_length

    trans_text = None
    if srt_path:
        logger('SRT file provided. Skipping transcription.')
        progress(0.4, desc='Parsing SRT file...')
        try:
            # The model is asked for start/end seconds, so keep the cue times
            # and use the same line format as transcribeAudio.
            trans_text = _timed_transcript_from_srt(srt_path)
            if not trans_text:
                # No timed cue recognised: fall back to the plain dialogue text.
                trans_text = parse_srt(srt_path)
        except (OSError, UnicodeDecodeError) as e:
            logger(f'Could not read SRT file: {e}')
    else:
        logger('No SRT file provided. Starting automatic transcription.')
        audio_path = extractAudio(video_path, logger, progress)
        if not audio_path:
            logger('Audio extraction failed.')
            return None, '\n'.join(logs)
        trans_text = transcribeAudio(audio_path, logger, progress)

    if not trans_text:
        logger('Failed to get transcription from either SRT or Whisper.')
        return None, '\n'.join(logs)

    api_key = openai_api_key if provider == 'OpenAI' else gemini_api_key
    highlights = get_highlights(trans_text, api_key, logger, progress, provider, max_clips, min_length, max_length)

    if not highlights:
        logger('Failed to get any highlights from the AI. Try adjusting the length parameters or using a different video.')
        return None, '\n'.join(logs)

    # The prefix comes from a free-text field: keep only its final path
    # component so generated files stay in the working directory.
    output_filename_prefix = os.path.basename((output_name or '').strip()) or 'short'
    final_files = []
    total = len(highlights)

    for i, highlight in enumerate(highlights):
        clip_no = i + 1
        try:
            start_time = highlight['start']
            end_time = highlight['end']
        except (KeyError, TypeError):
            logger(f'Skipping clip {clip_no}/{total}: highlight has no start/end.')
            continue

        logger(f'--- Processing clip {clip_no}/{total}: {start_time}s - {end_time}s ---')
        # Continue from the 0.5 reported by the highlight step, not from zero.
        progress(0.5 + 0.45 * i / total, desc=f'Processing clip {clip_no}/{total}')

        try:
            clip_path = f'{output_filename_prefix}_{clip_no}.mp4'
            crop_video(video_path, clip_path, start_time, end_time, logger)

            if burn_srt and srt_path:
                subtitled_path = f'{output_filename_prefix}_{clip_no}_subtitled.mp4'
                burn_srt_to_video(clip_path, srt_path, subtitled_path, logger)
                clip_path = subtitled_path

            if watermark_path:
                watermarked_path = f'{output_filename_prefix}_{clip_no}_watermarked.mp4'
                add_watermark(clip_path, watermark_path, watermarked_path, logger)
                clip_path = watermarked_path
        except Exception as e:
            # Per-clip boundary: one bad highlight must not discard the rest.
            logger(f'Clip {clip_no}/{total} failed and was skipped: {e}')
            continue

        final_files.append(clip_path)

    if not final_files:
        logger('No clips could be generated.')
        return None, '\n'.join(logs)

    logger(f'Generated {len(final_files)} clips.')
    zip_path = f'{output_filename_prefix}_results.zip'
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for file in final_files:
            zipf.write(file, arcname=os.path.basename(file))
        if export_srt and srt_path:
            # Store under the bare file name, not the upload's temp directory.
            zipf.write(srt_path, arcname=os.path.basename(srt_path))
        # Written straight into the archive (UTF-8), no intermediate file.
        zipf.writestr('transcription.txt', trans_text)

    logger('Process finished.')
    progress(1, desc='Finished!')

    return zip_path, '\n'.join(logs)



In [None]:
# --- Gradio UI ---
# Two-column layout: inputs and the trigger button on the left, the log box
# and the downloadable ZIP on the right.
with gr.Blocks() as demo:
    gr.Markdown('# AI Youtube Shorts Generator')
    with gr.Row():
        with gr.Column():
            video_file = gr.File(label='Upload Long Video')
            srt_file = gr.File(label='Upload SRT File (Optional)')
            provider = gr.Dropdown(['OpenAI', 'Gemini'], label='AI Provider', value='OpenAI')
            # Only the key box of the selected provider is visible; OpenAI is
            # the initial selection, so its box starts out visible.
            openai_api_key = gr.Textbox(label='OpenAI API Key', type='password', visible=True)
            gemini_api_key = gr.Textbox(label='Gemini API Key', type='password', visible=False)
            with gr.Row():
                # NOTE(review): the two ranges overlap, so the minimum can be
                # set above the maximum; nothing in this cell prevents that.
                min_length = gr.Slider(10, 120, value=30, label='Minimum length (s)')
                max_length = gr.Slider(15, 180, value=60, label='Maximum length (s)')
            max_clips = gr.Slider(1, 10, value=3, step=1, label='Max Number of Shorts')
            # NOTE(review): this value is passed to generate_shorts, which
            # does not read it anywhere in its body.
            aspect_ratio = gr.Dropdown(['9:16', '16:9', '1:1'], label='Aspect Ratio', value='9:16')
            with gr.Row():
                burn_srt = gr.Checkbox(label='Burn SRT into Video')
                export_srt = gr.Checkbox(label='Export SRT Separately')
            output_name = gr.Textbox(label='Output File Name Prefix', placeholder='e.g., my_awesome_short')
            watermark_file = gr.File(label='Upload Watermark (Optional)')
            generate_button = gr.Button('Generate Shorts')
        with gr.Column():
            logs = gr.Textbox(label='Logs', lines=15, interactive=False)
            output_zip = gr.File(label='Download Results ZIP')

    def toggle_api_key(provider_choice):
        """Return visibility updates for (openai_api_key, gemini_api_key)."""
        if provider_choice == 'OpenAI':
            return gr.update(visible=True), gr.update(visible=False)
        else:
            return gr.update(visible=False), gr.update(visible=True)

    # Swap the visible key box whenever the provider dropdown changes.
    provider.change(toggle_api_key, provider, [openai_api_key, gemini_api_key])

    # The inputs list is positional: its order must match the parameter order
    # of generate_shorts (the trailing `progress` parameter is not listed).
    generate_button.click(
        fn=generate_shorts,
        inputs=[
            video_file,
            srt_file,
            provider,
            openai_api_key,
            gemini_api_key,
            min_length,
            max_length,
            max_clips,
            aspect_ratio,
            burn_srt,
            export_srt,
            output_name,
            watermark_file,
        ],
        # generate_shorts returns (zip_path, log_text), in this order.
        outputs=[output_zip, logs]
    )

# share=True requests a public share link; debug=True is passed through to launch().
demo.launch(debug=True, share=True)