In [None]:
import os
import shutil
import zipfile
import cv2
import numpy as np
import whisper
import subprocess
from pydub import AudioSegment
from moviepy.editor import VideoFileClip
from pathlib import Path

In [None]:
# Input dataset location, working directory for intermediate results, and
# the name of the archive produced at the end of the run.
DATASET_PATH = "/kaggle/input/fakenwes-dataset/b5-correlaction-main"
OUTPUT_DIR = "preprocessed_data"
ZIP_PATH = "processed_data.zip"

# Make sure the output directory exists (no error when it is already there).
os.makedirs(OUTPUT_DIR, exist_ok=True)

def extract_audio(video_path, audio_path):
    """Extract the audio track of a video and store it as a WAV file.

    Args:
        video_path: Path of the source video file.
        audio_path: Destination path of the extracted audio.

    Returns:
        None. When the video has no audio track a warning is printed and
        nothing is written to ``audio_path``.
    """
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            print(f"Aviso: {video_path} não contém áudio!")
            return
        video.audio.write_audiofile(audio_path)
    finally:
        # Always release the clip (also on the early return above) so the
        # underlying reader resources are not leaked across many videos.
        video.close()

    # Re-encode the extracted file as WAV in place. The format is left for
    # pydub to detect: the file was just written by moviepy, so forcing the
    # "mp4" demuxer on it is presumably wrong for a ".wav" target and makes
    # decoding fail.
    sound = AudioSegment.from_file(audio_path)
    sound.export(audio_path, format="wav")

# Loaded Whisper models keyed by model name, so that repeated transcriptions
# reuse the same model instead of reloading it on every call.
_WHISPER_MODELS = {}

def transcribe_audio(audio_path, model_name="base"):
    """Transcribe an audio file with Whisper and return the recognized text.

    Args:
        audio_path: Path of the audio file to transcribe.
        model_name: Name of the Whisper model to load; defaults to "base",
            which is the model this function always used.

    Returns:
        The value stored under the "text" key of the transcription result.
    """
    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        # First use of this model name: load it once and keep it for reuse.
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model
    result = model.transcribe(audio_path)
    return result["text"]

def replace_audio_ffmpeg(video_path, new_audio_path, output_path):
    """Replace the audio track of a video with another audio file via ffmpeg.

    The video stream of ``video_path`` is copied without re-encoding and
    combined with the first audio stream of ``new_audio_path``; the result
    is cut to the shorter of the two inputs.

    Args:
        video_path: Path of the video whose picture is kept.
        new_audio_path: Path of the file providing the new audio track.
        output_path: Path of the video file to write (overwritten if present).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    command = [
        # Global overwrite flag goes first: ffmpeg applies options to the
        # next file named, so nothing should trail the output path.
        "ffmpeg", "-y",
        "-i", video_path, "-i", new_audio_path,
        "-c:v", "copy", "-map", "0:v:0", "-map", "1:a:0",
        "-shortest", output_path,
    ]
    subprocess.run(command, check=True)

In [None]:
if __name__ == "__main__":
    # Sorted so the processing (and archive) order does not depend on the
    # order in which the filesystem happens to list the directory.
    video_files = sorted(f for f in os.listdir(DATASET_PATH) if f.endswith(".mp4"))

    # Both dicts are keyed by the video file name and only contain videos
    # whose audio file exists after extraction.
    audio_paths = {}
    video_paths = {}

    for video_file in video_files:
        video_path = os.path.join(DATASET_PATH, video_file)
        video_name = os.path.splitext(video_file)[0]

        # Each video gets its own sub-folder holding its "audio.wav".
        output_video_folder = os.path.join(OUTPUT_DIR, video_name)
        os.makedirs(output_video_folder, exist_ok=True)

        audio_path = os.path.join(output_video_folder, "audio.wav")

        print(f"Extraindo áudio de {video_file}...")
        extract_audio(video_path, audio_path)

        if os.path.exists(audio_path):
            audio_paths[video_file] = audio_path
            video_paths[video_file] = video_path
        else:
            print(f"Erro ao extrair áudio de {video_file}, pulando...")

    # Swap the audio tracks between Video1.mp4 and Video8.mp4.
    video1_audio = audio_paths.get("Video1.mp4")
    video8_audio = audio_paths.get("Video8.mp4")
    video1_path = video_paths.get("Video1.mp4")
    video8_path = video_paths.get("Video8.mp4")
    output_video1_path = os.path.join(OUTPUT_DIR, "modified_Video1.mp4")
    output_video8_path = os.path.join(OUTPUT_DIR, "modified_Video8.mp4")

    # Only the files actually produced by the swap are archived below.
    swapped_outputs = []
    if video1_audio and video8_audio and video1_path and video8_path:
        print("Trocando áudios entre Video1.mp4 e Video8.mp4...")
        replace_audio_ffmpeg(video1_path, video8_audio, output_video1_path)
        replace_audio_ffmpeg(video8_path, video1_audio, output_video8_path)
        swapped_outputs = [output_video1_path, output_video8_path]
        print("Áudios trocados e vídeos salvos.")
    else:
        print("Aviso: Video1.mp4 e/ou Video8.mp4 indisponíveis, troca de áudios ignorada.")

    # Build a ZIP archive with every extracted audio, every source video and
    # the swapped videos (when they were produced).
    with zipfile.ZipFile(ZIP_PATH, "w") as zipf:
        for video_file, audio_path in audio_paths.items():
            # Every extracted file is called "audio.wav" on disk, so name the
            # archive entry after its video to keep the entries distinct.
            audio_name = os.path.splitext(video_file)[0] + ".wav"
            zipf.write(audio_path, os.path.join("audios", audio_name))
        for video_path in video_paths.values():
            zipf.write(video_path, os.path.join("videos", os.path.basename(video_path)))
        for swapped_path in swapped_outputs:
            zipf.write(swapped_path, os.path.basename(swapped_path))

    print("Todos os áudios e vídeos foram compactados em", ZIP_PATH)