# Transcribe a TikTok / Instagram Reel in Brazilian Portuguese
This notebook contains the code from `transcribe_url.py` adapted for notebook usage.

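Run the first code cell to install the Python dependencies (`openai-whisper` and `yt-dlp`). The same cell also looks for `ffmpeg` and tries to install it when it is missing (via `apt-get` in Colab); if that fails, install `ffmpeg` manually and restart the kernel.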

In [4]:
# Ensure ffmpeg and required Python packages are available (Colab / Windows / Linux)
import sys
import shutil
import subprocess
import os
import tempfile
import urllib.request
import zipfile
import platform


def is_admin():
    """Report whether the current process runs with elevated privileges.

    On Windows (``os.name == 'nt'``) the answer comes from
    ``shell32.IsUserAnAdmin``; on every other platform it is whether the
    effective user id equals 0.  Any exception raised while probing is
    treated as "not admin".

    Returns:
        bool: True when elevated, False otherwise (including on errors).
    """
    on_windows = os.name == 'nt'
    try:
        if not on_windows:
            # POSIX-style check: effective uid 0
            return os.geteuid() == 0
        import ctypes  # only needed (and only meaningful) on Windows
        return ctypes.windll.shell32.IsUserAnAdmin() != 0
    except Exception:
        return False


def ensure_ffmpeg():
    """Make an ``ffmpeg`` executable discoverable on PATH, installing it if needed.

    Returns immediately when ``shutil.which('ffmpeg')`` already finds one.
    Otherwise the strategy depends on the environment:

    * Google Colab (``'google.colab'`` in ``sys.modules``): ``apt-get update``
      followed by ``apt-get install -y ffmpeg``.
    * Windows: when ``is_admin()`` is true, use ``choco`` if present, else
      ``winget``.  If ffmpeg still cannot be found afterwards (or the process
      is not elevated), download the gyan.dev "release essentials" zip into a
      new temp directory and prepend the folder holding ``ffmpeg.exe`` to
      ``os.environ['PATH']`` of this process.
    * Any other platform: run the same two ``apt-get`` commands.  In the
      non-admin case a failure is printed right away instead of propagating.

    ``Exception`` subclasses raised inside the install logic are caught and
    reported with ``print``; the final lookup result is printed as well.

    Returns:
        None
    """
    # Nothing to do when an ffmpeg executable is already discoverable
    if shutil.which('ffmpeg'):
        print('ffmpeg already available at', shutil.which('ffmpeg'))
        return

    # Pick an install strategy per environment; failures are reported by the outer except
    try:
        if 'google.colab' in sys.modules:
            print('Installing ffmpeg via apt (Colab)...')
            subprocess.run(['apt-get', 'update', '-y'], check=True)
            subprocess.run(['apt-get', 'install', '-y', 'ffmpeg'], check=True)
        elif platform.system() == 'Windows':
            if is_admin():
                print('Running system install for ffmpeg on Windows (admin)')
                choco = shutil.which('choco')
                winget = shutil.which('winget')
                # choco takes precedence; winget is only tried when choco is absent
                if choco:
                    try:
                        subprocess.run([choco, 'install', 'ffmpeg', '-y'], check=True)
                    except Exception as e:
                        print('choco install failed:', e)
                elif winget:
                    try:
                        subprocess.run([winget, 'install', '--id', 'Gyan.FFmpeg', '-e'], check=True)
                    except Exception as e:
                        print('winget install failed:', e)
                else:
                    print('No system package manager (choco/winget) found — cannot perform automatic system install')
            else:
                print('Not running as admin — will install ffmpeg for this session only')
                # no system install attempted; the download below handles this case

            # Fallback for both the admin and non-admin paths: if ffmpeg is still
            # not discoverable, fetch a static build and expose it via PATH
            if not shutil.which('ffmpeg'):
                print('Downloading ffmpeg static build for Windows (session-only)...')
                url = 'https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip'
                # this directory is not removed here; PATH is pointed into it below
                tmp = tempfile.mkdtemp()
                zip_path = os.path.join(tmp, 'ffmpeg.zip')
                urllib.request.urlretrieve(url, zip_path)
                with zipfile.ZipFile(zip_path, 'r') as z:
                    z.extractall(tmp)
                # locate the first extracted folder that contains ffmpeg.exe
                ff_bin = None
                for root, dirs, files in os.walk(tmp):
                    if 'ffmpeg.exe' in files:
                        ff_bin = root
                        break
                if not ff_bin:
                    raise RuntimeError('ffmpeg.exe not found in downloaded archive')
                # only this process's environment is changed
                os.environ['PATH'] = ff_bin + os.pathsep + os.environ.get('PATH', '')
                print('Added to PATH for this session:', ff_bin)

        else:
            # Every platform that is neither Colab nor Windows lands here and uses apt-get.
            # NOTE(review): that includes macOS and non-Debian Linux, where apt-get is
            # presumably missing — confirm whether those should get their own branch.
            if is_admin():
                print('Running system apt-get install for ffmpeg (Linux, admin)...')
                subprocess.run(['apt-get', 'update', '-y'], check=True)
                subprocess.run(['apt-get', 'install', '-y', 'ffmpeg'], check=True)
            else:
                print('Not admin — attempting apt-get install (may fail without permissions)')
                try:
                    subprocess.run(['apt-get', 'update', '-y'], check=True)
                    subprocess.run(['apt-get', 'install', '-y', 'ffmpeg'], check=True)
                except Exception as e:
                    print('Automatic apt-get install failed:', e)
                    print('You can install ffmpeg system-wide or request admin to install it.')
    except Exception as e:
        print('Could not install ffmpeg automatically:', e)
        print('Please install ffmpeg manually and restart the kernel if needed.')
    finally:
        # always report the outcome of the attempt (not reached on the early return above)
        if shutil.which('ffmpeg'):
            print('ffmpeg available at', shutil.which('ffmpeg'))
        else:
            print('ffmpeg not found in PATH')

def ensure_py_packages():
    """Install ``openai-whisper`` and ``yt-dlp`` with pip when they cannot be imported.

    Each package is probed by importing it; any exception raised by the
    import counts as "missing".  Missing packages are installed into the
    interpreter running this kernel (``sys.executable -m pip install ...``).
    A failing pip run is reported with ``print`` rather than raised.

    After a successful install the import system's finder caches are
    invalidated so the freshly installed packages can be imported in the
    same process without restarting the kernel.

    Returns:
        None
    """
    import importlib  # local import: only used after a fresh install

    to_install = []
    try:
        import whisper  # noqa: F401
    except Exception:
        to_install.append('openai-whisper')
    try:
        import yt_dlp as _yt  # noqa: F401
    except Exception:
        to_install.append('yt-dlp')

    if not to_install:
        print('Python packages already installed')
        return

    try:
        print('Installing Python packages:', to_install)
        subprocess.run([sys.executable, '-m', 'pip', 'install', *to_install], check=True)
    except Exception as e:
        # best effort: report and let the caller decide what to do next
        print('pip install failed:', e)
    else:
        # directories may have been scanned (and cached) before the packages existed
        importlib.invalidate_caches()


# Install (or locate) the external tool and the Python packages
ensure_ffmpeg()
ensure_py_packages()

# Report what this kernel can actually see after the install attempt
print('\nVerification:')
print('ffmpeg ->', shutil.which('ffmpeg'))
try:
    import whisper  # noqa: F401
    import yt_dlp  # noqa: F401
except Exception as e:
    print('Import error after install attempt:', e)
else:
    print('whisper and yt_dlp import OK')


ffmpeg already available at C:\Users\p\AppData\Local\Microsoft\WinGet\Links\ffmpeg.EXE
Python packages already installed

Verification:
ffmpeg -> C:\Users\p\AppData\Local\Microsoft\WinGet\Links\ffmpeg.EXE
whisper and yt_dlp import OK


In [5]:
# Transcribe functions (adapted from transcribe_url.py)
import os
import tempfile
import shutil
import subprocess

def run(cmd):
    """Execute ``cmd`` (an argument list) and wait for it to finish.

    Raises:
        subprocess.CalledProcessError: the command exited with a non-zero status.
    """
    # check=True converts a failing exit status into an exception
    subprocess.run(cmd, check=True)
    return None

def download_video(url, out_dir):
    """Download the best audio (or best overall) stream of ``url`` into ``out_dir``.

    The download is saved as ``video.<ext>``, with the extension chosen by yt-dlp.

    Args:
        url: Address of the post to download.
        out_dir: Existing directory that receives the file.

    Returns:
        str: Path of the downloaded file inside ``out_dir``.

    Raises:
        FileNotFoundError: no ``video.*`` file exists after yt-dlp returned.
    """
    import sys  # local import: needed only for the module-launch fallback

    out_template = os.path.join(out_dir, "video.%(ext)s")
    # Use the console script when it is on PATH; otherwise launch the module with
    # the current interpreter, because pip's scripts directory may not be on PATH.
    launcher = shutil.which("yt-dlp")
    base_cmd = [launcher] if launcher else [sys.executable, "-m", "yt_dlp"]
    cmd = base_cmd + ["-f", "bestaudio/best", "-o", out_template, url]
    run(cmd)
    # Sorted so the pick does not depend on directory listing order; partial
    # download and bookkeeping files are never returned.
    candidates = sorted(
        name for name in os.listdir(out_dir)
        if name.startswith("video.") and not name.endswith((".part", ".ytdl"))
    )
    if not candidates:
        raise FileNotFoundError("Downloaded video not found")
    return os.path.join(out_dir, candidates[0])

def extract_audio(video_path, out_wav):
    """Convert the audio track of ``video_path`` into a WAV file at ``out_wav``.

    ffmpeg is asked for 16 kHz, single-channel, ``pcm_s16le`` audio; video
    streams are dropped and an existing ``out_wav`` is overwritten.
    """
    ffmpeg_args = ["ffmpeg", "-y"]                # -y: overwrite without prompting
    ffmpeg_args += ["-i", video_path]
    ffmpeg_args += ["-ar", "16000", "-ac", "1"]   # sample rate and channel count
    ffmpeg_args += ["-vn", "-c:a", "pcm_s16le"]   # no video, 16-bit PCM audio codec
    ffmpeg_args.append(out_wav)
    run(ffmpeg_args)

def transcribe_with_whisper(audio_path, model_size, language):
    """Transcribe ``audio_path`` with an OpenAI Whisper model.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.
        model_size: Model name passed to ``whisper.load_model`` (e.g. ``"small"``).
        language: Language hint.  Anything beginning with ``"pt"`` becomes
            ``"pt"``.  Other locale-style tags are reduced to their primary
            subtag (``"en-US"`` / ``"en_US"`` -> ``"en"``), since Whisper
            expects a bare language code or name.  ``None`` or an empty
            string is forwarded as ``None`` (no language hint).

    Returns:
        The value returned by ``model.transcribe`` (called with
        ``task="transcribe"``).
    """
    import whisper  # imported lazily so defining this function does not need the package

    lang = (language or "").strip()
    if lang.lower().startswith("pt"):
        lang = "pt"
    elif "-" in lang or "_" in lang:
        # drop the region/script part of tags such as "en-US" or "zh_TW"
        lang = lang.replace("_", "-").split("-", 1)[0].lower()
    model = whisper.load_model(model_size)
    return model.transcribe(audio_path, language=lang or None, task="transcribe")

def write_srt(transcript, out_path):
    """Write the segments of ``transcript`` to ``out_path`` in SubRip (.srt) form.

    Args:
        transcript: Mapping with an optional ``"segments"`` list; each segment
            provides ``"start"`` and ``"end"`` (seconds) and ``"text"``.
        out_path: Destination file, written as UTF-8 (overwritten if present).

    A transcript without segments produces an empty file.
    """
    def fmt_ts(s):
        """Format ``s`` seconds as ``HH:MM:SS,mmm``."""
        # Round once to whole milliseconds: truncating the fractional part of a
        # float loses a millisecond (1.001 s -> ",000", 2.3 s -> ",299").
        # Negative values are clamped to zero.
        total_ms = max(0, int(round(s * 1000)))
        h, rem = divmod(total_ms, 3600000)
        m, rem = divmod(rem, 60000)
        sec_i, ms = divmod(rem, 1000)
        return f"{h:02}:{m:02}:{sec_i:02},{ms:03}"

    with open(out_path, "w", encoding="utf-8") as f:
        for i, seg in enumerate(transcript.get("segments", []), start=1):
            f.write(f"{i}\n")
            f.write(f"{fmt_ts(seg['start'])} --> {fmt_ts(seg['end'])}\n")
            # blank line terminates each cue
            f.write(seg["text"].strip() + "\n\n")

def transcribe_url(url, model="small", out="meu_transcrito.txt", srt=True, language="pt-BR"):
    """Download ``url``, extract its audio and transcribe it with Whisper.

    Args:
        url: Address of the TikTok / Instagram post.
        model: Whisper model name.
        out: Path of the text file that receives the transcript.  Pass a
            falsy value (``None`` / ``""``) to skip writing any file.
        srt: When true (and ``out`` is set), also write the timed segments
            next to ``out`` using the same base name and a ``.srt`` extension.
        language: Language hint forwarded to ``transcribe_with_whisper``.

    Returns:
        str: The transcript text with surrounding whitespace stripped.

    All intermediate files live in a temporary directory that is removed
    before returning, whether or not the pipeline succeeded.
    """
    tmp = tempfile.mkdtemp(prefix="transcribe_url_")
    try:
        print("Downloading video (via yt-dlp)...")
        video_path = download_video(url, tmp)
        audio_path = os.path.join(tmp, "audio.wav")
        print("Extracting audio (ffmpeg)...")
        extract_audio(video_path, audio_path)
        print("Transcribing with Whisper (this may take a while)...")
        result = transcribe_with_whisper(audio_path, model, language=language)
        text = result.get("text", "").strip()
        # `out` and `srt` used to be accepted but ignored; honor them here
        if out:
            with open(out, "w", encoding="utf-8") as fh:
                fh.write(text + "\n")
            if srt:
                write_srt(result, os.path.splitext(out)[0] + ".srt")
        return text
    finally:
        # A cleanup failure (e.g. a file still locked) must not mask the real
        # error or discard a finished transcript
        shutil.rmtree(tmp, ignore_errors=True)

# Test

In [2]:
# Example: point `url` at a public TikTok or Instagram Reel, then run this cell
url = "https://www.instagram.com/reel/DO7DgylDWW7/"  # replace with your URL
# The model size and output name can be changed here
transcript = transcribe_url(
    url,
    model="small",
    out="meu_transcrito.txt",
    srt=True,
    language="pt-BR",
)
# Show only the beginning so long transcripts do not flood the output
print("Transcript (first 1000 chars):", transcript[:1000], sep="\n")

Downloading video (via yt-dlp)...
Extracting audio (ffmpeg)...
Transcribing with Whisper (this may take a while)...




Transcript (first 1000 chars):
As pessoas já conhecem um físico turista. Então eu treinando lá desse jeitinho aqui mesmo, sem nada do meu jeito, ali na minha essência, no meu aldeizinho. As pessoas vinham, pediram para tirar foto. Congratulations! Falei, porra, que legal!
