# 🎬 Apex SEO Training Video Transcriber

This notebook will:
1. Connect to your Google Drive
2. Find all videos in the Apex SEO training folder
3. Transcribe each video using OpenAI Whisper (free, local)
4. Save transcripts to your Drive

**Instructions:**
1. Click `Runtime` → `Change runtime type` → Select `T4 GPU`
2. Run each cell in order (Shift+Enter)
3. Authorize Google Drive access when prompted
4. Wait for transcription to complete (~30-60 min for all videos)

In [None]:
# Cell 1: Install dependencies
#
# Installs the packages this notebook needs into the current runtime.
# `!` hands the line to the shell; `-q` keeps pip's output quiet.
# openai-whisper provides the `whisper` module imported in Cell 4.
!pip install -q openai-whisper
# NOTE(review): no cell shown in this notebook imports ffmpeg-python directly;
# confirm whether this install is still required.
!pip install -q ffmpeg-python
# NOTE(review): this message is printed unconditionally after the pip lines;
# it does not check whether the installs succeeded.
print("‚úÖ Dependencies installed")

In [None]:
# Cell 2: Mount Google Drive
#
# Mounts the user's Google Drive at /content/drive so the later cells can read
# the videos and write transcripts through ordinary file paths. The notebook
# instructions say an authorization prompt is expected at this step.
from google.colab import drive
drive.mount('/content/drive')
print("‚úÖ Google Drive mounted")

In [None]:
# Cell 3: Find the Apex SEO training folder
#
# Searches the mounted Drive for a folder named 'Apex SEO training' and lists
# the .mp4 files directly inside it.
#
# Defines for later cells:
#   apex_folder - full path of the folder, or None when it was not found
#   video_files - sorted list of .mp4 paths in that folder (empty if none)
import os
import glob

folder_name = 'Apex SEO training'

# Candidate Drive roots; only the ones that exist on this runtime are walked.
base_paths = [
    '/content/drive/MyDrive',
    '/content/drive/Shareddrives',
    '/content/drive/My Drive',
]

apex_folder = None
# Defined up front so later cells can always reference it, even when the
# folder is not found (previously this was a NameError in Cell 5).
video_files = []

for base in base_paths:
    if not os.path.exists(base):
        continue
    for root, dirs, _files in os.walk(base):
        if folder_name in dirs:
            apex_folder = os.path.join(root, folder_name)
        elif os.path.basename(root) == folder_name:
            # Only reachable when a base path itself is the target folder.
            apex_folder = root
        if apex_folder:
            break
    if apex_folder:
        break

# If not found in My Drive, it might be in Shared with me.
# Shared folders are only visible here once a shortcut exists in My Drive.
if not apex_folder:
    print("⚠️ Folder not found in My Drive.")
    print("")
    print("Since this is a shared folder, please:")
    print("1. Go to Google Drive in your browser")
    print("2. Find 'Apex SEO training' in 'Shared with me'")
    print("3. Right-click → 'Organize' → 'Add shortcut'")
    print("4. Add shortcut to 'My Drive'")
    print("5. Re-run this cell")
    print("")
    print("Or enter the full path manually below:")
    # Manual override: uncomment, adjust the path, and re-run this cell.
    # apex_folder = '/content/drive/MyDrive/Apex SEO training'

# Deliberately `if` rather than `else`: a folder supplied through the manual
# override above must still get its videos listed.
if apex_folder:
    print(f"✅ Found folder: {apex_folder}")

    # glob.escape keeps characters such as '[' in the path from being read as
    # wildcard syntax.
    video_files = sorted(glob.glob(os.path.join(glob.escape(apex_folder), '*.mp4')))

    print(f"\n📹 Found {len(video_files)} video files:")
    total_size = 0
    for vf in video_files:
        size_mb = os.path.getsize(vf) / (1024 * 1024)
        total_size += size_mb
        print(f"  • {os.path.basename(vf)} ({size_mb:.1f} MB)")
    print(f"\n📊 Total size: {total_size / 1024:.2f} GB")

In [None]:
# Cell 4: Load Whisper model
#
# Picks the compute device and loads the Whisper model onto it.
# Defines `device` and `model` for the transcription cell.
import whisper
import torch

# Use the GPU when torch can see one, otherwise fall back to the CPU.
gpu_available = torch.cuda.is_available()
if gpu_available:
    device = "cuda"
else:
    device = "cpu"
print(f"üñ•Ô∏è Using device: {device}")

if gpu_available:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"   GPU: {gpu_name}")

# Model size - 'medium' is a good balance of speed/accuracy.
# Options: tiny, base, small, medium, large
model_size = "medium"
print("\n‚è≥ Loading Whisper model (this takes ~1 minute)...")
model = whisper.load_model(model_size, device=device)
print("‚úÖ Whisper model loaded!")

In [None]:
# Cell 5: Transcribe all videos
#
# Runs `model.transcribe` over every path in `video_files` and writes, per
# video, a plain-text transcript (.txt) and a JSON file with the text,
# segments and language into `<apex_folder>/transcripts`.
#
# Reads from earlier cells: apex_folder, video_files, model.
# Defines: output_folder, results (one dict per success), failed (one dict per
# error).
import os
import json
from datetime import datetime

# Without a folder the os.path.join below would fail with an opaque TypeError.
if not apex_folder:
    raise RuntimeError(
        "apex_folder is not set - run Cell 3 (or set the path manually) first."
    )


def _write_text_atomically(path, text):
    """Write *text* to *path* via a temp file so *path* never holds partial output."""
    tmp_path = path + '.tmp'
    with open(tmp_path, 'w', encoding='utf-8') as f:
        f.write(text)
    os.replace(tmp_path, path)


# Create output folder
output_folder = os.path.join(apex_folder, 'transcripts')
os.makedirs(output_folder, exist_ok=True)

# Track results
results = []
failed = []
skipped = 0
total = len(video_files)

print(f"📝 Starting transcription of {total} videos...")
print(f"📁 Saving transcripts to: {output_folder}")
print("=" * 60)

for i, video_path in enumerate(video_files, 1):
    filename = os.path.basename(video_path)
    # splitext swaps only the extension; str.replace would rewrite every
    # '.mp4' occurrence in the name.
    stem = os.path.splitext(filename)[0]
    transcript_path = os.path.join(output_folder, stem + '.txt')
    json_path = os.path.join(output_folder, stem + '.json')

    # The .txt file doubles as the "already done" marker, which makes the cell
    # safe to re-run after an interrupted session.
    if os.path.exists(transcript_path):
        print(f"\n[{i}/{total}] ⏭️ Skipping {filename} (already transcribed)")
        skipped += 1
        continue

    print(f"\n[{i}/{total}] 🎬 Transcribing: {filename}")
    start_time = datetime.now()

    try:
        result = model.transcribe(
            video_path,
            language="en",
            verbose=False
        )

        # JSON (with timestamps) goes first and the .txt marker last, each via
        # an atomic rename, so a crash mid-write can never leave a partial
        # transcript that later runs would skip.
        payload = {
            'filename': filename,
            'text': result['text'],
            'segments': result['segments'],
            'language': result['language']
        }
        _write_text_atomically(
            json_path, json.dumps(payload, indent=2, ensure_ascii=False)
        )
        _write_text_atomically(transcript_path, result['text'])

        elapsed = (datetime.now() - start_time).total_seconds()
        print(f"   ✅ Done in {elapsed:.1f}s - {len(result['text'])} chars")

        results.append({
            'filename': filename,
            'transcript_path': transcript_path,
            'chars': len(result['text']),
            'time_seconds': elapsed
        })

    except Exception as e:
        # Batch boundary: record the failure and carry on with the next video
        # rather than losing the whole run to one bad file.
        print(f"   ❌ Error: {str(e)}")
        failed.append({'filename': filename, 'error': str(e)})

print("\n" + "=" * 60)
print(f"\n🎉 Transcription complete!")
print(f"   ✅ Successful: {len(results)}")
print(f"   ⏭️ Skipped: {skipped}")
print(f"   ❌ Failed: {len(failed)}")
print(f"\n📁 Transcripts saved to: {output_folder}")

In [None]:
# Cell 6: Combine all transcripts into one file
#
# Concatenates every .txt file in `<apex_folder>/transcripts` (sorted by
# name) into `<apex_folder>/ALL_TRANSCRIPTS_COMBINED.txt`, with a banner
# before each transcript naming the video it came from.
#
# Reads from earlier cells: apex_folder.
import os
import glob
# Imported here so this cell does not depend on Cell 5 having been run in the
# current session (e.g. after a runtime restart with transcripts already on
# Drive).
from datetime import datetime

# Same location Cell 5 writes to; recomputed for the reason given above.
output_folder = os.path.join(apex_folder, 'transcripts')

# Find all transcript files. glob.escape keeps characters such as '[' in the
# path from being read as wildcard syntax.
transcript_files = sorted(glob.glob(os.path.join(glob.escape(output_folder), '*.txt')))

combined_path = os.path.join(apex_folder, 'ALL_TRANSCRIPTS_COMBINED.txt')

print(f"📚 Combining {len(transcript_files)} transcripts...")

banner = "=" * 80

with open(combined_path, 'w', encoding='utf-8') as outfile:
    outfile.write(banner + "\n")
    outfile.write("APEX SEO TRAINING - COMPLETE TRANSCRIPTS\n")
    outfile.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n")
    outfile.write(banner + "\n\n")

    for i, tf in enumerate(transcript_files, 1):
        # Transcripts are named after their video, so swapping the extension
        # back recovers the video name (splitext touches only the suffix).
        video_name = os.path.splitext(os.path.basename(tf))[0] + '.mp4'
        outfile.write(f"\n{banner}\n")
        outfile.write(f"VIDEO {i}: {video_name}\n")
        outfile.write(f"{banner}\n\n")

        with open(tf, 'r', encoding='utf-8') as infile:
            outfile.write(infile.read())
        outfile.write("\n")

file_size = os.path.getsize(combined_path) / 1024
print(f"\n✅ Combined transcript saved!")
print(f"📄 File: {combined_path}")
print(f"📊 Size: {file_size:.1f} KB")
print(f"\n💡 You can now share this file with your AI assistant for analysis.")

---
## ✅ Done!

Your transcripts are now saved in the `Apex SEO training/transcripts/` folder.

A combined file `ALL_TRANSCRIPTS_COMBINED.txt` is also in the main folder.

**Next steps:**
1. Download `ALL_TRANSCRIPTS_COMBINED.txt` from Google Drive
2. Share it with your AI assistant for SEO strategy extraction