<a href="https://colab.research.google.com/github/citronlegacy/Video-to-Screenshots/blob/main/Youtube-Video-to-Screenshots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Youtube Video to Screenshots

| |GitHub| Colab |
|:--|:-:|:-:|
| 🎬 **Video To Screenshots** | [![GitHub](https://img.shields.io/badge/GitHub-Visit-brightgreen.svg)](https://github.com/citronlegacy/Video-to-Screenshots/blob/main/Video-to-Screenshots.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/citronlegacy/Video-to-Screenshots/blob/main/Video-to-Screenshots.ipynb) |
| 🎬 **Youtube Video to Screenshots** | [![GitHub](https://img.shields.io/badge/GitHub-Visit-brightgreen.svg)](https://github.com/citronlegacy/Video-to-Screenshots/blob/main/Youtube-Video-to-Screenshots.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/citronlegacy/Video-to-Screenshots/blob/main/Youtube-Video-to-Screenshots.ipynb) |

### Project Description

This Google Colab notebook downloads a YouTube video and generates screenshots (frames) from it. It can output a screenshot for every frame or for every n-th frame.
I recommend not outputting every frame, as that would generate thousands of images for even a few minutes of video.

### Libraries Used

- **FFmpeg:** A multimedia framework for handling audio, video, and other multimedia files.
- **tqdm:** A library for displaying progress bars in Python.
- **subprocess:** A module to spawn new processes, connect to their input/output/error pipes, and obtain their return codes.
- **shlex:** A module for parsing strings into tokens, especially useful when dealing with command-line-like syntax.
- **os:** A module for interacting with the operating system, providing functionality to manage directories and files.
- **ipywidgets:** A library for creating interactive widgets in Jupyter notebooks.
- **pytube:** A library for downloading YouTube videos.
- **zipfile:** A module to work with zip archives in Python.


### Project Disclaimer

This Colab notebook is provided for educational and informational purposes only. The content and code within this notebook are not intended for production use, and any actions taken based on the provided information are at your own risk.
When using the `pytube` library to download videos from YouTube, please be aware of YouTube's terms of service. Unauthorized downloading of videos may violate YouTube's terms.

---

In [None]:
#@markdown ### Install requirements and connect to Google Drive

import time
import os
from IPython import get_ipython
from IPython.display import display, Markdown
from google.colab.output import clear as clear_output
!apt-get install ffmpeg
import subprocess
import shlex
import re
!pip install tqdm
from tqdm import tqdm
from ipywidgets import widgets
!pip install pytube
from pytube import YouTube
import zipfile

# Mount Google Drive once per session; the mount point already exists
# when a previous run of this cell (or the Colab UI) mounted it.
_drive_mount_point = '/content/drive'
if not os.path.exists(_drive_mount_point):
    from google.colab import drive
    print("📂 Connecting to Google Drive...")
    drive.mount(_drive_mount_point)

def check_directory_exists(directory_path):
    """Return True when directory_path exists and is a directory."""
    # os.path.isdir() is already False for paths that do not exist,
    # so a separate existence check is not needed.
    return os.path.isdir(directory_path)

def countNumberOfFilesInFolder(folder):
    """Return the number of regular files directly inside folder.

    Sub-directories are not counted and are not descended into.
    """
    return sum(
        1
        for entry in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, entry))
    )

def clean_string(input_string):
    """Return input_string reduced to a folder-name-friendly form.

    Every character that is neither a word character nor whitespace is
    dropped, then each space character is turned into an underscore.
    Other whitespace (tabs, newlines) is left as-is.
    """
    without_specials = re.sub(r'[^\w\s]', '', input_string)
    return without_specials.replace(' ', '_')


def get_file_size(file_path):
    """Print the size of file_path in KB, MB and GB and return it in bytes.

    Returns None (after printing the error) when the size cannot be read.
    """
    try:
        size_bytes = os.path.getsize(file_path)
        # Successive powers of 1024 give KB, MB and GB.
        in_kb, in_mb, in_gb = (size_bytes / 1024.0 ** power for power in (1, 2, 3))

        print(f"File Size: {in_kb:.2f} KB | {in_mb:.2f} MB | {in_gb:.2f} GB")
        return size_bytes
    except Exception as e:
        print(f"Error: {e}")
        return None


def zip_folder(folder_path, zip_path):
    """Compress the contents of folder_path into '<zip_path>.zip'.

    Files are stored with paths relative to folder_path, a progress bar
    is shown while writing, and the resulting archive size is printed.
    Any error is printed instead of being raised.
    """
    try:
        with zipfile.ZipFile(f"{zip_path}.zip", 'w', zipfile.ZIP_DEFLATED) as archive:
            # First pass only counts files so the progress bar has a total.
            file_total = sum(len(names) for _, _, names in os.walk(folder_path))

            with tqdm(total=file_total, desc="Zipping", unit="file") as progress:
                for current_dir, _, names in os.walk(folder_path):
                    for name in names:
                        source = os.path.join(current_dir, name)
                        archive.write(source, arcname=os.path.relpath(source, folder_path))
                        progress.update(1)

        print(f"\nFolder '{folder_path}' \nSuccessfully zipped to '{zip_path}.zip'")
        get_file_size(zip_path + ".zip")
    except Exception as e:
        print(f"Error: {e}")




# Signal that every install/import above completed.
print("Install Successful!")

In [None]:
#@markdown ### Get a Youtube Video
#@markdown Define your folder in Colab or Google Drive
# Set variables
input_storage_location = "Video file in Google Drive (/content/drive/MyDrive/)" #@param ["Video file in Colab (/content/)", "Video file in Google Drive (/content/drive/MyDrive/)"]
project_name = "video2screens" #@param {type:"string"}

# Set working folder
if "Video file in Google Drive" in input_storage_location:
    working_folder = os.path.join("/content/drive/MyDrive/", project_name)
else:
    working_folder = os.path.join("/content", project_name)

# Create the working folder if it does not exist yet
os.makedirs(working_folder, exist_ok=True)

# YouTube video URL
youtube_video_url = "https://www.youtube.com/watch?v=dq2oEeXgj5s"  #@param {type:"string"}

# Reset first so a failed download cannot leave a file name from an
# earlier run behind for the screenshot cell to pick up.
video_file_name = ""

# Download the video
try:
    yt = YouTube(youtube_video_url)
    video_stream = yt.streams.get_highest_resolution()
    # download() returns the path it actually wrote. The saved file name is
    # a sanitized form of the title with the stream's own extension, so it
    # can differ from f"{yt.title}.mp4".
    downloaded_path = video_stream.download(output_path=working_folder)
    video_file_name = os.path.basename(downloaded_path)
    print(f"Video downloaded successfully to: {downloaded_path}")
    print(f"Video File name: {video_file_name}")

except Exception as e:
    print(f"Error: {e}")


In [None]:

#@markdown ### Get Screenshots for a Video

# Define a function to check if a directory exists
def check_directory_exists(directory):
    """Return True when directory exists and is a directory.

    Matches the definition in the install cell: a regular file at the
    given path does not count as an existing directory.
    """
    return os.path.isdir(directory)

# Set the working_folder based on input_storage_location
if "Video file in Google Drive" in input_storage_location:
    working_folder = os.path.join("/content/drive/MyDrive/", project_name)
else:
    working_folder = os.path.join("/content", project_name)

# Stop right away when the working folder is missing: listing it below
# would fail anyway, only with a less helpful message.
if not check_directory_exists(working_folder):
    raise FileNotFoundError(f"The directory '{working_folder}' does not exist.")


print(f"There are {countNumberOfFilesInFolder(working_folder)} files in {working_folder}")

# Validate the video file name before any other name is derived from it.
if not video_file_name:
    raise ValueError("Error: video_file_name is empty. Please provide a valid file name.")

screenshots_output_folder = "" #@param {type:"string"}
output_storage_location = "Store in Google Drive (/content/drive/MyDrive/)" #@param ["Store in colab (/content/)", "Store in Google Drive (/content/drive/MyDrive/)"]
default_output_folder = f"{video_file_name}_output"

cleaned_default_output_folder = clean_string(default_output_folder)

# Fall back to a folder named after the video when the form field is empty
screenshots_output_folder = screenshots_output_folder or cleaned_default_output_folder
#@markdown Adjust how often you want to screenshot frames. (Example: 30 FPS for 1 minute is 1800 screenshots)
frame_interval = 10 #@param {type:"integer"}
#@markdown NOTE: Progress bar is not accurate if you adjust the frame_interval

#@markdown Check this box if you want to delete the output folder before creating new output
delete_output_flag = True #@param {type:"boolean"}

#@markdown Automatic Zip Output Options
zip_output = False #@param {type:"boolean"}

def delete_output_directory(output_directory):
    """Delete output_directory and everything inside it, if it exists.

    Does nothing when the path is missing. A failure to delete is printed
    rather than raised, and the "deleted" message is only printed when the
    removal actually succeeded.
    """
    import shutil  # local import keeps this function self-contained

    if not os.path.exists(output_directory):
        return

    try:
        if os.path.isdir(output_directory) and not os.path.islink(output_directory):
            shutil.rmtree(output_directory)
        else:
            # A plain file or a symlink occupying the path: remove just that entry.
            os.remove(output_directory)
    except OSError as e:
        print(f"Error: could not delete output directory '{output_directory}': {e}")
        return

    print(f"Output directory '{output_directory}' deleted.")

def run_ffmpeg_command(input_video, screenshots_output_folder, frame_interval):
    """Extract every frame_interval-th frame of input_video as PNG files.

    Frames are written to screenshots_output_folder as
    'output_frames_%04d.png'. A progress bar is driven by the time stamps
    FFmpeg prints on stderr, scaled to the frame count reported by ffprobe.

    Args:
        input_video: Path of the video file to read.
        screenshots_output_folder: Directory to write the PNG files to;
            created when missing.
        frame_interval: Keep one frame out of every frame_interval frames;
            must be at least 1.

    Raises:
        ValueError: If frame_interval is smaller than 1.
        subprocess.CalledProcessError: If ffprobe fails on input_video.
    """
    if frame_interval < 1:
        raise ValueError(f"frame_interval must be at least 1, got {frame_interval}")

    # Create the output directory if it doesn't exist
    os.makedirs(screenshots_output_folder, exist_ok=True)

    # Get total number of frames in the video. Commands are passed as
    # argument lists so quotes or spaces in paths cannot break them.
    ffprobe_command = [
        'ffprobe', '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries', 'stream=nb_frames',
        '-of', 'default=nokey=1:noprint_wrappers=1',
        input_video,
    ]
    ffprobe_output = subprocess.check_output(ffprobe_command).decode('utf-8').strip()
    try:
        total_frames = int(ffprobe_output)
    except ValueError:
        # Some containers report "N/A" for nb_frames; run without a known total.
        total_frames = None

    # FFmpeg command to extract frames. The backslash escapes the comma
    # inside mod() so the filter graph does not treat it as a separator.
    ffmpeg_command = [
        'ffmpeg', '-i', input_video,
        '-vf', f'select=not(mod(n\\,{frame_interval})),setpts=N/FRAME_RATE/TB',
        '-vsync', 'vfr',
        f'{screenshots_output_folder}/output_frames_%04d.png',
    ]

    # Only stderr is read below, so stdout is discarded: an unread stdout
    # pipe would eventually fill up and block FFmpeg forever.
    process = subprocess.Popen(
        ffmpeg_command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,
    )

    # Parse progress information
    duration_pattern = re.compile(r"Duration: (\d+:\d+:\d+\.\d+),")
    time_pattern = re.compile(r"time=(\d+:\d+:\d+\.\d+)")
    total_seconds = None

    with tqdm(total=total_frames, unit="frame", unit_scale=True, desc="Processing") as pbar:
        for line in process.stderr:
            duration_match = duration_pattern.search(line)
            time_match = time_pattern.search(line)

            if duration_match:
                total_seconds = time_to_seconds(duration_match.group(1))

            # Skip the estimate when the duration is unknown or zero, or
            # when ffprobe could not report a frame count.
            if time_match and total_seconds and total_frames:
                fraction_done = min(time_to_seconds(time_match.group(1)) / total_seconds, 1.0)
                frames_processed = int(fraction_done * total_frames)
                if frames_processed > pbar.n:
                    pbar.update(frames_processed - pbar.n)

    # Wait for the process to finish
    process.wait()

    # Check for errors
    if process.returncode != 0:
        print(f"\nError: FFmpeg process failed with return code {process.returncode}")
    else:
        print(f"\nFrames extracted successfully. Output directory: {screenshots_output_folder}")

def time_to_seconds(time_str):
    """Convert an 'H:M:S' time string (seconds may be fractional) to seconds."""
    hours, minutes, seconds = (float(part) for part in time_str.split(':'))
    return hours * 3600 + minutes * 60 + seconds


input_video_path = os.path.join(working_folder, video_file_name)

# Resolve where the screenshots go. Each known storage option maps to its
# root path; any other value keeps the output next to the video.
if output_storage_location == "Store in colab (/content/)":
    storage_path = "/content/"
    output_frames_directory = os.path.join(storage_path, project_name, screenshots_output_folder)
    print("Storing in colab session.")
elif output_storage_location == "Store in Google Drive (/content/drive/MyDrive/)":
    storage_path = "/content/drive/MyDrive/"
    output_frames_directory = os.path.join(storage_path, project_name, screenshots_output_folder)
    print("Storing in Google Drive.")
else:
    output_frames_directory = os.path.join(working_folder, screenshots_output_folder)

# Optionally start from an empty output folder
if delete_output_flag:
    print("delete_output_flag is true")
    delete_output_directory(output_frames_directory)

run_ffmpeg_command(input_video_path, output_frames_directory, frame_interval)

extracted_image_count = countNumberOfFilesInFolder(output_frames_directory)
print(f"There are {extracted_image_count} images in the output directory")



# Optionally zip the screenshots right after extraction
if zip_output:
    print("_" * 50)  # Print horizontal bar
    zip_output_storage_location = "Store in colab (/content/)" #@param ["Store in colab (/content/)", "Store in Google Drive (/content/drive/MyDrive/)"]

    if zip_output_storage_location == "Store in colab (/content/)":
        # Archive goes to the Colab session storage
        storage_path = "/content/"
        output_zip_directory = os.path.join(storage_path, screenshots_output_folder)
        print(f"Zipping output in Google Colab: {output_zip_directory}")

    elif zip_output_storage_location == "Store in Google Drive (/content/drive/MyDrive/)":
        # Archive goes to the project folder in Google Drive
        storage_path = "/content/drive/MyDrive/"
        output_zip_directory = os.path.join(storage_path, project_name, screenshots_output_folder)
        print(f"Zipping output in Google Drive: {output_zip_directory}")

    folder_to_zip = output_frames_directory
    # zip_folder appends ".zip" to this path itself
    zip_output_path = output_zip_directory

    zip_folder(folder_to_zip, zip_output_path)


In [None]:
#@title Zip the Screenshots Output Folder

zip_output_storage_location = "Store in colab (/content/)" #@param ["Store in colab (/content/)", "Store in Google Drive (/content/drive/MyDrive/)"]

if zip_output_storage_location == "Store in colab (/content/)":
    # Archive goes to the Colab session storage
    storage_path = "/content/"
    output_zip_directory = os.path.join(storage_path, screenshots_output_folder)
    print(f"Zipping output in Google Colab: {output_zip_directory}")

elif zip_output_storage_location == "Store in Google Drive (/content/drive/MyDrive/)":
    # Archive goes to the project folder in Google Drive
    storage_path = "/content/drive/MyDrive/"
    output_zip_directory = os.path.join(storage_path, project_name, screenshots_output_folder)
    print(f"Zipping output in Google Drive: {output_zip_directory}")

folder_to_zip = output_frames_directory
# zip_folder appends ".zip" to this path itself
zip_output_path = output_zip_directory

zip_folder(folder_to_zip, zip_output_path)