# WIP Auto Image Cropper

| |GitHub| Colab |
|:--|:-:|:-:|
| 🎬 **Video To Screenshots** | [![GitHub](https://img.shields.io/badge/GitHub-Visit-brightgreen.svg)](https://github.com/citronlegacy/Video-to-Screenshots/blob/main/Video-to-Screenshots.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/citronlegacy/Video-to-Screenshots/blob/main/Video-to-Screenshots.ipynb) |
| 🎬 **Youtube Video to Screenshots** | [![GitHub](https://img.shields.io/badge/GitHub-Visit-brightgreen.svg)](https://github.com/citronlegacy/Video-to-Screenshots/blob/main/Youtube-Video-to-Screenshots.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/citronlegacy/Video-to-Screenshots/blob/main/Youtube-Video-to-Screenshots.ipynb) |

### Project Description

This Google Colab notebook is a project for generating frames from a video. It takes a video file as input and outputs a screenshot for every frame or for every n-th frame.
I recommend not outputting every frame, as that would generate thousands of images for even a few minutes of video.
### Libraries Used

- **FFmpeg:** A multimedia framework for handling audio, video, and other multimedia files.
- **tqdm:** A library for displaying progress bars in Python.
- **subprocess:** A module to spawn new processes, connect to their input/output/error pipes, and obtain their return codes.
- **shlex:** A module for parsing strings into tokens, especially useful when dealing with command-line-like syntax.
- **os:** A module for interacting with the operating system, providing functionality to manage directories and files.
- **ipywidgets:** A library for creating interactive widgets in Jupyter notebooks.
- **pytube:** A library for downloading YouTube videos.
- **zipfile:** A module to work with zip archives in Python.


### Project Disclaimer

This Colab notebook is provided for educational and informational purposes only. The content and code within this notebook are not intended for production use, and any actions taken based on the provided information are at your own risk.
When using the `pytube` library to download videos from YouTube, please be aware of YouTube's terms of service. Unauthorized downloading of videos may violate YouTube's terms.

---

In [None]:
import os
from PIL import Image
from tqdm import tqdm  # for progress bar

#@markdown ### Install requirements and connect to Google Drive
# Mount Google Drive only when /content/drive is not present yet; the Colab
# helper is imported lazily inside the branch, so it is only needed when a
# mount actually happens.
if not os.path.exists('/content/drive'):
  from google.colab import drive
  print("📂 Connecting to Google Drive...")
  drive.mount('/content/drive')

# Install Tesseract OCR
!apt-get install -y tesseract-ocr

# Install pytesseract Python wrapper
!pip install pytesseract

# Download Tesseract data for better OCR accuracy (optional but recommended)
!apt-get install -y tesseract-ocr-eng

# NOTE(review): pytesseract was already installed a few lines up; this second
# pip call additionally names Pillow.
!pip install pytesseract Pillow
import pytesseract

# Tell pytesseract which tesseract executable to run.
# NOTE(review): presumably /usr/bin/tesseract is where the apt package above
# installs the binary — confirm on the target runtime.
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

def check_directory_exists(directory_path):
    """Return True when ``directory_path`` exists and is a directory.

    Args:
        directory_path: Filesystem path to inspect.

    Returns:
        False when nothing exists at the path or when the path is not a
        directory (for example a regular file); True otherwise.
    """
    # Guard clause: nothing at this path at all.
    if not os.path.exists(directory_path):
        return False
    return os.path.isdir(directory_path)

def get_directory(directory_location, project_name):
    """Resolve a folder name from the notebook form to a full path.

    Args:
        directory_location: Label of the chosen storage option. Any value
            containing the text "Google Drive" selects
            ``/content/drive/MyDrive/``; every other value selects
            ``/content``.
        project_name: Folder name (or relative path) inside that base
            directory. Leading "/" characters are ignored.

    Returns:
        ``project_name`` joined onto the selected base directory.
    """
    if "Google Drive" in directory_location:
        base_dir = "/content/drive/MyDrive/"
    else:
        base_dir = "/content"

    # os.path.join throws away the base when the second component is absolute,
    # so "/frames" would resolve to "/frames" instead of "<base>/frames".
    # Dropping leading slashes keeps the result inside the selected base.
    return os.path.join(base_dir, project_name.lstrip("/"))

def countNumberOfFilesInFolder(folder):
    """Return how many regular files sit directly inside ``folder``.

    Sub-directories are neither counted nor descended into. Every file is
    counted regardless of its extension.

    Args:
        folder: Path of the directory to inspect.

    Returns:
        The number of directory entries for which ``os.path.isfile`` is true.
    """
    entry_names = os.listdir(folder)
    return sum(
        1
        for entry_name in entry_names
        if os.path.isfile(os.path.join(folder, entry_name))
    )

def delete_dir(directory):
    """Delete ``directory`` and everything inside it, printing the outcome.

    This is a best-effort helper: problems are reported with ``print`` and
    never raised to the caller.

    Args:
        directory: Path of the directory tree to remove.

    Returns:
        None.
    """
    # Imported locally because the notebook's import cell does not bring in
    # shutil; without it every call failed with a NameError that the old
    # broad ``except Exception`` silently turned into an error message.
    import shutil

    # An empty folder name in the form resolves to a storage root. Refuse to
    # wipe those instead of deleting the user's whole Drive or runtime disk.
    protected_roots = {
        os.path.normpath(root)
        for root in ("/", "/content", "/content/drive", "/content/drive/MyDrive")
    }
    if not directory or os.path.normpath(directory) in protected_roots:
        print(f"Error deleting directory '{directory}': refusing to delete a root folder.")
        return

    # Nothing to clean up (e.g. the very first run, before any output exists).
    if not os.path.isdir(directory):
        print(f"'{directory}' is not an existing directory; nothing to delete.")
        return

    try:
        # Delete the directory and its contents
        shutil.rmtree(directory)
    except OSError as e:
        print(f"Error deleting directory '{directory}': {e}")
    else:
        print(f"Directory '{directory}' deleted successfully.")

In [None]:
#@title Automatically detect if there is text in an image and crop by specified pixels
#@markdown It would be best if it could automatically determine how to crop based on the detected text, but I was having issues with this

directory_location = "Directory in Google Drive (/content/drive/MyDrive/)" #@param ["Directory in Colab (/content/)", "Directory in Google Drive (/content/drive/MyDrive/)"]

input_directory = "" #@param {type:"string"}
output_directory = "" #@param {type:"string"}

delete_previous_cropped_images = True # @param {type:"boolean"}
#zip_cropped_output = True # @param {type:"boolean"}

# A blank folder name would resolve to the storage root itself, which would
# then be handed to the clean-up step and to the cropper. Stop early instead.
if not input_directory.strip() or not output_directory.strip():
    raise ValueError("Please fill in both input_directory and output_directory before running this cell.")

input_directory = get_directory(directory_location, input_directory)
output_directory = get_directory(directory_location, output_directory)

if delete_previous_cropped_images:
    # Clearing the output folder must never remove the images we are about to read.
    if os.path.normpath(output_directory) == os.path.normpath(input_directory):
        raise ValueError("output_directory must differ from input_directory when delete_previous_cropped_images is enabled.")
    delete_dir(output_directory)

# Number of pixels to trim from each edge of an image in which text was detected.
cropTopByThisManyPixels = 0 # @param {type:"number"}
cropRightByThisManyPixels = 0 # @param {type:"number"}
cropLeftByThisManyPixels = 0 # @param {type:"number"}
cropBottomByThisManyPixels = 0 # @param {type:"number"}


def crop_images(input_dir, output_dir, cropTopByThisManyPixels=0, cropRightByThisManyPixels=0,
                cropLeftByThisManyPixels=0, cropBottomByThisManyPixels=260):
    """Copy the images in ``input_dir`` to ``output_dir``, trimming those that contain text.

    Every file in ``input_dir`` whose name ends in .png, .jpg, .jpeg, .gif or
    .bmp (case-insensitive) is passed to Tesseract OCR. When OCR returns any
    non-whitespace text, the requested margins are cut off before saving;
    otherwise the image is saved unchanged. Output files keep the input
    file name.

    Args:
        input_dir: Directory holding the images to process.
        output_dir: Directory to write results to; created if missing.
        cropTopByThisManyPixels: Pixels to remove from the top edge.
        cropRightByThisManyPixels: Pixels to remove from the right edge.
        cropLeftByThisManyPixels: Pixels to remove from the left edge.
        cropBottomByThisManyPixels: Pixels to remove from the bottom edge.
            Note that this default is 260, unlike the other margins.

    Returns:
        None. Results are written to ``output_dir``.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Get a list of all image files in the input directory
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    image_files = [f for f in os.listdir(input_dir) if f.lower().endswith(image_extensions)]

    # Report the number of images that will actually be processed, so the
    # message agrees with the progress bar total below.
    print(f"There are {len(image_files)} images in the folder to process for auto cropping")

    # The context manager closes the progress bar even if an image fails.
    with tqdm(total=len(image_files), desc="Processing images", unit="image") as progress_bar:
        for image_file in image_files:
            # Construct full paths for input and output images
            input_path = os.path.join(input_dir, image_file)
            output_path = os.path.join(output_dir, image_file)

            # Open with a context manager so the file handle is released after
            # each image instead of accumulating over the whole batch.
            with Image.open(input_path) as img:
                # Use Tesseract to extract text from the image
                text = pytesseract.image_to_string(img)

                # Default: no text detected, keep the original image
                result_img = img

                if text.strip():
                    img_width, img_height = img.size

                    # Crop box as (left, upper, right, lower) pixel coordinates
                    left_crop = cropLeftByThisManyPixels
                    top_crop = cropTopByThisManyPixels
                    right_crop = img_width - cropRightByThisManyPixels
                    bottom_crop = img_height - cropBottomByThisManyPixels

                    if left_crop < right_crop and top_crop < bottom_crop:
                        result_img = img.crop((left_crop, top_crop, right_crop, bottom_crop))
                        print(f"\nDetected text in {input_path} cropping!\n")
                    else:
                        # The margins swallow the whole image (e.g. a small
                        # image with a large bottom margin); an inverted box
                        # is not a valid crop, so keep the image as it is.
                        print(f"\nDetected text in {input_path} but the crop margins "
                              f"exceed the image size ({img_width}x{img_height}); saving uncropped.\n")

                # Save while the source file is still open
                result_img.save(output_path)

            # Update progress bar
            progress_bar.update(1)


# Run the cropper with the folders and per-edge margins configured in this cell.
crop_images(
    input_dir=input_directory,
    output_dir=output_directory,
    cropTopByThisManyPixels=cropTopByThisManyPixels,
    cropRightByThisManyPixels=cropRightByThisManyPixels,
    cropLeftByThisManyPixels=cropLeftByThisManyPixels,
    cropBottomByThisManyPixels=cropBottomByThisManyPixels,
)
