# VLC Frame Extract and Upload Script

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/edgeimpulse/utils-video-frame-extraction/blob/main/vlc_frame_extract_and_upload.ipynb)

To use this script, upload a video to /content (use the pane on the left side). Run through the cells until you get to "Settings." Update the following settings:
* Edge Impulse API key
* Path to input videos
* Labels

In [None]:
### Update Node.js to the latest stable version
# Force-clean the npm cache before installing anything
!npm cache clean -f
# Globally install "n" (used on the next line to switch Node.js versions)
!npm install -g n
# Ask "n" for the current stable Node.js release
!n stable

In [None]:
### Install required packages and tools
# Globally install the Edge Impulse CLI package; presumably this is what
# provides the edge-impulse-uploader command invoked in the upload cell
!npm install -g --unsafe-perm edge-impulse-cli

In [None]:
### Install ffmpeg
!apt install ffmpeg

In [None]:
### Use some packages
import random, os, shutil, uuid

In [None]:
### Settings

# Video and label ("label": ["/path/to/video1", "/path/to/video2", ...])
# Each key becomes the Edge Impulse label for every frame extracted from its videos
VIDEO_PATHS = {
  "dance_a": ["/content/dance_a.mp4"],
  "dance_c": ["/content/dance_c.mp4"],
  "dance_m": ["/content/dance_m.mp4"],
  "_unknown": ["/content/dance_unknown.mp4"],
  "dance_y": ["/content/dance_y.mp4"],
}

# Copy from Edge Impulse > your_project > Dashboard > Keys
# The EI_API_KEY environment variable, when set, takes precedence over the
# literal below so the key does not have to live in the notebook.
# NOTE(review): the fallback literal is a credential committed to source;
# replace it with your own project's key and avoid sharing the notebook with it.
EI_API_KEY = os.environ.get("EI_API_KEY") or "ei_dcfefd264337a90b49447e65d1f07ad39513b36d8206d16bacdae4813369c442"

# ffmpeg framerate (e.g. "30" for 30 fps)
FRAME_RATE = "10"

# Working paths (both are deleted and re-created by the next cell)
TEMP_PATH = "/content/temp"
OUTPUT_PATH = "/content/output"

# Set aside 20% for test set (Edge Impulse automatically extracts validation set during training)
TEST_RATIO = 0.2

# You are welcome to change the seed to try a different train/test split
random.seed(42)

In [None]:
### Start from a clean slate: wipe and rebuild the working directories

# Temp directory: discard leftovers from any previous run, then create it empty
if os.path.exists(TEMP_PATH):
  shutil.rmtree(TEMP_PATH)
os.makedirs(TEMP_PATH)

# Output directory: discard previous results, then create one sub-directory per label
if os.path.exists(OUTPUT_PATH):
  shutil.rmtree(OUTPUT_PATH)
for path in (os.path.join(OUTPUT_PATH, label_name) for label_name in VIDEO_PATHS):
  os.makedirs(path)

In [None]:
### Extract all frames from all videos and copy them to the appropriate directories
for label in VIDEO_PATHS:
  for video_path in VIDEO_PATHS[label]:
    
    # Extract all frames in video into temp direcotry
    print("Extracting:", label)
    !ffmpeg \
      -i {video_path} \
      -r {FRAME_RATE} \
      -hide_banner \
      -loglevel error \
      "{TEMP_PATH}/{label}.%d.jpg"

    # Append filenames with UUIDs and move to output directory
    for filename in os.listdir(TEMP_PATH):
      file_path = os.path.join(TEMP_PATH, filename)
      id = str(uuid.uuid4().hex)[-12:]
      base_path = os.path.basename(file_path)
      file_stem = base_path.rsplit('.', 1)[0]
      uuid_file = file_stem + "_" + id + ".jpg"
      shutil.move(file_path, os.path.join(OUTPUT_PATH, label, uuid_file))

In [None]:
### Send all files to Edge Impulse project

# Go through each label
for label in VIDEO_PATHS:

  # Create list of file paths
  paths = []
  for filename in os.listdir(os.path.join(OUTPUT_PATH, label)):
    paths.append(os.path.join(OUTPUT_PATH, label, filename))

  # Shuffle and divide into test and training sets
  random.shuffle(paths)
  num_test_samples = int(TEST_RATIO * len(paths))
  test_paths = paths[:num_test_samples]
  train_paths = paths[num_test_samples:]

  # Create arugments list (as a string) for CLI call
  test_paths = ['"' + s + '"' for s in test_paths]
  test_paths = ' '.join(test_paths)
  train_paths = ['"' + s + '"' for s in train_paths]
  train_paths = ' '.join(train_paths)

  # Use CLI tool to send training set to Edge Impulse
  !edge-impulse-uploader \
  --category training \
  --label "{label}" \
  --api-key {EI_API_KEY} \
  --silent \
  --concurrency 20 \
  {train_paths}

  # Use CLI tool to send test set to Edge Impulse
  !edge-impulse-uploader \
  --category testing \
  --label "{label}" \
  --api-key {EI_API_KEY} \
  --silent \
  --concurrency 20 \
  {test_paths}