In [1]:
import os
import subprocess as sp
import re
from moviepy.editor import VideoFileClip
from tqdm import tqdm

In [5]:
# Mount Google Drive at /content/drive so the dataset folders referenced
# below can be read and written (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Trim Video Length



In [7]:
# only required for film clips from LATEMO-E dataset

def trim_video(input_dir):
  """Trim the first 2 seconds off every ``.mp4`` file in a directory, in place.

  Each video is stream-copied (``-c copy``, no re-encode) by FFmpeg into a
  temporary ``temp_<name>`` file in the same directory, and that file then
  replaces the original. Because the originals are overwritten, calling this
  function a second time removes a further 2 seconds from every clip.

  Args:
    input_dir: Directory containing the ``.mp4`` files to trim.

  Raises:
    RuntimeError: If FFmpeg exits with a non-zero status for a video. The
      original file is left untouched and the temporary file is removed.
  """
  video_files = [f for f in os.listdir(input_dir) if f.endswith(".mp4")]

  for video in video_files:
    input_path = os.path.join(input_dir, video)
    temp_path = os.path.join(input_dir, "temp_" + video)  # Temporary output file

    # FFmpeg command to trim first 2 seconds
    command = ["ffmpeg", "-y", "-ss", "2", "-i", input_path,
               "-c", "copy", temp_path]

    # run FFmpeg command, keeping its output so failures can be reported
    result = sp.run(command, stdout=sp.PIPE, stderr=sp.PIPE)

    if result.returncode != 0:
      # Never let a failed or partial output overwrite the original video.
      if os.path.exists(temp_path):
        os.remove(temp_path)
      stderr_text = (result.stderr or b"").decode(errors="replace").strip()
      raise RuntimeError(
          f"FFmpeg failed to trim {input_path} "
          f"(exit code {result.returncode}): {stderr_text}")

    # replace original file with the trimmed version
    os.replace(temp_path, input_path)

  print("Video trimming complete.")

In [None]:
# Run to execute the function.
# NOTE: trim_video overwrites the files in place, so run this cell only once;
# each additional run removes another 2 seconds from every clip.
input_dir = "/content/drive/My Drive/LATEMOEOriginal"
trim_video(input_dir)

# Crop Black Borders

In [1]:
def crop_video(input_dir, video_dir):
  """Detect and remove black borders from a single video with FFmpeg.

  Runs FFmpeg's ``cropdetect`` filter over the first 5 seconds of the input
  and then re-encodes the video with libx264 (CRF 15) using the detected crop
  rectangle.

  Args:
    input_dir: Path of the source video *file* (despite the name, this is not
      a directory; the parameter name is kept for backward compatibility).
    video_dir: Path of the cropped output *file*. Its parent directory is
      created if missing and an existing file is overwritten.

  Raises:
    ValueError: If ``cropdetect`` reports no usable crop rectangle (for
      example because the input could not be opened).
    RuntimeError: If the FFmpeg encode exits with a non-zero status.
  """
  cropdetect_output = sp.run(['ffmpeg', '-hide_banner', '-i', input_dir,
                              '-vf', 'cropdetect=skip=0', '-t', '5', '-f',
                              'null', 'pipe:'], stderr=sp.PIPE,
                             universal_newlines=True).stderr or ""

  # cropdetect logs one "crop=w:h:x:y" per analysed frame and keeps growing the
  # rectangle to the largest non-black area seen so far, so the LAST report is
  # the one that covers every analysed frame. Requiring plain digits also
  # skips the negative sizes reported for fully black frames.
  crop_filters = re.findall(r'crop=\d+:\d+:\d+:\d+', cropdetect_output)
  if not crop_filters:
    raise ValueError(
        f"cropdetect found no crop rectangle for {input_dir}: "
        f"{cropdetect_output.strip()[-500:]}")
  crop_filter = crop_filters[-1]

  # FFmpeg does not create missing output folders itself.
  output_parent = os.path.dirname(video_dir)
  if output_parent:
    os.makedirs(output_parent, exist_ok=True)

  # "-y" makes re-runs overwrite a previous result instead of failing on the
  # existing file.
  command = ["ffmpeg", "-y", "-i", input_dir, "-vf", crop_filter,
             "-c:v", "libx264", "-crf", "15", video_dir]

  result = sp.run(command, capture_output=True, text=True)

  if result.returncode != 0:
    raise RuntimeError(
        f"FFmpeg failed to crop {input_dir} "
        f"(exit code {result.returncode}): {(result.stderr or '').strip()[-500:]}")


In [None]:
# FilmStim Dataset

# Clip IDs 1-70, except 6, 20, 29, 37, 40 and 60, which this notebook does not
# process.
film_stim = [clip_id for clip_id in range(1, 71)
             if clip_id not in {6, 20, 29, 37, 40, 60}]

# Crop the black borders off every FilmStim clip (source files are .mpeg).
for vid in tqdm(film_stim, desc="Processing videos"):
  input_dir = f"/content/drive/My Drive/FilmStimOriginal/{vid}.mpeg"
  video_dir = f"/content/drive/My Drive/Cropped_Videos/{vid}_cropped.mpeg"
  crop_video(input_dir=input_dir, video_dir=video_dir)

In [None]:
# LATEMO-E Dataset

# Clip IDs 71-98 (inclusive), with no gaps.
latemo_e = list(range(71, 99))

# Crop the black borders off every LATEMO-E clip (source files are .mp4).
for vid in tqdm(latemo_e, desc="Processing videos"):
  input_dir = f"/content/drive/My Drive/LATEMOEOriginal/{vid}.mp4"
  video_dir = f"/content/drive/My Drive/Cropped_Videos/{vid}_cropped.mp4"
  crop_video(input_dir=input_dir, video_dir=video_dir)

In [None]:
# E-MOVIE Dataset

e_movie = [99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
           111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
           123, 124]

# File extension of each clip, in the same order as `e_movie`.
video_ext = ["mp4", "mp4", "mp4", "mp4", "mp4", "mp4", "mp4", "mp4", "mp4",
              "mp4", "mpg", "mov", "mp4", "mp4", "mp4", "mp4", "mp4", "mpg",
              "mpg", "mpg", "mp4", "mpg", "mp4", "mp4", "mpg", "mp4"]

# The two lists are paired position by position; fail early if they drift
# apart instead of pairing IDs with the wrong extension.
assert len(e_movie) == len(video_ext), \
    "e_movie and video_ext must have the same length"

# Pair each ID with its extension directly rather than tracking a manual
# index counter. list() lets tqdm know the total for its progress bar.
for vid, ext in tqdm(list(zip(e_movie, video_ext)), desc="Processing videos"):
  input_dir = f"/content/drive/My Drive/EMovieOriginal/{vid}.{ext}"
  video_dir = f"/content/drive/My Drive/Cropped_Videos/{vid}_cropped.{ext}"
  crop_video(input_dir, video_dir)

# Extract Audio Files

In [11]:
def extract_audio(video_id, video_ext):
  """Extract the audio track of a cropped video and save it as an MP3.

  Reads ``/content/drive/My Drive/Cropped_Videos/{video_id}_cropped.{video_ext}``
  and writes ``/content/drive/My Drive/Audio/{video_id}_audio.mp3``.

  Args:
    video_id: Identifier used in the file names (the notebook passes the
      integer clip IDs).
    video_ext: File extension of the cropped video, without the leading dot
      (e.g. ``"mp4"``).

  Raises:
    ValueError: If the video has no audio track.
  """
  video_dir = f"/content/drive/My Drive/Cropped_Videos/{video_id}_cropped.{video_ext}"
  audio_dir = f"/content/drive/My Drive/Audio/{video_id}_audio.mp3"

  video = VideoFileClip(video_dir)
  try:
    audio = video.audio
    if audio is None:
      # Fail with a clear message instead of an AttributeError on None.
      raise ValueError(f"No audio track found in {video_dir}")

    audio.write_audiofile(audio_dir)
  finally:
    # Always release the clip's underlying readers; this function is called
    # once per video in a loop, so unclosed clips would pile up.
    video.close()

  print("Audio file extraction complete.")

In [None]:
# FilmStim Dataset

film_stim = [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
             21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 38,
             39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
             56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]

# Every FilmStim clip uses the "mpeg" extension. Iterate over the IDs directly
# and pass the extension inline so the built-in `format` is not shadowed in
# the notebook namespace.
for video_id in film_stim:
  extract_audio(video_id=video_id, video_ext="mpeg")

In [None]:
# LATEMO-E Dataset

latemo_e = [71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86,
            87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98]

# Every LATEMO-E clip uses the "mp4" extension. Iterate over the IDs directly
# and pass the extension inline so the built-in `format` is not shadowed in
# the notebook namespace.
for video_id in latemo_e:
  extract_audio(video_id=video_id, video_ext="mp4")

In [None]:
# E-MOVIE Dataset

e_movie = [99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
           111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
           123, 124]

# File extension of each clip, in the same order as `e_movie`.
video_ext = ["mp4", "mp4", "mp4", "mp4", "mp4", "mp4", "mp4", "mp4", "mp4",
              "mp4", "mpg", "mov", "mp4", "mp4", "mp4", "mp4", "mp4", "mpg",
              "mpg", "mpg", "mp4", "mpg", "mp4", "mp4", "mpg", "mp4"]

# The two lists are paired position by position; fail early if they drift
# apart instead of pairing IDs with the wrong extension.
assert len(e_movie) == len(video_ext), \
    "e_movie and video_ext must have the same length"

# Pair each ID with its extension directly (no index bookkeeping, and the
# built-in `format` is not shadowed in the notebook namespace).
for video_id, ext in zip(e_movie, video_ext):
  extract_audio(video_id=video_id, video_ext=ext)