### This notebook saves YouTube videos listed in ./video_url.txt to a folder named ./video
- Video's name would be its ID in YouTube

### After that, the frames of videos are extracted to ./images
- For example, the image frames of 'abcdefghi.mp4' would be extracted into './images/abcdefghi/scene_00000001.jpg'

In [1]:
import config
import os
import youtube_dl
import cv2
from moviepy.editor import *

In [2]:
f = open(config.url_filePath, 'r')
urls = f.readlines()

In [3]:
os.makedirs(config.video_dir, exist_ok=True)

In [4]:
if config.fps != -1:
    for u in urls:
        fname = u.split("=")[1][:-1] + '_tmp.mp4'
        print('Processing {}'.format(fname))
        ydl_opts = {'outtmpl': os.path.join(config.video_dir, fname), 'format': 'mp4'}
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([u])
    video_files = [os.path.join(config.video_dir, fp) for fp in os.listdir(config.video_dir) if fp[0]!='.']

    for fp in video_files:
        clip = VideoFileClip(fp)
        clip = clip.resize(height=config.vid_height)
        print('FPS: ', clip.fps, '-------------->', config.fps)
        clip.write_videofile(os.path.splitext(fp)[0].split('_tmp')[0]+os.path.splitext(fp)[1], fps=config.fps)
        clip.reader.close()
        if '_tmp.mp4' in fp:
            os.remove(fp)
else:
    for u in urls:
        fname = u.split("=")[1][:-1] + '.mp4'
        print('Processing {}'.format(fname))
        ydl_opts = {'outtmpl': os.path.join(config.video_dir, fname), 'format': 'mp4'}
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([u])

Processing 93r7YErnE1o.mp4
[youtube] 93r7YErnE1o: Downloading webpage
[download] Destination: videos/93r7YErnE1o.mp4
[download] 100% of 31.29MiB in 00:0192MiB/s ETA 00:008
Processing D9H-eRIROA.mp4
[youtube] D9H-eRIROAE: Downloading webpage
[download] Destination: videos/D9H-eRIROA.mp4
[download] 100% of 18.75MiB in 00:0056MiB/s ETA 00:007


In [5]:
video_ids = [fp.split('.')[0] for fp in os.listdir(config.video_dir) if fp[0]!='.']

In [6]:
os.makedirs(os.path.join(config.image_dir), exist_ok=True)
for v_id in video_ids:
    os.makedirs(os.path.join(config.image_dir, v_id), exist_ok=True)
    vid_fp = os.path.join(config.video_dir, v_id+'.mp4')
    
    print('Reading {}'.format(vid_fp))
    
    vidcap = cv2.VideoCapture(vid_fp)
    success, image = vidcap.read()
    cnt = 0
    while success:
        cv2.imwrite(os.path.join(config.image_dir, v_id, "scene_{0:08}.jpg".format(cnt)), image)
        success, image = vidcap.read()
        cnt += 1

Reading videos/D9H-eRIROA.mp4
Reading videos/93r7YErnE1o.mp4


### Cut some frames in start and end phase (if you want)

In [7]:
CUT_FRONT_END = True
if CUT_FRONT_END:
    for v_id in video_ids:
        img_dir = os.path.join(config.image_dir, v_id)
        for i in range(config.front_rear_cut):
            if 'scene_{0:08}.jpg'.format(i) in os.listdir(img_dir):
                os.remove(os.path.join(img_dir, 'scene_{0:08}.jpg'.format(i)))
            elif 'scene_{0:08}.jpg'.format(len(os.listdir(img_dir))-i) in os.listdir(img_dir):
                os.remove(os.path.join(img_dir, 'scene_{0:08}.jpg'.format(len(os.listdir(img_dir))-i)))    