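### This notebook downloads the YouTube videos listed in ./video_url.csv into the video folder (`config.video_dir`; `videos/` in the run recorded below)
- Each video is saved under its YouTube video ID (e.g. `9WhpAVOSyl8.mp4`), trimmed to the `start_sec`–`end_sec` range
- The CSV must start with a header row, followed by one `video_url,start_sec,end_sec` row per video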

### After that, the frames of each video are extracted into the image folder (`config.image_dir`, e.g. ./images)
- For example, the frames of 'abcdefghi.mp4' are written to './images/abcdefghi/scene_00000000.jpg', './images/abcdefghi/scene_00000001.jpg', and so on

In [1]:
import csv
import os
from urllib.parse import parse_qs, urlparse

import cv2
import youtube_dl
from moviepy.editor import *

import config

In [2]:
# Map each video URL to the (start_sec, end_sec) window that should be kept.
url_and_time = dict()
# newline='' is what the csv module expects so it can handle line endings itself.
with open('video_url.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader, None)  # skip the header row; tolerate a completely empty file
    for row in csv_reader:
        if not row:
            # Blank lines (e.g. a trailing newline) come back as [] and would
            # otherwise raise IndexError below.
            continue
        url_and_time[row[0]] = (float(row[1]), float(row[2]))
print(url_and_time)

{'https://www.youtube.com/watch?v=9WhpAVOSyl8': (25.0, 55.0)}


In [3]:
# Make sure the video output folder exists; an already-existing folder is not an error.
os.makedirs(name=config.video_dir, exist_ok=True)

In [4]:
def _video_id_from_url(url):
    """Return the YouTube video ID contained in ``url``.

    The ID is taken from the ``v`` query parameter (``watch?v=<id>``), so extra
    parameters such as ``&t=10s`` do not leak into the file name. If there is
    no ``v`` parameter, the last path segment is used instead (short-link
    style ``.../<id>``).

    Raises:
        ValueError: if no ID can be found in ``url``.
    """
    parsed = urlparse(url)
    ids = parse_qs(parsed.query).get('v')
    video_id = ids[0] if ids else parsed.path.rstrip('/').rsplit('/', 1)[-1]
    if not video_id:
        raise ValueError('Cannot find a video ID in {!r}'.format(url))
    return video_id


# Phase 1: download every listed video to '<id>_tmp.mp4' and remember exactly
# which files were fetched. Only those files are trimmed afterwards, so videos
# left in the folder by an earlier run are never re-processed.
clip_jobs = []
for u, (s_t, e_t) in url_and_time.items():
    video_id = _video_id_from_url(u)
    fname = video_id + '_tmp.mp4'
    print('Processing {}'.format(fname))
    tmp_fp = os.path.join(config.video_dir, fname)
    out_fp = os.path.join(config.video_dir, video_id + '.mp4')
    ydl_opts = {'outtmpl': tmp_fp, 'format': 'mp4'}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([u])
    clip_jobs.append((tmp_fp, out_fp, s_t, e_t))

# Phase 2: cut each download to its time window, resize it, optionally
# re-sample the frame rate, and write the result to '<id>.mp4'.
for tmp_fp, out_fp, s_t, e_t in clip_jobs:
    source = VideoFileClip(tmp_fp)
    try:
        clip = source.subclip(s_t, e_t).resize(height=config.vid_height)
        if config.fps != -1:
            print('FPS: ', clip.fps, '-------------->', config.fps)
            clip.write_videofile(out_fp, fps=config.fps)
        else:
            # fps == -1 means "keep the clip's own frame rate".
            clip.write_videofile(out_fp)
    finally:
        # Close the source clip (video and audio readers) even if writing
        # failed, so the temporary file can be deleted afterwards.
        source.close()
    os.remove(tmp_fp)

Processing 9WhpAVOSyl8_tmp.mp4
[youtube] 9WhpAVOSyl8: Downloading webpage
[download] Destination: videos/9WhpAVOSyl8_tmp.mp4
[download] 100% of 41.13MiB in 00:0199MiB/s ETA 00:004


chunk:   7%|▋         | 46/662 [00:00<00:01, 451.64it/s, now=None]

FPS:  30.0 --------------> 10
Moviepy - Building video videos/9WhpAVOSyl8.mp4.
MoviePy - Writing audio in 9WhpAVOSyl8TEMP_MPY_wvf_snd.mp3


t:   2%|▏         | 5/300 [00:00<00:06, 42.76it/s, now=None]       

MoviePy - Done.
Moviepy - Writing video videos/9WhpAVOSyl8.mp4



                                                              

Moviepy - Done !
Moviepy - video ready videos/9WhpAVOSyl8.mp4


In [5]:
# Collect one ID per entry in the video folder: the part of the file name
# before its first '.'. Entries whose name starts with '.' are skipped.
video_ids = []
for entry in os.listdir(config.video_dir):
    if entry.startswith('.'):
        continue
    video_ids.append(entry.partition('.')[0])

In [6]:
# Dump every frame of each video to '<image_dir>/<video id>/scene_XXXXXXXX.jpg',
# numbering frames from 0.
os.makedirs(config.image_dir, exist_ok=True)
for v_id in video_ids:
    frame_dir = os.path.join(config.image_dir, v_id)
    os.makedirs(frame_dir, exist_ok=True)
    vid_fp = os.path.join(config.video_dir, v_id+'.mp4')

    print('Reading {}'.format(vid_fp))

    vidcap = cv2.VideoCapture(vid_fp)
    try:
        if not vidcap.isOpened():
            # Report the problem instead of silently producing an empty folder.
            print('Could not open {}'.format(vid_fp))
        cnt = 0
        success, image = vidcap.read()
        while success:
            cv2.imwrite(os.path.join(frame_dir, "scene_{0:08}.jpg".format(cnt)), image)
            success, image = vidcap.read()
            cnt += 1
    finally:
        # Always free the capture handle, even if reading or writing raised.
        vidcap.release()

Reading videos/9WhpAVOSyl8.mp4
