### This notebook saves the YouTube videos listed in ./video_url.csv to a folder named ./videos
- Each video file is named after its YouTube video ID
- The CSV needs a header line followed by rows of video_url,start_sec,end_sec

### After that, the frames of each video are extracted to ./images
- For example, the frames of 'abcdefghi.mp4' would be extracted to './images/abcdefghi/scene_00000000.jpg', './images/abcdefghi/scene_00000001.jpg', and so on

In [1]:
# Dependencies for the whole notebook: CSV parsing (csv), downloading
# (youtube_dl), trimming/resizing (moviepy) and frame extraction (cv2).
import csv
import config  # settings module; the cells below read video_dir, image_dir, vid_height and fps from it
import os
import youtube_dl
import cv2
# NOTE(review): wildcard import - the visible cells only use VideoFileClip from it.
from moviepy.editor import *
print ("Done.")

Done.


### Read video URL

In [2]:
# Build url_and_time: YouTube URL -> (start_sec, end_sec) trim window,
# read from video_url.csv (first line is a header, then one video per row).
url_and_time = dict()
# newline='' is what the csv module recommends so it can handle line endings itself.
with open('video_url.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    next(csv_reader, None)  # skip the header line; the default avoids StopIteration on an empty file
    for row in csv_reader:
        if not row:
            continue  # csv.reader yields [] for blank lines, which would raise IndexError below
        url_and_time[row[0]] = (float(row[1]), float(row[2]))
for u, (s_t, e_t) in url_and_time.items():
    print ("YouTube URL:[%s] [%.2f]s~[%.2f]s"%(u,s_t,e_t))

YouTube URL:[https://www.youtube.com/watch?v=Yw3lYf4hYaQ] [3.00]s~[27.00]s


### Make the video directory (./videos)

In [3]:
# Ensure the folder that will hold the downloaded videos exists;
# exist_ok=True makes re-running this cell harmless.
os.makedirs(name=config.video_dir, exist_ok=True)
print("[./%s] created." % (config.video_dir,))

[./videos] created.


### Download the full video into (./videos/NAME_full.mp4)

In [4]:
# Download each listed video in full. The file is stored as
# <config.video_dir>/<ID>_full.mp4, where <ID> is the text between the first
# and second "=" of the URL (the video ID for plain ".../watch?v=<ID>" links).
for u, (s_t, e_t) in url_and_time.items():
    print ("YouTube URL:[%s] [%.2f]s~[%.2f]s"%(u,s_t,e_t))
    fname = u.split("=")[1] + '_full.mp4'
    print('Processing [%s]'%(fname))
    # YouTube download options: fixed output path, request an mp4 format
    ydl_opts = {'outtmpl': os.path.join(config.video_dir, fname),'format':'mp4'}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([u]) # download the YouTube video
print ("\nWe have downloaded [%d] videos."%(len(url_and_time)))
# List the file name used for every URL handled above.
for u in url_and_time:
    print (" [%s]"%(u.split("=")[1] + '_full.mp4'))

YouTube URL:[https://www.youtube.com/watch?v=Yw3lYf4hYaQ] [3.00]s~[27.00]s
Processing [Yw3lYf4hYaQ_full.mp4]
[youtube] Yw3lYf4hYaQ: Downloading webpage
[download] videos/Yw3lYf4hYaQ_full.mp4 has already been downloaded
[download] 100% of 11.97MiB

We have downloaded [1] videos.
 [Yw3lYf4hYaQ_full.mp4]


### Trim the videos into (./videos/NAME.mp4)

In [5]:
# Trim each full download to its [start_sec, end_sec] window, resize it to
# config.vid_height and save it as <config.video_dir>/<ID>.mp4.
# config.fps == -1 means "do not pass an fps to the writer".
for u, (s_t, e_t) in url_and_time.items():
    v_id = u.split("=")[1]
    vpath = os.path.join(config.video_dir, v_id + '_full.mp4')
    # Build the output path from the ID rather than vpath.replace('_full', ''):
    # replace() strips every "_full" in the path, so a directory name or video
    # ID containing that text would yield the wrong output file.
    out_path = os.path.join(config.video_dir, v_id + '.mp4')
    full_clip = VideoFileClip(vpath)
    try:
        clip = full_clip.subclip(s_t, e_t) # trim
        clip = clip.resize(height=config.vid_height) # resize
        # Save the trimmed video
        if config.fps != -1:
            print('FPS: ', clip.fps, '-------------->', config.fps)
            clip.write_videofile(out_path, fps=config.fps)
        else:
            clip.write_videofile(out_path)
    finally:
        # Close the source clip (video reader and audio) even if writing fails.
        full_clip.close()
    print ("YouTube URL:[%s] [%.2f]s~[%.2f]s"%(u,s_t,e_t))
    print ("Full video path:[%s]"%(vpath))

chunk:  13%|█▎        | 68/530 [00:00<00:00, 675.76it/s, now=None]

FPS:  30.0 --------------> 10
Moviepy - Building video videos/Yw3lYf4hYaQ.mp4.
MoviePy - Writing audio in Yw3lYf4hYaQTEMP_MPY_wvf_snd.mp3


t:   6%|▌         | 14/240 [00:00<00:01, 139.88it/s, now=None]     

MoviePy - Done.
Moviepy - Writing video videos/Yw3lYf4hYaQ.mp4



                                                               

Moviepy - Done !
Moviepy - video ready videos/Yw3lYf4hYaQ.mp4
YouTube URL:[https://www.youtube.com/watch?v=Yw3lYf4hYaQ] [3.00]s~[27.00]s
Full video path:[videos/Yw3lYf4hYaQ_full.mp4]


### Save Images into (./images/NAME/scene_XXXXXXXX.jpg)

In [6]:
# Ensure the top-level folder for extracted frames exists; safe to re-run.
os.makedirs(name=config.image_dir, exist_ok=True)
print("[./%s] created." % (config.image_dir,))

[./images] created.


In [7]:
# Dump every frame of each trimmed video to
# <config.image_dir>/<ID>/scene_<8-digit zero-based frame index>.jpg.
for u in url_and_time:
    v_id = u.split("=")[1]
    fname = v_id + '.mp4'
    vpath = os.path.join(config.video_dir,fname)
    print ("Reading [%s]"%(vpath))
    frame_dir = os.path.join(config.image_dir,v_id)
    os.makedirs(frame_dir,exist_ok=True) # make subdir
    vidcap = cv2.VideoCapture(vpath) # open video capture
    cnt = 0
    try:
        success, image = vidcap.read()
        while success:
            image_path = os.path.join(frame_dir,"scene_{0:08}.jpg".format(cnt))
            # imwrite reports failure by returning False; stop instead of
            # counting frames that never reached the disk.
            if not cv2.imwrite(image_path,image):
                raise IOError("Failed to write [%s]"%(image_path))
            success, image = vidcap.read()
            cnt += 1
    finally:
        vidcap.release() # free the capture even when an error occurs

    print ("[%d] images saved."%(cnt))

Reading [videos/Yw3lYf4hYaQ.mp4]
[240] images saved.
