In [2]:
!pip install pytube
!pip install ffmpeg



In [3]:
from pytube import YouTube, Playlist
import os
import re
from tqdm import tqdm
import ffmpeg

In [None]:
class YoutubePlaylistDownloader():
    def __init__(self, playlist_url, resolution, dest) -> None:
        self.playlist_url = playlist_url
        self.resolution = resolution
        if not os.path.exists(dest):
            os.makedirs(dest)
        self.dest = dest

    def __call__(self) -> None:
        '''
            Download all video from the YouTube playlist
        '''
        video_urls = Playlist(self.playlist_url).video_urls
        for video_url in tqdm(video_urls):
            try:
                video = YouTube(video_url)
                print(f'\nDownloading {video.title}')
                date = re.search(r'(\d{1,2})\/(\d{1,2})\/(\d{4})', video.title)
                year = date.group(3)
                month = date.group(2) if len(date.group(2)) > 1 else '0' + date.group(2)
                day = date.group(1) if len(date.group(1)) > 1 else '0' + date.group(1)
                video_name = f'{year}{month}{day}.mp4'
                if os.path.isfile(video_name):
                    continue
                if self.resolution == 'high':
                    stream = video.streams.get_highest_resolution()
                else:
                    stream = video.streams.get_lowest_resolution()
                stream.download(filename=video_name,
                                output_path=self.dest)
                print(f'Saved as {video_name}')
            except Exception as e:
                print(f'Cannot download video {video_url}')
                print(f'-> {e}')
        print('Downloading completed!')

In [15]:
def trim_video(videos_path, save_path, start=0, end=None) -> None:
    '''
        Trim a part of every video in a folder

        Each file found in `videos_path` is cut to the [start, end] range and
        written under the same file name in `save_path`.

        Parameters:
            videos_path: str
                path to the folder that contains the input videos
            save_path: str
                path to folder that contain the result (created if missing)
            start: float
                where to begin trimming, in seconds (forwarded to ffmpeg's
                trim/atrim filters)
            end: float
                where to end trimming, in seconds; when None, each video is
                kept up to its own duration as reported by ffmpeg.probe
    '''
    assert os.path.exists(videos_path) and os.listdir(videos_path) != []
    os.makedirs(save_path, exist_ok=True)
    pts = 'PTS-STARTPTS'
    for file_name in sorted(os.listdir(videos_path)):
        video_path = os.path.join(videos_path, file_name)
        if not os.path.isfile(video_path):
            # Sub-folders are not videos; leave them alone.
            continue
        dest = os.path.join(save_path, file_name)

        # Resolve the end point per file. Writing the probed duration back
        # into `end` would make every later video reuse the first one's length.
        clip_end = end
        if clip_end is None:
            clip_end = ffmpeg.probe(video_path).get('format', {}).get('duration')
        span = {'start': start}
        if clip_end is not None:
            # With no known end, omit it so the filters keep everything after start.
            span['end'] = clip_end

        input_stream = ffmpeg.input(video_path)
        video = input_stream.trim(**span).setpts(pts)
        audio = (input_stream
                 .filter_('atrim', **span)
                 .filter_('asetpts', pts))
        # concat defaults to v=1, a=0, which would treat the audio stream as a
        # second video segment; declare one video and one audio stream.
        output = ffmpeg.output(ffmpeg.concat(
            video, audio, v=1, a=1), dest, format='mp4')
        output.run()


In [None]:
videos_path = '../data/videos'

# Start downloading
YoutubePlaylistDownloader(playlist_url='https://www.youtube.com/watch?v=cPAlAOD-Og4&list=PL_UeYNcd7KvpDfdqPILdqdeWVeaLVsjqz',
                          resolution='high',
                          dest=videos_path)()

# Remove the first minute of each video.
# `start` is forwarded to ffmpeg's trim filter, which reads a bare number as
# seconds, so one minute is 60 (start=1 would only drop the first second).
trim_video(videos_path=videos_path,
           save_path='../data/trimmed_videos',
           start=60)
