In [1]:
import pytube
import librosa
import ffmpy
import os
import glob
import subprocess
import platform
import IPython

In [None]:
# YouTube video URLs, one entry per video
# (no other cell visible in this notebook reads this list)
yt_lst = [
    'https://www.youtube.com/watch?v=Z7YMDwzUTds',
    'https://www.youtube.com/watch?v=H-HVZJ7kGI0',
    'https://www.youtube.com/watch?v=VwVg9jCtqaU',
    'https://www.youtube.com/watch?v=yCC09vCHzF8',
    'https://www.youtube.com/watch?v=katiy95_mxo',
    'https://www.youtube.com/watch?v=wjTJVhmu1JM',
    'https://www.youtube.com/watch?v=uv6KLbkvua8',
]

In [None]:
# Evaluates to True when the notebook runs on Linux; the boolean is shown as the cell output.
platform.system() == 'Linux'

#### Download YouTube videos using `pytube`

In [2]:
def get_video_itag(stream_lst,
                  res,
                  subtype='mp4'):
    '''Return the `itag` of a video-only YouTube stream with the specified resolution and subtype.
    If the desired resolution does not exist, the user is prompted (until a valid choice
    is entered) to input a new one from the list of available resolutions.
    
    Input:
      stream_lst:  a list of available media formats; each element must expose the
                   attributes `includes_audio_track`, `resolution`, `subtype` and `itag`
      res:         desired resolution, string of the form `xxxp`, where `x` is a number
      subtype:     desired subtype, string -- available options are `mp4` (default) and `webm`
    Output:
      `itag` of the first video-only stream matching `res` and `subtype`
    Raises:
      ValueError:  if `stream_lst` holds no video-only stream of the requested subtype
                   (there would be nothing valid for the user to choose from)
    '''
    # first itag seen for each available resolution (insertion order == stream order)
    itag_by_res = {}
    for stream in stream_lst:
        if stream.includes_audio_track:
            continue                                   # keep video-only streams
        if stream.resolution is None or stream.subtype != subtype:
            continue
        itag_by_res.setdefault(stream.resolution, stream.itag)

    if not itag_by_res:
        # prompting would never terminate: no answer could be valid
        raise ValueError('No video-only stream with subtype %r is available.' % (subtype,))

    # iterate instead of recursing so repeated wrong answers cannot exhaust the stack
    while res not in itag_by_res:
        print('Select a new video resolution from the list: ', list(itag_by_res))
        res = input()
    return itag_by_res[res]

In [3]:
def get_audio_itag(stream_lst,
                  abr,
                  subtype='mp4'):
    '''Return the `itag` of an audio-only YouTube stream with the specified audio bitrate (`abr`) and subtype.
    If the desired `abr` does not exist, the user is prompted (until a valid choice
    is entered) to input a new one from the list of available bitrates.
    
    Input:
      stream_lst:  list of available media formats; each element must expose the attributes
                   `includes_audio_track`, `includes_video_track`, `abr`, `subtype` and `itag`
      abr:         desired bit rate, string of the form `xxxkbps`, where `x` is a number
      subtype:     desired subtype, string -- available options are `mp4` (default) and `webm`
    Output:
      `itag` of the first audio-only stream matching `abr` and `subtype`
    Raises:
      ValueError:  if `stream_lst` holds no audio-only stream of the requested subtype
                   (there would be nothing valid for the user to choose from)
    '''
    # first itag seen for each available bitrate, restricted to the requested subtype
    # so that the returned itag can never belong to a stream of another subtype
    itag_by_abr = {}
    for stream in stream_lst:
        if not stream.includes_audio_track or stream.includes_video_track:
            continue                                   # keep audio-only streams
        if stream.subtype != subtype:
            continue
        itag_by_abr.setdefault(stream.abr, stream.itag)

    if not itag_by_abr:
        # prompting would never terminate: no answer could be valid
        raise ValueError('No audio-only stream with subtype %r is available.' % (subtype,))

    # iterate instead of recursing so repeated wrong answers cannot exhaust the stack
    while abr not in itag_by_abr:
        print('Select a new abr variable from the following list: ', list(itag_by_abr))
        abr = input()
    return itag_by_abr[abr]

In [4]:
def download_medium(youtube_url,
                   out_dir,
                   audio_filename,
                   video_filename,
                   res,
                   abr,
                   subtype='mp4'):
    '''Download the audio and video from a requested YouTube object.
    Audio and video are downloaded separately and stored in separate folders
    (`<out_dir>/video` and `<out_dir>/audio`), which are created if missing.
    
    Input:
      youtube_url:       url address of requested YouTube video
      out_dir:           parent directory where audio and video files will be stored 
      audio_filename:    output audio name
      video_filename:    output video name
      res, abr, subtype: arguments of `get_audio_itag` and `get_video_itag` functions;
                         `subtype` is forwarded to both of them
    Output:
      None    
    '''
    yt_obj = pytube.YouTube(youtube_url)                     # YouTube object
    streams = yt_obj.streams.all()                           # list of available media formats

    # [a] video
    video_dir = os.path.join(out_dir, 'video')
    os.makedirs(video_dir, mode=0o777, exist_ok=True)
    # honour the caller's `subtype` instead of a hard-coded 'mp4'
    video_itag = get_video_itag(stream_lst=streams, res=res, subtype=subtype)
    yt_obj.streams.get_by_itag(video_itag).download(output_path=video_dir,
                                                    filename=video_filename,
                                                    filename_prefix=None)

    # [b] audio
    audio_dir = os.path.join(out_dir, 'audio')
    os.makedirs(audio_dir, mode=0o777, exist_ok=True)
    audio_itag = get_audio_itag(stream_lst=streams, abr=abr, subtype=subtype)
    yt_obj.streams.get_by_itag(audio_itag).download(output_path=audio_dir,
                                                    filename=audio_filename,
                                                    filename_prefix=None)

In [7]:
def download_media(doc_path,
                  out_dir,
                  res='360p',
                  abr='128kbps',
                  subtype='mp4'
                  ):            
    '''Download audio and video from YouTube objects; url addresses are provided by a text file.
    The k-th url (counting from 0, blank lines ignored) is stored as `video_k` and `audio_k`.
    
    Input:
      doc_path:          text file containing url addresses--each line is a single address;
                         blank lines are skipped
      out_dir:           parent directory where audio and video files will be stored
      res, abr, subtype: see `get_audio_itag` and `get_video_itag` functions; they are
                         forwarded unchanged to `download_medium`
    Output
      None
    '''
    with open(doc_path, 'r') as f:
        # strip surrounding whitespace and drop empty lines so they are not treated as urls
        yt_urls = [line.strip() for line in f if line.strip()]

    for idx, url in enumerate(yt_urls):
        # forward the caller's settings instead of hard-coded values
        download_medium(youtube_url=url,
                        out_dir=out_dir,
                        audio_filename='audio_' + str(idx),
                        video_filename='video_' + str(idx),
                        res=res,
                        abr=abr,
                        subtype=subtype)

In [8]:
# Run the download for every url listed in `../yt_dummy.txt`, using `../data` as the parent
# output directory; resolution, bitrate and subtype are left at the function's defaults.
download_media(doc_path='../yt_dummy.txt',
              out_dir='../data')

#### Join audio & video files using `ffmpeg` (via the `ffmpy` wrapper)

__Objective__. Join audio and video into a single medium.

- read file names from folder
- associate corresponding audio and video files
- use ffmpeg to join audio and video files in a single medium

In [33]:
def _files_by_index(folder):
    '''Map the trailing `_<index>` part of each file name in `folder` to that file name.'''
    indexed = {}
    for name in sorted(os.listdir(folder)):
        if name.startswith('.'):
            continue                       # skip hidden entries such as `.DS_Store`
        key = name.split('.')[0].split('_')[-1]
        indexed.setdefault(key, name)      # first name (alphabetically) wins on duplicates
    return indexed


def main(in_dir = '../data',
            out_dir = '../data'):
    '''
    Merges a video file with its associated audio file creating a single medium, which is
    stored in a directory `out_dir/media/`.

    Files are associated through the index that follows the last underscore of their
    names: `audio_k.*` is merged with `video_k.*` and written to `medium_k.mp4`, so the
    output name always identifies the pair it was built from. An existing output file
    is overwritten, which keeps the function re-runnable.
    
    Input:
      in_dir:  the directory containing the `audio` and `video` folders
      out_dir: the directory containing the `media` folder where the merged media will be stored
    Output:
      None
    '''
    # [1] match associated audio and video
    # e.g. audio_k is matched with video_k
    audio_by_index = _files_by_index(os.path.join(in_dir, 'audio', ''))
    video_by_index = _files_by_index(os.path.join(in_dir, 'video', ''))
    shared_indices = sorted(set(audio_by_index) & set(video_by_index))

    # [2] preparing the output folder and merging audio and video into a single medium
    path_name = os.path.join(out_dir, 'media', '')
    os.makedirs(path_name, mode=0o777, exist_ok=True)

    for index in shared_indices:
        video = os.path.join(in_dir, 'video', video_by_index[index])
        audio = os.path.join(in_dir, 'audio', audio_by_index[index])
        output = os.path.join(path_name, 'medium_' + index + '.mp4')

        # `-c copy` muxes the streams without re-encoding;
        # `-y` answers ffmpeg's overwrite prompt so a re-run does not stall
        ff = ffmpy.FFmpeg(global_options=['-y'],
                          inputs={audio: None, video: None},
                          outputs={output: ['-c', 'copy']})
        ff.run()

In [34]:
# the merge function defined in this notebook is named `main`; `av_merge` is not defined anywhere
main()

In [30]:
# paths (the trailing '' makes each path end with a separator so a glob pattern can be appended)
audio_path = os.path.join('..', 'data', 'audio', '')
video_path = os.path.join('..', 'data', 'video', '')

print(os.listdir(audio_path))

# audio filenames
audio_filenames = glob.glob(audio_path+'*.mp4')
print(audio_filenames)
# video filenames
video_filenames = glob.glob(video_path+'*.mp4')
print(video_filenames)


def _file_index(path):
    '''Return the index encoded in a file name, e.g. `0` for `../data/audio/audio_0.mp4`.'''
    # os.path.basename honours the platform's separator, unlike splitting on '/'
    return os.path.basename(path).split('.')[0].split('_')[-1]


# matched pairs: match associated audio and video files
# (one dictionary lookup per video instead of comparing every video with every audio)
audio_by_index = {_file_index(audio_name): audio_name for audio_name in audio_filenames}
matched_pairs = [(video_name, audio_by_index[_file_index(video_name)])
                 for video_name in video_filenames
                 if _file_index(video_name) in audio_by_index]
matched_pairs

['audio_1.mp4', 'audio_0.mp4']
['../data/audio/audio_1.mp4', '../data/audio/audio_0.mp4']
['../data/video/video_1.mp4', '../data/video/video_0.mp4']


[('../data/video/video_1.mp4', '../data/audio/audio_1.mp4'),
 ('../data/video/video_0.mp4', '../data/audio/audio_0.mp4')]

In [27]:
# make sure the destination folder for the merged media exists
path_name = os.path.join('..', 'data', 'media')
os.makedirs(path_name, mode=0o777, exist_ok=True)

# mux every matched (video, audio) pair into `medium_<position in matched_pairs>.mp4`
for idx, (video, audio) in enumerate(matched_pairs):
    output_name = 'medium_'+str(idx)+'.mp4'
    output = os.path.join(path_name, output_name)

    # ffmpy takes {path: options} mappings; `-c copy` is applied to the output file
    inp = {audio:None, video:None}
    oup = {output:['-c', 'copy']}

    ff = ffmpy.FFmpeg(inputs=inp, outputs=oup)
    ff.run()

#### Segment media files

__Objective__. Implement a reasonable approach to segment media files into non-overlapping segments in order to facilitate the annotation process. 

__Segment audio files__

In [None]:
# Pick one of the audio files collected in `audio_filenames` and load it with librosa;
# `y` is the loaded signal and `sr` its sampling rate (`sr=None` asks librosa not to resample).
# NOTE(review): index 4 needs at least five entries in `audio_filenames`, while the recorded
# output earlier in this notebook lists only two files -- confirm the intended index.
filename = audio_filenames[4]
y, sr = librosa.load(filename, sr=None)

In [None]:
 # Render an inline audio player for the loaded signal `y` at sampling rate `sr`.
 IPython.display.Audio(data=y[:], rate=sr)

In [None]:
# NOTE(review): per the librosa documentation, `effects.split` returns the (start, end) sample
# indices of the non-silent intervals of `y` -- not audio samples; `top_db` is the threshold
# (in dB below the reference) under which the signal is treated as silence.
w = librosa.effects.split(y, top_db=40)

In [None]:
 # `w` holds (start, end) sample indices of the non-silent intervals, not audio samples,
 # so play the first non-silent segment of `y` instead of `w` itself
 # (raises IndexError if no non-silent interval was found)
 first_start, first_end = w[0]
 IPython.display.Audio(data=y[first_start:first_end], rate=sr)