In [1]:
import pandas as pd
import youtube_dl

In [2]:
def parse_meta(meta):
    """Flatten one video's info dict into a single row of metadata.

    Args:
        meta: Mapping describing a video (the download loop passes the
            result of ``YoutubeDL.extract_info``). ``format_id`` is
            required and is either ``'<video_id>+<audio_id>'`` or a single
            id when one format carries both streams. ``formats``, if
            present, is a list of per-format mappings keyed by
            ``format_id``.

    Returns:
        dict with ``title``, ``like_count``, ``view_count``,
        ``upload_date``, ``display_id`` and ``duration``, plus ``video_*``
        and ``audio_*`` entries for the selected formats that were found in
        ``meta['formats']``. Any field missing from the input is reported
        as ``None`` instead of raising.

    Raises:
        KeyError: if ``meta`` has no ``format_id``.
    """
    info = {
        key: meta.get(key)
        for key in ('title', 'like_count', 'view_count',
                    'upload_date', 'display_id', 'duration')
    }

    # partition() tolerates an id without '+'; in that case the same format
    # supplies both the video and the audio columns.
    video_format_id, _, audio_format_id = meta['format_id'].partition('+')
    if not audio_format_id:
        audio_format_id = video_format_id

    for fmt in meta.get('formats') or []:
        format_id = fmt.get('format_id')

        if format_id == video_format_id:
            width, height = fmt.get('width'), fmt.get('height')
            info['video_resolution'] = (
                '{}x{}'.format(width, height)
                if width is not None and height is not None
                else None
            )
            info['video_fps'] = fmt.get('fps')
            info['video_codec'] = fmt.get('vcodec')
            info['video_ext'] = fmt.get('ext')
            info['video_filesize'] = fmt.get('filesize')
            info['video_format_id'] = video_format_id

        # Separate `if` (not `elif`) so a single muxed format fills both the
        # video and the audio columns.
        if format_id == audio_format_id:
            info['audio_codec'] = fmt.get('acodec')
            info['audio_ext'] = fmt.get('ext')
            info['audio_filesize'] = fmt.get('filesize')
            info['audio_format_id'] = audio_format_id

    return info

In [3]:
# Watch-page URLs of the videos to fetch, expanded from their video ids.
yt_videos = [
    'https://www.youtube.com/watch?v=' + video_id
    for video_id in (
        'jaOEY8iRo6c',
        'wvzLATU8M04',
        'Q2LDobhGHm4',
        'NcAZdATqkxA',
        'e8mvLIXoIug',
        'WIVVb87JPog',
        'UpX_kurvAXM',
        'IAzZnHwmBfc',
        'EEFInk9wlzs',
        'h7VtMor4lL8',
        '2aaM63uawjo',
        'XLK7E1aEOuM',
        '-OUplnLYcxY',
        'SXw_LK0hkcg',
        'ao2LuQwi4is',
    )
]

# Options handed to youtube_dl.YoutubeDL for every download.
ydl_opts = dict(
    format='bestvideo+bestaudio',   # separate video and audio selections
    merge_output_format='mkv',
    outtmpl='%(id)s',               # output name template: the video id
    quiet=True,
    noplaylist=True,
    prefer_ffmpeg=True,
    keepvideo=True,
    postprocessors=[
        # Audio-extraction post-processing step and its settings.
        dict(
            key='FFmpegExtractAudio',
            preferredcodec='best',
            preferredquality='0',
            nopostoverwrites=False,
        ),
    ],
)

# Download every video and collect one metadata row (a dict) per video.
records = []

with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    for yt_video in yt_videos:
        # A single extract_info(download=True) call both downloads the video
        # and returns its info dict, so each URL is resolved once instead of
        # once for download() and again for a metadata-only extract_info().
        info = ydl.extract_info(yt_video, download=True)
        records.append(parse_meta(info))

# Build the table once from the row dicts rather than concatenating one
# single-row DataFrame per video; this also yields an empty table (instead of
# an error) when there are no videos.
meta = pd.DataFrame(records)
meta.to_csv('meta.csv', index=False)



In [4]:
import os
# Delete leftover files from the working directory: any name containing '.f'
# (presumably the per-format '<id>.f<format_id>' streams kept on disk —
# confirm) and any name without a dot at all.
# NOTE(review): the '.f' test is a plain substring match, so it also selects
# names such as 'x.flac' or 'a.final.txt'.
for file in os.listdir('.'):
    # os.remove() raises on directories, and a dot-less name is very often a
    # subdirectory, so only regular files are considered for deletion.
    if not os.path.isfile(file):
        continue
    if '.f' in file or '.' not in file:
        os.remove(file)