# Video enrichment system for pets

### John Burt
### June-July 2020

### Notebook purpose:

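Create a YouTube video playlist suitable for use by the video enrichment software. The original CSV of search terms and URLs is extended with information retrieved for each video (title, rating and duration, plus description, category and keywords when a YouTube API key is available).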

Input:
- A CSV file with two columns:
    - search_term: the words used in your youtube search that found the video
    - url: the youtube URL for the video. Example: 'https://www.youtube.com/watch?v=pUrYNXQIiSM'

Output:
- A CSV file with information collected about each video

In [18]:
# remove warnings
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from datetime import datetime
import time 
import os

import vlc
import pafy


def load_playlist(csvpath, APIkey=None):
    """Load a playlist CSV and add metadata looked up for every video.

    Parameters
    ----------
    csvpath : str
        Path of the CSV file to read. It must contain a ``url`` column
        holding one video URL per row.
    APIkey : str, optional
        YouTube API key passed to ``pafy.set_api_key``. When it is None,
        the description, category and keywords are not requested and those
        columns are filled with empty strings.

    Returns
    -------
    pandas.DataFrame
        The CSV contents plus the columns ``title``, ``rating``,
        ``duration``, ``position`` (always 0 here), ``description``,
        ``category`` and ``keywords``.
    """
    if APIkey is not None:
        pafy.set_api_key(APIkey)

    # load the data
    df = pd.read_csv(csvpath)
    n_videos = df.shape[0]

    # Collect the values in plain lists and assign whole columns afterwards.
    # Writing through chained indexing (df.col.iloc[i] = ...) is not
    # guaranteed to modify the DataFrame and forces dtype changes mid-loop.
    titles, ratings, durations = [], [], []
    descriptions, categories, keywords = [], [], []

    for i, url in enumerate(df['url']):
        video = pafy.new(url)
        titles.append(video.title)
        ratings.append(video.rating)
        durations.append(video.length)
        if APIkey is not None:
            # these fields cause a youtube API key error if no key was set
            descriptions.append(video.description)
            categories.append(video.category)
            keywords.append(video.keywords)
        else:
            descriptions.append('')
            categories.append('')
            keywords.append('')
        print('%d/%d: %s' % (i + 1, n_videos, video.title))

    df['title'] = titles
    df['rating'] = ratings
    df['duration'] = durations
    df['position'] = [0] * n_videos
    df['description'] = descriptions
    df['category'] = categories
    df['keywords'] = keywords
    return df

sourcedir = './playlists/'
sourcefile = 'youtube_videos.csv'

# Get a Google Youtube API key and place it in text file 'API_key.txt'.
#  Some video info can only be accessed if you provide an API key.
APIkey = None
if os.path.isfile('API_key.txt'):
    # read-only access is enough; 'r+' would also demand write permission
    with open('API_key.txt', 'r') as file:
        # drop the trailing newline/whitespace editors usually add, and
        # treat an empty file the same as having no key at all
        APIkey = file.read().strip() or None

df = load_playlist(sourcedir+sourcefile, APIkey=APIkey)

# write next to the input as '<name>_pl.csv'; splitext only removes the final
# extension, so file names containing extra dots are kept intact
df.to_csv(sourcedir+os.path.splitext(sourcefile)[0]+'_pl.csv',index=False)
df.head()


[YouTube API key removed from the saved output - revoke this key and keep credentials out of shared notebooks]


In [13]:
# show the keywords stored for each video in the playlist
print(df['keywords'])

0     [lambs, sheep, baby sheep, baby lamb, funny, c...
1     [Devonairs kelpies, Eveready kelpies. working ...
2     [sheep, running, happy, happy sheep, grass, go...
3                                      [Sheep, running]
4                                                    []
5     [lamb, lambs, cute, animals, sanctuary, kindness]
6                                                    []
7     [westminster kennel club, kennel, club, dog, s...
8     [flying disc, incredible dog challenge, incred...
9     [westminster kennel club, westminster, masters...
10    [holster, dog show, westminster, puppies, run,...
11    [WKC, 2019 Masters Agility, 2019, Masters Agil...
12    [rad dog agility, agility training vlog, dog a...
13                                                   []
14    [dogs, playing, hollandse herder, spelen, hond...
15    [funny dog videos, horse videos, dog and horse...
16    [puppy enjoy water, dogs, puppies, love, water...
17    [dog rescue, animal rescue, dogs, Harmony 