In [2]:
# Here, I'm going to get a list of DEF CON Youtube videos, and use Linear Optimization to select videos for myself
## install google python API: https://developers.google.com/api-client-library/python/

from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import pandas as pd

In [3]:
# steps to get API Key: https://developers.google.com/youtube/v3/getting-started

API_KEY = "[YOUR API KEY]"   # use your own API key
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

In [4]:
# Get videos, channel, playlists that match your search term
def youtube_search(search_query, max_results):
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
        developerKey=API_KEY)

    # Call the search.list method to search specified query
    search_response = youtube.search().list(
        q=search_query,
        part='id,snippet',
        maxResults=max_results
      ).execute()

    videos = []
    channels = []
    playlists = []

    # You can print out search_response here and read it through JSON viewer: http://jsonviewer.stack.hu/
    for search_result in search_response.get('items', []):
        if search_result['id']['kind'] == 'youtube#video':
            videos.append('%s (%s)' % (search_result['snippet']['title'],
                                         search_result['id']['videoId']))
        elif search_result['id']['kind'] == 'youtube#channel':
            channels.append('%s (%s)' % (search_result['snippet']['title'],
                                           search_result['id']['channelId']))
        elif search_result['id']['kind'] == 'youtube#playlist':
            playlists.append('%s (%s)' % (search_result['snippet']['title'],
                                            search_result['id']['playlistId']))
    return videos, channels, playlists

In [5]:
search_query = 'DEF CON 25 SE'
max_results = 50  # at most 50, limited by Google

vlst, clst, plst = youtube_search(search_query, max_results)

In [6]:
print len(vlst), len(clst), len(plst)

47 1 2


In [7]:
for elem in vlst:
    print elem
print
    
for elem in clst:
    print elem
print
    
for elem in plst:
    print elem

DEF CON 25 SE Village - Jayson Street - Strategies on Securing Your Banks and Enterprises (iLPI0EGs6kY)
DEF CON 25 SE Village - Keith Conway, Cameron Craig - Change Agents  How to Affect  Corp. Culture (mYRXlCGMP0Q)
DEF CON 25 - Max Bazaliy - Jailbreaking Apple Watch (eJpbi-Qz6Jc)
DEF CON 25 SE Village - Michele Fincher - Are You Killing Your Security Program? (AZw8WugNyIQ)
DEF CON 24 - Marc Newlin - MouseJack: Injecting Keystrokes into Wireless Mice (00A36VABIA4)
DEF CON 25 SE Village - Chris Hadnagy - SE vs Predator: Using SE In Ways I Never Thought (PNuPkpLuaws)
DEF CON 25 - Nathan Seidle - Open Source Safe Cracking Robots (v9vIcfLrmiA)
DEF CON 25 SE Village - Robert Wood  - Thematic Social Engineering (Rw0aJQnYXq0)
DEF CON 25 SE Village - Brent White, Tim Roberts - Skills For A Red Teamer (yJCq8ajdd0U)
DEF CON 25 SE Village - John Nye -The Human Factor  Why Are We So Bad at Security (MgXhjUzi_I0)
DEF CON 24 - Chris Rock - How to Overthrow a Government (m1lhGqNCZlA)
DEF CON 25 -  El

In [8]:
import re

# convert duration string to seconds
def duration2sec(duration_string):
    if 'M' in duration_string and 'S' in duration_string and 'H' not in duration_string:  # remove video too long/short
        ptn = "PT(\d*)M(\d*)S"
        m = re.search(ptn, duration_string)
        mins = float(m.group(1))
        secs = float(m.group(2))
        total_secs = mins*60 + secs
        return total_secs
    else:
        return -1
    

def videos_list_by_id(**kwargs):
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
        developerKey=API_KEY)
    result = youtube.videos().list(**kwargs).execute().get('items')[0]

    comment_count = result['statistics']['commentCount']
    view_count = result['statistics']['viewCount']
    favorite_count = result['statistics']['favoriteCount']
    like_count = result['statistics']['likeCount']
    description = result['snippet']['localized']['description']
    duration = duration2sec(result['contentDetails']['duration'])
    
    return {'id':kwargs['id'], 'comment_count':comment_count, 'view_count':view_count, 'favorite_count':favorite_count,
           'like_count':like_count, 'duration':duration, 'description':description}

In [9]:
video_dct = {}

idx = 0
for elem in vlst:
    title, vid = elem.split('(')
    vid = vid.split(')')[0]
    item_dct = videos_list_by_id(part='snippet,contentDetails,statistics', id=vid)
    item_dct['title'] = title
    video_dct[idx] = item_dct
    idx += 1

In [10]:
df = pd.DataFrame(video_dct.values())
df.head()

Unnamed: 0,comment_count,description,duration,favorite_count,id,like_count,title,view_count
0,11,Most people who work on the defensive side of ...,3380.0,0,iLPI0EGs6kY,65,DEF CON 25 SE Village - Jayson Street - Strate...,1490
1,2,It’s no secret that trying to change corporate...,2773.0,0,mYRXlCGMP0Q,34,"DEF CON 25 SE Village - Keith Conway, Cameron ...",1227
2,22,"On April 24, 2015, Apple launched themselves i...",2172.0,0,eJpbi-Qz6Jc,287,DEF CON 25 - Max Bazaliy - Jailbreaking Apple ...,28105
3,4,Michele Fincher is the Chief Influencing Agent...,1761.0,0,AZw8WugNyIQ,24,DEF CON 25 SE Village - Michele Fincher - Are ...,1059
4,47,What if your wireless mouse was an effective a...,2555.0,0,00A36VABIA4,480,DEF CON 24 - Marc Newlin - MouseJack: Injectin...,41354


In [11]:
from pulp import *

In [12]:
# Step 1 - Define the problem

prob = pulp.LpProblem('Select_DEF_CON_videos', pulp.LpMinimize)
print prob

Select_DEF_CON_videos:
MINIMIZE
None
VARIABLES



In [13]:
# Step 2 - Create Decision Variables
## Here, each video is a decision variabe with value 0 or 1, indicating select or not

decision_vars = []

for vid in df['id']:
    video_var = pulp.LpVariable(vid, lowBound = 0, upBound = 1, cat = 'Integer')  # make it as binary variable
    decision_vars.append(video_var)
    
print len(decision_vars)
print decision_vars[0]

47
iLPI0EGs6kY


In [14]:
# Step 3 - Define Objective Function
## Here, I want to choose those with larger likes/views
## namely, smaller int(round(views/likes)), since here requires integer

total_view2likes = ''

for ridx, r in df.iterrows():
    for i in range(len(decision_vars)):
        if ridx == i:
            views = float(r['view_count'])
            likes = float(r['like_count'])+0.0001  # in case it's 0
            view2likes = int(round(views/likes))
            formula = view2likes*decision_vars[i]  # Here, it's different from int*str, you can try to print out
            total_view2likes += formula
prob += total_view2likes

In [15]:
# Step 4 - Set Constranits
## The total time I could spend is around 40 mins (2400 sec) per day, on average
## For 1 week video selection, 7 videos are enough

my_available_time = 2400*7
my_watch_count = 7

In [16]:
total_watch_time = ''

for ridx, r in df.iterrows():
    for i in range(len(decision_vars)):
        if ridx == i:
            duration = float(r['duration'])
            formula = duration*decision_vars[i]  # Here, it's different from int*str, you can try to print out
            total_watch_time += formula
prob += (total_watch_time == my_available_time)

In [17]:
total_watch_count = ''

for ridx, r in df.iterrows():
    for i in range(len(decision_vars)):
        if ridx == i:
            formula = decision_vars[i]  # Here, it's different from str, even if you try to print out
            total_watch_count += formula
prob += (total_watch_count == my_watch_count)

In [18]:
print prob

Select_DEF_CON_videos:
MINIMIZE
86*00A36VABIA4 + 1510000*1Z8oJgtyw_8 + 124*1zTsfs4Q6IY + 122*2vdvINDmlX8 + 34*3bb87rb70jU + 54*9uBzqiYMzLg + 44*AZw8WugNyIQ + 183*EPIVjqExh9s + 130*EshjrNOs2ps + 42*FNtVqHaWFbU + 108*G_lmXqBUtZc + 206*HI6Hviqpi3A + 40*HlUe0TUHOIc + 2240000*IJT0bnZXvK0 + 86*ImgaEqOQlSQ + 93*MbIDrs_mB20 + 80*MgXhjUzi_I0 + 82*NbHL0SYlrSQ + 31*OjuLV35jVKo + 22*PNuPkpLuaws + 34*Rw0aJQnYXq0 + 48*TTx7mHzyX8c + 115*UGVi3ZX5IqM + 115*UJdxrhERDyM + 157*UYgBLUhHrCw + 81*VFnCRVWBISY + 34*VeXlppLn5H4 + 32*_T_Uj8_A0QA + 76*bbDAa0syz5A + 280*dCC7k4C0YMg + 98*eJpbi_Qz6Jc + 84*e_TQTDrRyWI + 94*fHDxu2sgGMQ + 48*fp7Pq7_tHsY + 119*hL_6uRfoGvU + 44*heKMPMahF2E + 42*iAOOdYsK7MM + 23*iLPI0EGs6kY + 65*kSuq3Ry9PLQ + 76*l1OFH_H8PjQ + 103*m1lhGqNCZlA + 36*mYRXlCGMP0Q + 48*mxI_2On_fG8 + 69*npaVgQxYUbQ + 76*r4XntiyXMnA + 38*v9vIcfLrmiA + 31*yJCq8ajdd0U + 0
SUBJECT TO
_C1: 2555 00A36VABIA4 + 266 1Z8oJgtyw_8 + 1225 1zTsfs4Q6IY + 3158 2vdvINDmlX8
 + 1974 3bb87rb70jU - 9uBzqiYMzLg + 1761 AZw8WugNyIQ + 2

In [19]:
# Step 5 - Optimization

optimization_result = prob.solve()

assert optimization_result == pulp.LpStatusOptimal
print('Status:', LpStatus[prob.status])
print('Optimal Solution to the problem: ', value(prob.objective))
print('Individual decision variables: ')
for v in prob.variables():
    print(v.name, '=', v.varValue)

('Status:', 'Optimal')
('Optimal Solution to the problem: ', 236.0)
Individual decision variables: 
('00A36VABIA4', '=', 0.0)
('1Z8oJgtyw_8', '=', 0.0)
('1zTsfs4Q6IY', '=', 0.0)
('2vdvINDmlX8', '=', 0.0)
('3bb87rb70jU', '=', 0.0)
('9uBzqiYMzLg', '=', 0.0)
('AZw8WugNyIQ', '=', 1.0)
('EPIVjqExh9s', '=', 0.0)
('EshjrNOs2ps', '=', 0.0)
('FNtVqHaWFbU', '=', 0.0)
('G_lmXqBUtZc', '=', 0.0)
('HI6Hviqpi3A', '=', 0.0)
('HlUe0TUHOIc', '=', 0.0)
('IJT0bnZXvK0', '=', 0.0)
('ImgaEqOQlSQ', '=', 0.0)
('MbIDrs_mB20', '=', 0.0)
('MgXhjUzi_I0', '=', 0.0)
('NbHL0SYlrSQ', '=', 0.0)
('OjuLV35jVKo', '=', 0.0)
('PNuPkpLuaws', '=', 1.0)
('Rw0aJQnYXq0', '=', 0.0)
('TTx7mHzyX8c', '=', 0.0)
('UGVi3ZX5IqM', '=', 0.0)
('UJdxrhERDyM', '=', 0.0)
('UYgBLUhHrCw', '=', 0.0)
('VFnCRVWBISY', '=', 0.0)
('VeXlppLn5H4', '=', 0.0)
('_T_Uj8_A0QA', '=', 0.0)
('bbDAa0syz5A', '=', 0.0)
('dCC7k4C0YMg', '=', 0.0)
('eJpbi_Qz6Jc', '=', 0.0)
('e_TQTDrRyWI', '=', 0.0)
('fHDxu2sgGMQ', '=', 0.0)
('fp7Pq7_tHsY', '=', 0.0)
('hL_6uRfoGvU', 

In [21]:
# Output selected videos
selecte_ids = {}
for v in prob.variables():
    if v.varValue == 1.0:
        selecte_ids[v.name] = ''
    
selected_df = pd.DataFrame(selecte_ids.keys(), columns=['id'])
selected_df_all = pd.merge(selected_df, df,  how='inner', on = ['id','id'])
selected_df_all[['view_count', 'like_count']] = selected_df_all[['view_count', 'like_count']].astype(float)
selected_df_all['view2like'] = selected_df_all['view_count']/selected_df_all['like_count']
pd.options.display.max_colwidth = 99
selected_df_all[['title','view_count','like_count','duration', 'view2like', 'id']].sort_values('view2like')
# 'view2like' smaller the better

Unnamed: 0,title,view_count,like_count,duration,view2like,id
4,DEF CON 25 SE Village - Chris Hadnagy - SE vs Predator: Using SE In Ways I Never Thought,1747.0,79.0,3135.0,22.113924,PNuPkpLuaws
3,DEF CON 25 SE Village - Jayson Street - Strategies on Securing Your Banks and Enterprises,1490.0,65.0,3380.0,22.923077,iLPI0EGs6kY
2,"DEF CON 25 SE Village - Brent White, Tim Roberts - Skills For A Red Teamer",1516.0,49.0,3068.0,30.938776,yJCq8ajdd0U
0,"DEF CON 25 SE Village - Keith Conway, Cameron Craig - Change Agents How to Affect Corp. Culture",1227.0,34.0,2773.0,36.088235,mYRXlCGMP0Q
6,DEF CON 25 - Nathan Seidle - Open Source Safe Cracking Robots,31513.0,828.0,1645.0,38.059179,v9vIcfLrmiA
1,DEF CON 24 - Hunter Scott - RT to Win: 50 lines of Python made me the luckiest guy on Twitter,35648.0,853.0,1038.0,41.791325,iAOOdYsK7MM
5,DEF CON 25 SE Village - Michele Fincher - Are You Killing Your Security Program?,1059.0,24.0,1761.0,44.125,AZw8WugNyIQ


In [27]:
# Get Youtube URLS

prefix = "https://www.youtube.com/watch?v="
selected_df_all['url'] = prefix + selected_df_all['id'].astype(str)
selected_df_all[['title','url', 'view2like']].sort_values('view2like')

Unnamed: 0,title,url,view2like
4,DEF CON 25 SE Village - Chris Hadnagy - SE vs Predator: Using SE In Ways I Never Thought,https://www.youtube.com/watch?v=PNuPkpLuaws,22.113924
3,DEF CON 25 SE Village - Jayson Street - Strategies on Securing Your Banks and Enterprises,https://www.youtube.com/watch?v=iLPI0EGs6kY,22.923077
2,"DEF CON 25 SE Village - Brent White, Tim Roberts - Skills For A Red Teamer",https://www.youtube.com/watch?v=yJCq8ajdd0U,30.938776
0,"DEF CON 25 SE Village - Keith Conway, Cameron Craig - Change Agents How to Affect Corp. Culture",https://www.youtube.com/watch?v=mYRXlCGMP0Q,36.088235
6,DEF CON 25 - Nathan Seidle - Open Source Safe Cracking Robots,https://www.youtube.com/watch?v=v9vIcfLrmiA,38.059179
1,DEF CON 24 - Hunter Scott - RT to Win: 50 lines of Python made me the luckiest guy on Twitter,https://www.youtube.com/watch?v=iAOOdYsK7MM,41.791325
5,DEF CON 25 SE Village - Michele Fincher - Are You Killing Your Security Program?,https://www.youtube.com/watch?v=AZw8WugNyIQ,44.125
