# Getting video ids with Selenium

### Since each search through the YouTube API consumes 100 "units" of your very limited 10,000-unit quota, you need a cheaper way to get those ids.

A YouTube video URL ends with the video id, so a basic regular expression is enough to extract it.

The following code basically:
- Gets the tracks from a Spotify playlist (given a user and a playlist_id)
- Simulates a YouTube search for every track
- Gets the URL of the first result
- Extracts the video id

You are then ready to loop the video ids through the function in SpotifyYoutube.ipynb.


In [1]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from tqdm import tqdm
import pandas as pd
import re
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from chromedriver_py import binary_path # this will get you the path variable
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def get_playlist_tracks(username, s_playlist_id):
    """
    Function collects the track items of a playlist, page by page
    """
    page = sp.user_playlist_tracks(username, s_playlist_id)
    tracks = page['items']
    # keep requesting the following page until the response has no 'next'
    while True:
        if not page['next']:
            return tracks
        page = sp.next(page)
        tracks.extend(page['items'])


def get_playlist_namedesc(s_playlist_id):
    """
    Function looks up a playlist and returns its (name, description) pair
    """
    playlist = sp.playlist(s_playlist_id)
    name = playlist['name']
    description = playlist['description']
    return name, description


def making_strings(tracks):
    """
    Function creates a string like 'name of artist - name of song'

    tracks: list of playlist items, each a dict with a 'track' entry that
            holds the song 'name' and its list of 'artists'.
    Returns a list with one string per item, built from the first listed
    artist and the track name. Items whose 'track' is None are skipped
    instead of raising a TypeError.
    """
    final_list = []
    for item in tracks:
        track = item['track']
        if track is None:
            # nothing to search for when the item carries no track data
            continue
        # only the first artist is used in the search string
        final_list.append(f"{track['artists'][0]['name']} - {track['name']}")
    return final_list


def find_youtube_link(link):
    """
    Function extracts video id from url

    link: url string such as 'https://www.youtube.com/watch?v=<id>'.
    Returns the run of word characters / hyphens that follows 'v=', or a
    float NaN when link is not a string or contains no 'v=' part.
    """
    try:
        match = re.search(r'v=([\w-]+)', link)
    except TypeError:
        # link is not a string (e.g. None for a search that gave no result)
        return float('nan')
    if match is None:
        # plain float NaN: numpy is not imported in this file, so the former
        # np.nan fallback raised NameError instead of returning a value
        return float('nan')
    return match.group(1)



In [2]:
%%time
# Spotify API credentials used to authenticate the client below.
# NOTE(review): the client id and secret are hard-coded in the notebook --
# consider loading them from the environment and rotating these values.
client_id = 'd4960464df3e4d528038cbf583115ab3'
client_secret = 'b388d3fba3df4b97b2b3a444f2fb710e'

# Authentication is required; `sp` is the module-level client that
# get_playlist_tracks and get_playlist_namedesc call.
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=client_id,
                                               client_secret=client_secret,
                                               # IMPORTANT: this redirect URL has to be enabled in Spotify's console
                                               redirect_uri="https://www.spotify.com/mx/home/",
                                               scope="user-library-read"))

CPU times: user 498 µs, sys: 39 µs, total: 537 µs
Wall time: 571 µs


In [3]:
%%time
#playlist to get
username = 'spotify:user:12127592463'
s_playlist_id = 'spotify:playlist:0armRErDJRzQYuf9iph894'


playlist_name, playlist_description = get_playlist_namedesc(s_playlist_id)
print('Getting tracks in:', playlist_name)
tracks = get_playlist_tracks(username,s_playlist_id)
final_list = making_strings(tracks)
print('Got',len(final_list), 'tracks')

print('\nHere are the first 5 songs of', playlist_name,':')
for song in final_list[:4]:
    print(song)

Getting tracks in: Chill
Got 28 tracks

Here are the first 5 songs of Chill :
Naked Giants - Slow Dance II
Pink Floyd - Shine On You Crazy Diamond (Pts. 1-5)
Flume - Drop the Game
King Gizzard & The Lizard Wizard - Work This Time
CPU times: user 80.3 ms, sys: 23.5 ms, total: 104 ms
Wall time: 576 ms


In [4]:
%%time
url = 'https://www.youtube.com/'
links = []
titles = []

driver = webdriver.Chrome(executable_path=binary_path)

for song in tqdm(final_list):
    #always return to youtube.com
    driver.get(url)
    
    #waits until search bar is clickable
    search = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, "search")))
    
    search.click()
    
    # writes song in search bar
    search.send_keys(song)
    
    # hits enter key
    search.send_keys(Keys.RETURN)
    
    # waits until the title is clickable (I encountered two types of layouts)
    try:
        titulo = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "/html/body/ytd-app/div/ytd-page-manager/ytd-search/div[1]/ytd-two-column-search-results-renderer/div/ytd-section-list-renderer/div[2]/ytd-item-section-renderer/div[3]/ytd-video-renderer[1]/div[1]/div/div[1]/div/h3/a")))
        titulo = driver.find_element_by_xpath('/html/body/ytd-app/div/ytd-page-manager/ytd-search/div[1]/ytd-two-column-search-results-renderer/div/ytd-section-list-renderer/div[2]/ytd-item-section-renderer/div[3]/ytd-video-renderer[1]/div[1]/div/div[1]/div/h3/a')
    except:
        titulo = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "/html/body/ytd-app/div/div/ytd-masthead/div[3]/div[2]/ytd-searchbox/form/div/div[1]/input")))
        titulo = driver.find_element_by_xpath('/html/body/ytd-app/div/div/ytd-masthead/div[3]/div[2]/ytd-searchbox/form/div/div[1]/input')
        continue
        
    #append url and video title to lists
    links.append(titulo.get_attribute('href'))
    titles.append(titulo.text)
    
driver.quit()
    

# get results in dataframe
df = pd.DataFrame(list(zip(final_list, titles, links)), 
           columns =['spotify_title', 'youtube_title', 'youtube_link']) 

#extract video id
df['youtube_video_id']=df.youtube_link.apply(find_youtube_link)

df.head()

100%|██████████| 28/28 [02:12<00:00,  4.74s/it]


CPU times: user 811 ms, sys: 158 ms, total: 970 ms
Wall time: 2min 19s


Unnamed: 0,spotify_title,youtube_title,youtube_link,youtube_video_id
0,Naked Giants - Slow Dance II,Naked Giants - Slow Dance II (Live on KEXP),https://www.youtube.com/watch?v=Z5kgvfufOnE,Z5kgvfufOnE
1,Pink Floyd - Shine On You Crazy Diamond (Pts. ...,Shine On You Crazy Diamond (Parts I-V),https://www.youtube.com/watch?v=54W8kktFE_o,54W8kktFE_o
2,Flume - Drop the Game,Flume & Chet Faker - Drop the Game [Official M...,https://www.youtube.com/watch?v=6vopR3ys8Kw,6vopR3ys8Kw
3,King Gizzard & The Lizard Wizard - Work This Time,King Gizzard and The Lizard Wizard - Work this...,https://www.youtube.com/watch?v=QDqgyxw0igs,QDqgyxw0igs
4,Miike Snow - Genghis Khan - Louis the Child Remix,Miike Snow - Genghis Khan (Louis The Child Remix),https://www.youtube.com/watch?v=NjOUdjN8QSc,NjOUdjN8QSc
