# Load libraries

In [None]:
# Basic
import pandas as pd
import numpy as np
import time
import os
from tqdm import tqdm
import re
# Scraping
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common import exceptions as SeleniumExceptions
from selenium.webdriver.common.keys import Keys

# Load web driver

In [None]:
# Build the Chrome session used by the rest of the notebook.
chromeOptions = webdriver.ChromeOptions()
# A maximized window helps ensure that all page elements are visible.
chromeOptions.add_argument("--start-maximized")
# ChromeDriverManager().install() supplies the chromedriver handed to Service.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=chromeOptions)

In [None]:
# URL of the liked-songs page; this is the page the driver navigates to.
root = "https://open.spotify.com/collection/tracks"

In [None]:
# Load the browser cookies stored in cookies.json.
# NOTE(review): presumably a list of cookie objects (dicts); verify against
# the tool that produced the export.
import json
# JSON text is UTF-8; pin the encoding instead of relying on the platform
# default, which is not UTF-8 on every system.
with open('cookies.json', encoding='utf-8') as f:
    cookies = json.load(f)

In [None]:
# Inject every loaded cookie into the browser through the Chrome DevTools
# Protocol command Network.setCookie.
for cookie in tqdm(cookies):
    driver.execute_cdp_cmd(
        'Network.setCookie',
        {
            # Identifying fields: a KeyError here means the cookie entry is
            # malformed and should not be silently skipped.
            'domain': cookie['domain'],
            # Optional fields: some exports omit them, so fall back to the
            # neutral value rather than aborting the whole loop.
            'path': cookie.get('path', '/'),
            'name': cookie['name'],
            'value': cookie['value'],
            'httpOnly': cookie.get('httpOnly', False),
            'secure': cookie.get('secure', False),
        },
    )

In [None]:
# Navigate the browser to the page stored in `root`.
driver.get(root)

# Start scraping

In [None]:
# Close the player side panel if it exists (it can interfere with selectors
# and the general layout). This step is best effort: nothing is raised when
# there is no panel to close.
selector_aside_player = (By.CSS_SELECTOR, "aside[aria-label]")
selector_close_aside = (By.CSS_SELECTOR, "div[data-testid='PanelHeader_CloseButton']>button")
try:
    # The wait returns the located <aside>, so it is reused instead of being
    # looked up a second time through the driver.
    aside_player = WebDriverWait(driver, 3).until(
        EC.presence_of_element_located(selector_aside_player)
    )
    aside_player.find_element(*selector_close_aside).click()
except (SeleniumExceptions.TimeoutException,
        SeleniumExceptions.NoSuchElementException):
    # Either no <aside> appeared within 3 seconds, or the one that matched
    # has no close button; in both cases there is nothing to close.
    pass

In [None]:
# Empty table that accumulates one row per scraped song.
song_columns = ['title', 'artist', 'artist_link', 'artist_description']
df_songs = pd.DataFrame(columns=song_columns)

## Scroll until all songs are loaded

In [None]:
# Walk the track list row by row and append title, first artist and artist
# link of every song to df_songs. A row that is not in the DOM yet is brought
# in by paging down (up to 10 attempts per row).
selector_songs_parent = (By.CSS_SELECTOR, "div[data-testid='track-list']")
selector_songs_header = (By.CSS_SELECTOR, "div.IpXjqI9ouS_N5zi0WM88")
selector_songs_container = (By.CSS_SELECTOR, "div.JUa6JJNj7R_Y3i4P8YUX")
selector_song_title = (By.CSS_SELECTOR, "a[data-testid='internal-track-link']")
selector_song_artist = (By.CSS_SELECTOR, "a[data-testid='internal-track-link']~span a")  # Only the first artist is taken

# Only this wait is guarded: the error message below is about the page/track
# list not showing up, so other failures keep their own exception.
try:
    songs_parent = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(selector_songs_parent)
    )
except SeleniumExceptions.TimeoutException as err:
    raise RuntimeError("Check if the page is loaded correctly and the CSS selector is correct") from err

# Number of rows announced by the list; a missing attribute counts as 0.
song_count = songs_parent.get_attribute("aria-rowcount")
song_count = int(song_count) if song_count else 0
# NOTE(review): songs_container is not used below in this cell; it is kept
# because later cells may rely on it - confirm before removing.
songs_container = songs_parent.find_element(*selector_songs_container)
body = driver.find_element(By.TAG_NAME, "body")

# Rows are collected in a plain list and appended to df_songs once;
# concatenating inside the loop re-copies the whole frame for every song.
song_rows = []
try:
    for i in tqdm(range(2, song_count + 1)):  # Row 1 is the header
        song = None
        for _ in range(10):  # Scroll until the song is found, max 10 tries
            try:
                song = songs_parent.find_element(By.CSS_SELECTOR, f"div[role='row'][aria-rowindex='{i}']")
                break
            except SeleniumExceptions.NoSuchElementException:
                # Click the header (a safe zone) to give the list focus,
                # then page down to bring further rows in.
                songs_parent.find_element(*selector_songs_header).click()
                body.send_keys(Keys.PAGE_DOWN)
        if song is None:
            raise RuntimeError(f"Song {i} not found")

        song_title = song.find_element(*selector_song_title)
        song_artist = song.find_element(*selector_song_artist)
        song_artist_link = song_artist.get_attribute("href")

        song_rows.append({'title': song_title.text,
                          'artist': song_artist.text,
                          'artist_link': song_artist_link,
                          'artist_description': None})
finally:
    # Runs on failure too, so songs scraped before an error are not lost.
    if song_rows:
        df_song = pd.DataFrame(song_rows,
                               columns=['title', 'artist', 'artist_link', 'artist_description'])
        df_songs = pd.concat([df_songs, df_song], ignore_index=True)