In [1]:
from __future__ import annotations
from selenium import webdriver
from selenium.webdriver.common.by import By

def driversetup():
    options = webdriver.ChromeOptions()
    #run Selenium in headless mode
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    #overcome limited resource problems
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument("lang=en")
    #open Browser in maximized mode
    options.add_argument("start-maximized")
    #disable infobars
    options.add_argument("disable-infobars")
    #disable extension
    options.add_argument("--disable-extensions")
    options.add_argument("--incognito")
    options.add_argument("--disable-blink-features=AutomationControlled")
    
    driver = webdriver.Chrome(options=options)

    # driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined});")

    return driver

In [159]:
import urllib
from configparser import ConfigParser

config: ConfigParser = ConfigParser()
config.read('config_private.ini')

client_id = "1fd1295e25e841f39b73d0fab94962bb"
client_secret = "b1f1cd3dcf9d4992bd022e0029250b96"
redirect_uri = "http://localhost:8888/callback"

scope = ["playlist-modify-public", "user-read-private"]

params = {
    "response_type": "code",
    "client_id": client_id,
    "scope": scope,
    "redirect_uri": redirect_uri,
    "state": "abcdefghijklmnop"  # some random 16 digit string? - needed?
}
auth_params = urllib.parse.urlencode(query=params)
auth_url = "https://accounts.spotify.com/authorize"

In [3]:
options = webdriver.ChromeOptions()
options.add_argument("--incognito")
driver = webdriver.Chrome(options=options)

driver.get(f"{auth_url}?{auth_params}")

In [4]:
driver.find_element(By.ID, "login-username").send_keys(config["SPOTIFY"]["username"])
driver.find_element(By.ID, "login-password").send_keys(config["SPOTIFY"]["password"])

driver.find_element(By.ID, "login-button").click()

In [5]:
parsed_url = urllib.parse.urlparse(driver.current_url)
parsed_url

ParseResult(scheme='https', netloc='accounts.spotify.com', path='/en/authorize', params='', query='scope=playlist-modify-public&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%3A8888%2Fcallback&state=abcdefghijklmnop&client_id=1fd1295e25e841f39b73d0fab94962bb', fragment='')

In [None]:
# If we haven't already authorized, hit confirm
if parsed_url.netloc == "accounts.spotify.com":
    driver.find_element(By.XPATH, "//button[@data-testid='auth-accept']").click()
elif parsed_url.netloc == urllib.parse.urlparse(redirect_uri).netloc:
    pass  # we've already authorized
else:
    raise Exception("Unknown redirect")
    
parsed_url = urllib.parse.urlparse(driver.current_url)
code = urllib.parse.parse_qs(parsed_url.query)["code"]

parsed_url

In [8]:

parsed_url = urllib.parse.urlparse(driver.current_url)
code = urllib.parse.parse_qs(parsed_url.query)["code"]

parsed_url

ParseResult(scheme='http', netloc='localhost:8888', path='/callback', params='', query='code=AQBPq2JVdcS8Bhw6j0RX9CS6cSCnuQjuuubAPpOJ3b47Bw1ovsemHL8sITw-kykNM19wbNr0OnpUpx3e4HdQbq4qzsC42igbr-L-_-8dP7qJzA5Y-RczkXUkHQk87r5QpeNFs72D745d44zGIXvz4GTEoYPvfaz78fjwI0dbbvDKtUCIzyic7R5EA0UCtCNniRctnEHbxKGb6g&state=abcdefghijklmnop', fragment='')

In [13]:
import requests

token_url = "https://accounts.spotify.com/api/token"
token_data = {
  "code": code,
  "redirect_uri": redirect_uri,
  "grant_type": "authorization_code",
}
headers = {
  # "Authorization": "Basic " + (new Buffer(client_id + ':' + client_secret).toString('base64')),
}

r = requests.post(
    url=token_url, 
    data=token_data,
    auth=requests.auth.HTTPBasicAuth('client_id', 'client_secret'),
)

In [160]:
from spotipy import Spotify, SpotifyOAuth, CacheFileHandler

handler = CacheFileHandler(username=config["SPOTIFY"]["username"])
auth_manager = SpotifyOAuth(client_id, client_secret, redirect_uri, scope=scope, cache_handler=handler)

spotify = Spotify(auth_manager=auth_manager)

In [196]:
token_info = auth_manager.cache_handler.get_cached_token()
if auth_manager.is_token_expired(token_info=token_info):
    auth_manager.refresh_access_token(refresh_token=token_info["refresh_token"])

In [37]:
import json
from os import listdir
from pathlib import Path
history_path = Path.home().joinpath("Downloads").joinpath("MyData")
history_files = sorted([f for f in listdir(history_path) if f.startswith("StreamingHistory")])

history_all = []
for filename in history_files:
    with open(history_path.joinpath(filename), encoding='utf-8') as f:  # some title contain unicode chars
        history_page = json.load(f)
        history_all.extend(history_page)

In [188]:

import re
def strip_non_alphanumeric(s):
    pattern = re.compile('[^a-zA-Z0-9_ ]+')
    return pattern.sub('', s)

# normalize not used yet
def normalize_search(search_str):
    words = search_str.lower().split(" ")
    remove_words = ["the", "a", "an"]
    filtered_words = list(filter(lambda x: x not in remove_words, words))
    normalized_search = " ".join(filtered_words)

    return normalized_search

def format_track_query(history_obj):
    artist = history_obj["artistName"]
    track = history_obj["trackName"]
    search_term = strip_non_alphanumeric(s=f"{artist} {track}")
    return search_term


def get_artist_id(history_obj):
    artist_name = history_obj['artistName'].lower()

    query = urllib.parse.quote_plus(strip_non_alphanumeric(artist_name))
    result = spotify.search(q=query, market="US", type="artist")  # limit 10 by default should be enough

    for item in result["artists"]["items"]:
        item_name = item["name"].lower()

        if item_name == artist_name:
            return item["id"]

def get_albums(artist_id):
    offset = 0
    limit = 50
    
    albums = []
    next = True
    while next:
        response = spotify.artist_albums(artist_id=artist_id, country="US", limit=limit, offset=offset)
        next = response["next"]
        offset += limit
        albums.extend(response["items"])

    return albums

def get_album_tracks(album_id):
    offset = 0
    limit = 50
    
    tracks = []
    next = True
    while next:
        response = spotify.album_tracks(album_id=album_id, market="US", limit=limit, offset=offset)
        next = response["next"]
        offset += limit
        tracks.extend(response["items"])

    return tracks


def get_track_id(history_obj):
    query = urllib.parse.quote_plus(format_track_query(history_obj=history_obj))
    result = spotify.search(q=query, market="US", type="track")

    track_name = history_obj['trackName'].lower()
    artist_name = history_obj['artistName'].lower()

    for item in result["tracks"]["items"]:
        item_name = item["name"].lower()
        item_artist = item["artists"][0]["name"].lower()
        if item_name == track_name and item_artist == artist_name:
            return item["id"]

    # Some tracks aren't searchable :( Look in artist's albums instead

    artist_id = get_artist_id(history_obj=history_obj)

    if not artist_id:
        print(f"Artist not found: {history_obj['artistName']}")
        return

    albums = get_albums(artist_id=artist_id)

    for album in albums:
        album_name = album["name"].lower()
        if album_name == track_name:
            album_tracks = get_album_tracks(album_id=album["id"])
            for album_track in album_tracks:
                album_track_name = album_track["name"].lower()
                if album_track_name == track_name:
                    return album_track["id"]
    
    print(f"Track `{history_obj['trackName']}` not found for artist `{history_obj['artistName']}`")
    return


In [198]:
playlist =  spotify.user_playlist_create(user=config["SPOTIFY"]["username"], name="Date Night Playback")

In [197]:
start_datetime = "2022-10-29 06:20"
end_datetime = "2022-10-29 12"
track_ids = []

search_count = 0
for history_obj in history_all:
    if history_obj["endTime"] < start_datetime or history_obj["endTime"] > end_datetime or history_obj["msPlayed"] < 60000:
        continue
    track_id = get_track_id(history_obj=history_obj)
    search_count += 1
    if track_id:
        track_ids.append(track_id)

print(f"search_count={search_count}, found_count={len(track_ids)}")

search_count=102, found_count=102


In [201]:
track_ids = [t for t in track_ids if t]
def get_chunks(l, n):
    for i in range(0, len(l), n):
        yield l[i:i + n]

playlist_add_items_max = 100
for chunk in get_chunks(l=track_ids, n=playlist_add_items_max):
    add_tracks_response = spotify.playlist_add_items(
        playlist_id=playlist["id"],
        items=chunk,
    )


In [72]:
history_obj = history_all[0]
get_track_id(history_obj=history_obj)

'4njORatgD4DQOZsgio3Wiq'

In [189]:
history_obj = {
    "endTime" : "2022-11-06 20:50",
    "artistName" : "Louis The Child",
    "trackName" : "Save Me From Myself (with NoMBe & Big Gigantic)",
    "msPlayed" : 211685
  }

query = urllib.parse.quote_plus(strip_non_alphanumeric(f"{history_obj['artistName']} {history_obj['trackName']}"))
result = spotify.search(q=query, type="track", market="US")

print (query)

print(history_obj)
result['tracks'].keys()
print(result['tracks']['items'][0].keys())
print([x['name'] for x in result['tracks']['items']])
print([x['artists'][0]["name"] for x in result['tracks']['items']])

result['tracks']['items'][0]['artists'][0]["name"]
result["albums"]

Louis+The+Child+Save+Me+From+Myself+with+NoMBe++Big+Gigantic
{'endTime': '2022-11-06 20:50', 'artistName': 'Louis The Child', 'trackName': 'Save Me From Myself (with NoMBe & Big Gigantic)', 'msPlayed': 211685}
dict_keys(['album', 'artists', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'is_playable', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])
['Save Me From Myself (with NoMBe & Big Gigantic)', 'Save Me From Myself (with NoMBe & Big Gigantic) - Win And Woo Remix', "It's Strange", 'Miss Primetime (feat. Pell)', 'Got Me Like', 'Save Me From Myself', 'Big Love (with EARTHGANG & MNDR)', 'Weekend', 'Hole In My Heart (with Livingston)', 'Free (with Drew Love)']
['Louis The Child', 'Louis The Child', 'Louis The Child', 'Big Gigantic', 'Big Gigantic', 'Louis The Child', 'Louis The Child', 'Louis The Child', 'Louis The Child', 'Louis The Child']


KeyError: 'albums'

In [171]:
# spotify.artist("0QxJg8Lweby6eLUGSWeuId")
spotify.search(q="Snacks", market="US", type="artist")
# spotify.album_tracks(album_id="62i1Rngc3iKPYJShOQx1ly")["items"][0]["name"]
# response = spotify.artist_albums(artist_id="1E3qGlNjHcdqAYMT4Wi6cm", country="US", limit=50)  # DJ Susan
# albums = response["items"]
# for album in albums:
#     if album["name"] == "Chit Chat":
#         print(album)
# len(get_albums(artist_id="1E3qGlNjHcdqAYMT4Wi6cm"))

{'artists': {'href': 'https://api.spotify.com/v1/search?query=Snacks&type=artist&market=US&offset=0&limit=10',
  'items': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2FybCOkpsTiXYNccjPpiYh'},
    'followers': {'href': None, 'total': 4074},
    'genres': ['alabama indie'],
    'href': 'https://api.spotify.com/v1/artists/2FybCOkpsTiXYNccjPpiYh',
    'id': '2FybCOkpsTiXYNccjPpiYh',
    'images': [{'height': 640,
      'url': 'https://i.scdn.co/image/ab6761610000e5ebabb004a501efefe6c8a9b147',
      'width': 640},
     {'height': 320,
      'url': 'https://i.scdn.co/image/ab67616100005174abb004a501efefe6c8a9b147',
      'width': 320},
     {'height': 160,
      'url': 'https://i.scdn.co/image/ab6761610000f178abb004a501efefe6c8a9b147',
      'width': 160}],
    'name': 'Snacks',
    'popularity': 28,
    'type': 'artist',
    'uri': 'spotify:artist:2FybCOkpsTiXYNccjPpiYh'},
   {'external_urls': {'spotify': 'https://open.spotify.com/artist/2jeOQgTv66XWIOzCscYyQl'},
    'fo

In [80]:
driver.close()

KeyboardInterrupt: 