In [1]:
import pandas as pd
import numpy as np

from decimal import Decimal, Context

import spotipy
from spotipy.oauth2 import SpotifyOAuth

import boto3

In [5]:
client_secret = "b4b4adad93d2427b9c22b3c0b8eead3b"
client_id = "3e732b6a4f1e43f8940a372fb205a460"
redirect_uri = "http://localhost:8889/callback"

In [3]:
valid_audio_features = ["danceability", "energy", "key", "loudness", "mode", "speechiness", "acousticness",
                        "instrumentalness", "liveness", "valence", "tempo"]
user_lib_scope = "user-library-read"

In [6]:
def get_saved_tracks():
    sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=user_lib_scope, client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri))

    all_results = []

    offset = 0
    results = sp.current_user_saved_tracks(limit=50, offset=offset)
    while results["next"] is not None:
        if offset > 0:
            results = sp.current_user_saved_tracks(limit=50, offset=offset)
        for idx, item in enumerate(results['items']):
            track = item['track']
            all_results.append(track)
        offset += 50
        
    return all_results


def clean_data(track):
    track_id = track["id"]
    album = track["album"]["name"]
    artist = track["artists"][0]["name"]
    duration = track["duration_ms"] / 1000
    explicit = track["explicit"]
    name = track["name"]
    popularity = track["popularity"]
    
    track_info = {
        "track_id": track_id,
        "album": album,
        "artist": artist,
        "duration": duration,
        "explicit": explicit,
        "track_name": name,
        "popularity": popularity
    }
    
    return track_info


def get_change_in_feature(element, past_element, feature):
    if past_element[f"{feature}_confidence"] > 0.5 and element[f"{feature}_confidence"] > 0.5: # enough confidence
        if np.abs(past_element[feature] - element[feature]) / element[feature] > 0.05: # 5% change
            return True
    return False


def get_audio_features(track):
    sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=user_lib_scope, client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri))
    
    track_id = track["track_id"]
    features = sp.audio_features(track_id)
    features = {k:v for k, v in features[0].items() if k in valid_audio_features}
    
    track_info = {**track, **features}
    
    return track_info


def get_advanced_audio_features(track):
    sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=user_lib_scope, client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri))
    
    track_id = track["track_id"]
    features = sp.audio_analysis(track_id)
    sections = features["sections"]
    
    track_info = {**track}
    track_info["raw_sections"] = sections
    
    num_sections = len(sections)
    sections_avg_duration = np.mean([e["duration"] for e in sections])
    
    tempo_changes = 0
    key_changes = 0
    mode_changes = 0
    time_signature_changes = 0
    dynamics_changes = 0    
    
    for i in range(1, len(sections)):
        if get_change_in_feature(sections[i], sections[i-1], "tempo"):
            tempo_changes += 1
        if get_change_in_feature(sections[i], sections[i-1], "key"):
            key_changes += 1
        if get_change_in_feature(sections[i], sections[i-1], "mode"):
            mode_changes += 1
        if get_change_in_feature(sections[i], sections[i-1], "time_signature"):
            time_signature_changes += 1
        if np.abs(sections[i-1]["loudness"] - sections[i]["loudness"]) / sections[i]["loudness"] > 0.1: # no confidence, wider interval
            dynamics_changes += 1
    
    track_info["num_sections"] = num_sections
    track_info["sections_avg_duration"] = sections_avg_duration
    track_info["tempo_changes"] = tempo_changes
    track_info["key_changes"] = key_changes
    track_info["mode_changes"] = mode_changes
    track_info["time_signature_changes"] = time_signature_changes
    track_info["dynamics_changes"] = dynamics_changes
    
    return track_info


def parse_numeric_data(data):
    ctx = Context(prec=38)
    
    for entry in data:
        for k, v in entry.items():
            if type(v) in (float, int, np.number, np.float64, np.int64):
                entry[k] = ctx.create_decimal_from_float(v)
            elif isinstance(v, list):
                for e in v:
                    for k2, v2 in e.items():
                        e[k2] = ctx.create_decimal_from_float(v2)

In [7]:
all_tracks = get_saved_tracks()

results = []

for track in all_tracks:
    clean_track = clean_data(track)
    clean_track = get_audio_features(clean_track)
    clean_track = get_advanced_audio_features(clean_track)
    
    results.append(clean_track)

  if np.abs(past_element[feature] - element[feature]) / element[feature] > 0.05: # 5% change
  if np.abs(past_element[feature] - element[feature]) / element[feature] > 0.05: # 5% change


In [None]:
parse_numeric_data(results)

In [None]:
session = boto3.Session(profile_name="default")

dynamodb = session.resource("dynamodb", region_name="eu-west-1")
table = dynamodb.Table("track_info")

with table.batch_writer() as batch:
    for item in results:
        batch.put_item(Item=item)

In [19]:
sp = SpotifyOAuth(client_id, client_secret, redirect_uri, scope="user-top-read")

auth_url = sp.get_authorize_url()
print(f"Please navigate here: {auth_url}")

response = input("Paste the URL you were redirected to: ")
code = sp.parse_response_code(response)
token_info = sp.get_access_token(code)

Please navigate here: https://accounts.spotify.com/authorize?client_id=3e732b6a4f1e43f8940a372fb205a460&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%3A8889%2Fcallback&scope=user-top-read


Paste the URL you were redirected to:  http://localhost:8889/callback?code=AQD0E6gQS0QcFiTFmfL5era_KG22St_CJougm5PgqcyczTO40QQlUHCtoQOyNv4mFDVszUE9EqA6PfovKZF7mjHImyuxNGATIxMTei0u4hLxCDFh9I2FDmtbkTjRnt8wPEh6kCrCswgfoUZJ3g26s0PB27UA6QdwysXnNwYCeSytBQt50OUlAA7eFry4NbH0_Q


  token_info = sp.get_access_token(code)


In [20]:
token_info

{'access_token': 'BQDhMYG39tq-HVU0xW3ZxNc-SfVA0WnDyv-p7zMX5VUm8-WJsm4zBUoEyJdNAtgri16YcgqDWDwUveldBAZVHUkAAVx9L7rgLol0kutzzv5x4riYU_w08hRCYARDGjOcPZ0K0z8_43fAoFEZT0010tZ5yhIAB6ltIjpEt3ZkTfcvvKFBNKM-rIjOVADACB9G9JdK8GRPmquWAA',
 'token_type': 'Bearer',
 'expires_in': 3600,
 'refresh_token': 'AQBj-3bJZBS-3Vtg4i6boRfCoFWzcB4MgKZNxrcnf-QtqvCs2z0zh6g0UmXkNpJEPwG1V4Y-3qoL8xgwbfoarKvqGAx_CChptp_cFafnnbFGujHu5VOVz3etQ-PhS6RQWWo',
 'scope': 'user-top-read',
 'expires_at': 1716889014}

In [25]:
# sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope="user-read-recently-played", client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri))


a = sp.current_user_recently_played(limit=20)


In [39]:
results["items"][0]

{'external_urls': {'spotify': 'https://open.spotify.com/artist/2SRIVGDkdqQnrQdaXxDkJt'},
 'followers': {'href': None, 'total': 214230},
 'genres': ['progressive metal'],
 'href': 'https://api.spotify.com/v1/artists/2SRIVGDkdqQnrQdaXxDkJt',
 'id': '2SRIVGDkdqQnrQdaXxDkJt',
 'images': [{'height': 640,
   'url': 'https://i.scdn.co/image/ab6761610000e5eb709c28ab620aff25f7584281',
   'width': 640},
  {'height': 320,
   'url': 'https://i.scdn.co/image/ab67616100005174709c28ab620aff25f7584281',
   'width': 320},
  {'height': 160,
   'url': 'https://i.scdn.co/image/ab6761610000f178709c28ab620aff25f7584281',
   'width': 160}],
 'name': 'Haken',
 'popularity': 43,
 'type': 'artist',
 'uri': 'spotify:artist:2SRIVGDkdqQnrQdaXxDkJt'}

In [36]:
scope = 'user-top-read'

sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope, client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri))

results = sp.current_user_top_artists(time_range="long_term", limit=20)

results

{'items': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2SRIVGDkdqQnrQdaXxDkJt'},
   'followers': {'href': None, 'total': 214230},
   'genres': ['progressive metal'],
   'href': 'https://api.spotify.com/v1/artists/2SRIVGDkdqQnrQdaXxDkJt',
   'id': '2SRIVGDkdqQnrQdaXxDkJt',
   'images': [{'height': 640,
     'url': 'https://i.scdn.co/image/ab6761610000e5eb709c28ab620aff25f7584281',
     'width': 640},
    {'height': 320,
     'url': 'https://i.scdn.co/image/ab67616100005174709c28ab620aff25f7584281',
     'width': 320},
    {'height': 160,
     'url': 'https://i.scdn.co/image/ab6761610000f178709c28ab620aff25f7584281',
     'width': 160}],
   'name': 'Haken',
   'popularity': 43,
   'type': 'artist',
   'uri': 'spotify:artist:2SRIVGDkdqQnrQdaXxDkJt'},
  {'external_urls': {'spotify': 'https://open.spotify.com/artist/23ytwhG1pzX6DIVWRWvW1r'},
   'followers': {'href': None, 'total': 317483},
   'genres': ['djent', 'progressive metal'],
   'href': 'https://api.spotify.com/v1

In [23]:
ssm = boto3.client("ssm", region_name="eu-west-1")
client_secret = ssm.get_parameter(Name="SPOTIFY_CLIENT_SECRET", WithDecryption=True)
client_secret = client_secret["Parameter"]["Value"]
client_id = ssm.get_parameter(Name="SPOTIFY_CLIENT_ID", WithDecryption=True)
client_id = client_id["Parameter"]["Value"]

refresh_token = ssm.get_parameter(Name="SPOTIFY_REFRESH_TOKEN_TOP_READ", WithDecryption=True)
refresh_token = refresh_token["Parameter"]["Value"]

sp_oauth = SpotifyOAuth(client_id, client_secret, redirect_uri, scope="user-top-read")
token_info = sp_oauth.refresh_access_token(refresh_token)
sp = spotipy.Spotify(auth=token_info['access_token'])

sp.current_user_top_artists(time_range="long_term", limit=20)

{'items': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2SRIVGDkdqQnrQdaXxDkJt'},
   'followers': {'href': None, 'total': 214230},
   'genres': ['progressive metal'],
   'href': 'https://api.spotify.com/v1/artists/2SRIVGDkdqQnrQdaXxDkJt',
   'id': '2SRIVGDkdqQnrQdaXxDkJt',
   'images': [{'height': 640,
     'url': 'https://i.scdn.co/image/ab6761610000e5eb709c28ab620aff25f7584281',
     'width': 640},
    {'height': 320,
     'url': 'https://i.scdn.co/image/ab67616100005174709c28ab620aff25f7584281',
     'width': 320},
    {'height': 160,
     'url': 'https://i.scdn.co/image/ab6761610000f178709c28ab620aff25f7584281',
     'width': 160}],
   'name': 'Haken',
   'popularity': 43,
   'type': 'artist',
   'uri': 'spotify:artist:2SRIVGDkdqQnrQdaXxDkJt'},
  {'external_urls': {'spotify': 'https://open.spotify.com/artist/23ytwhG1pzX6DIVWRWvW1r'},
   'followers': {'href': None, 'total': 317483},
   'genres': ['djent', 'progressive metal'],
   'href': 'https://api.spotify.com/v1

In [18]:
# import pickle

# with open("test_file.pkl", "wb") as file:
#     pickle.dump(results, file, protocol=pickle.HIGHEST_PROTOCOL)

In [55]:
# table.put_item(Item=results[0])

{'ResponseMetadata': {'RequestId': 'B88LC5NNBIG2883584TG6NQK43VV4KQNSO5AEMVJF66Q9ASUAAJG',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'Server',
   'date': 'Mon, 27 May 2024 12:32:27 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'content-length': '2',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'B88LC5NNBIG2883584TG6NQK43VV4KQNSO5AEMVJF66Q9ASUAAJG',
   'x-amz-crc32': '2745614147'},
  'RetryAttempts': 0}}

In [40]:
! pip list

Package                  Version
------------------------ -----------
aiofiles                 22.1.0
aiosqlite                0.19.0
anyio                    3.6.2
argon2-cffi              21.3.0
argon2-cffi-bindings     21.2.0
arrow                    1.2.3
asttokens                2.2.1
attrs                    23.1.0
Babel                    2.12.1
backcall                 0.2.0
beautifulsoup4           4.12.2
bleach                   6.0.0
boto3                    1.34.113
botocore                 1.34.113
certifi                  2023.5.7
cffi                     1.15.1
charset-normalizer       3.1.0
colorama                 0.4.6
comm                     0.1.3
contourpy                1.1.0
cycler                   0.11.0
debugpy                  1.6.7
decorator                5.1.1
defusedxml               0.7.1
easyocr                  1.7.1
executing                1.2.0
fastjsonschema           2.16.3
filelock                 3.13.1
fonttools                4.40.0
fqdn      


[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


{'Parameter': {'Name': 'SPOTIFY_CLIENT_SECRET',
  'Type': 'SecureString',
  'Value': 'b4b4adad93d2427b9c22b3c0b8eead3b',
  'Version': 1,
  'LastModifiedDate': datetime.datetime(2024, 5, 27, 14, 51, 20, 960000, tzinfo=tzlocal()),
  'ARN': 'arn:aws:ssm:eu-west-1:804206532268:parameter/SPOTIFY_CLIENT_SECRET',
  'DataType': 'text'},
 'ResponseMetadata': {'RequestId': '1d44d69b-005c-4bde-8cb3-532cc78df930',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'Server',
   'date': 'Mon, 27 May 2024 12:52:31 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '251',
   'connection': 'keep-alive',
   'x-amzn-requestid': '1d44d69b-005c-4bde-8cb3-532cc78df930'},
  'RetryAttempts': 0}}