In [1]:
!pip install boto3
!pip install python-dotenv
!pip install requests

Collecting requests
  Downloading requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)
Collecting charset-normalizer<4,>=2 (from requests)
  Downloading charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl.metadata (34 kB)
Collecting idna<4,>=2.5 (from requests)
  Downloading idna-3.6-py3-none-any.whl.metadata (9.9 kB)
Collecting certifi>=2017.4.17 (from requests)
  Downloading certifi-2023.11.17-py3-none-any.whl.metadata (2.2 kB)
Downloading requests-2.31.0-py3-none-any.whl (62 kB)
   ---------------------------------------- 0.0/62.6 kB ? eta -:--:--
   ------ --------------------------------- 10.2/62.6 kB ? eta -:--:--
   -------------------------- ------------- 41.0/62.6 kB 667.8 kB/s eta 0:00:01
   ---------------------------------------- 62.6/62.6 kB 562.7 kB/s eta 0:00:00
Downloading certifi-2023.11.17-py3-none-any.whl (162 kB)
   ---------------------------------------- 0.0/162.5 kB ? eta -:--:--
   -------------------- ------------------- 81.9/162.5 kB 2.3 MB/s eta 0:00:01
   -----

In [28]:
import os
import re
import boto3
import requests
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment
# before reading them below.
load_dotenv()

# Each of these is None when the variable is not set.
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
REGION_NAME = os.getenv('REGION_NAME')

# Shared DynamoDB resource handle used by the cells that follow.
dynamodb = boto3.resource(
    'dynamodb',
    region_name=REGION_NAME,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)


In [105]:
table = dynamodb.Table('SPOTIFY_ZODIAC')

# Scan hands back the table one page at a time. A page that carries
# 'LastEvaluatedKey' means more items remain, so keep scanning from that key
# until a page arrives without it.
response = table.scan()
data = list(response['Items'])  # copy so the response page is not mutated

while 'LastEvaluatedKey' in response:
    response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
    data.extend(response['Items'])

print(f"there are {len(data)} entries")

# Print only a bounded preview: dumping every item (each one carries a full
# audio_features list) floods the notebook output and gets saved with the file.
PREVIEW_CHARS = 1000
preview = repr(data)
if len(preview) > PREVIEW_CHARS:
    preview = preview[:PREVIEW_CHARS] + ' ...'
print(f"\n{preview}")

there are 1 entries

[{'count': Decimal('50'), 'Account_id': 'b571fd6b1cbd0247ac805858bdc52c13bbfc71f50082e0a7f1bd253bdf5a56c8', 'audio_features': [{'track_href': 'https://api.spotify.com/v1/tracks/1PJe0Vt9p0QaV4ZNHUwml0', 'loudness': '-10.769', 'liveness': '0.118', 'tempo': '110.088', 'valence': '0.497', 'instrumentalness': Decimal('0'), 'type': 'audio_features', 'danceability': '0.733', 'uri': 'spotify:track:1PJe0Vt9p0QaV4ZNHUwml0', 'speechiness': '0.0291', 'mode': Decimal('0'), 'duration_ms': Decimal('214922'), 'acousticness': '0.559', 'id': '1PJe0Vt9p0QaV4ZNHUwml0', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/1PJe0Vt9p0QaV4ZNHUwml0', 'key': Decimal('0'), 'energy': '0.369', 'time_signature': Decimal('4')}, {'track_href': 'https://api.spotify.com/v1/tracks/45mvVEa99kOMcjHJpCykrw', 'loudness': '-9.505', 'liveness': '0.126', 'tempo': '169.941', 'valence': '0.708', 'instrumentalness': '5.51e-05', 'type': 'audio_features', 'danceability': '0.58', 'uri': 'spotify:track:45mv

In [103]:
# Helper functions:

def get_playlist_uri(playlist_url: str):
    """Extract the playlist id (the last path segment) from a playlist URL.

    Accepts URLs with or without a query string or fragment, and with an
    optional trailing slash, e.g. both
    ``https://open.spotify.com/playlist/<id>?si=...`` and
    ``https://open.spotify.com/playlist/<id>``.

    Args:
        playlist_url: The playlist URL to parse.

    Returns:
        The path segment that immediately precedes the query string,
        fragment, or end of the URL.

    Raises:
        ValueError: If no such path segment can be found.
    """
    # A path segment (no '/', '?' or '#') followed by an optional trailing
    # slash and then the start of the query/fragment or the end of the string.
    match = re.search(r'/([^/?#]+)/?(?:[?#]|$)', playlist_url)
    if match is None:
        raise ValueError(f"could not find a playlist id in {playlist_url!r}")
    return match.group(1)

# Seconds to wait on any single HTTP call before giving up instead of hanging.
_REQUEST_TIMEOUT_SECONDS = 30

# Number of keys sent per batch_get_item call (the original code sliced by 100).
_BATCH_GET_MAX_KEYS = 100


def _find_unstored_track_ids(track_ids):
    """Return, in order, the ids from `track_ids` with no item in SPOTIFY_TRACKS."""
    missing = []
    for start in range(0, len(track_ids), _BATCH_GET_MAX_KEYS):
        chunk = track_ids[start:start + _BATCH_GET_MAX_KEYS]
        db_response = dynamodb.batch_get_item(
            RequestItems={
                'SPOTIFY_TRACKS': {
                    'Keys': [{'id': track_id} for track_id in chunk]
                }
            }
        )
        stored = {track['id'] for track in db_response['Responses']['SPOTIFY_TRACKS']}
        # Anything not returned is treated as "not stored yet". That includes
        # keys the call left unprocessed; re-fetching those only rewrites the
        # same item, so it is safe.
        missing.extend(track_id for track_id in chunk if track_id not in stored)
    return missing


def _store_audio_features(headers, track_ids, tracks_table):
    """Fetch audio features for `track_ids` from Spotify and put each one in `tracks_table`."""
    response = requests.get(
        url='https://api.spotify.com/v1/audio-features',
        headers=headers,
        params={'ids': ','.join(track_ids)},
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    if response.status_code != 200:
        print(response.text)
        return

    for audio_features in response.json()['audio_features']:
        # Entries with no data come back as null; skip them.
        if audio_features is None or audio_features == "None":
            continue
        # Floats are stored as strings (put_item is not given raw floats);
        # every other value is stored unchanged.
        item = {
            key: str(value) if isinstance(value, float) else value
            for key, value in audio_features.items()
        }
        tracks_table.put_item(Item=item)


def get_track_ids_and_store_audio_features(access_token: str, playlist_uri: str):
    """Collect a playlist's track ids and store audio features for new tracks.

    Walks the playlist's track pages (50 per page). For each page, the tracks
    not yet present in the SPOTIFY_TRACKS table are looked up on the
    audio-features endpoint and written to that table.

    Args:
        access_token: Spotify bearer token sent in the Authorization header.
        playlist_uri: Playlist id used to build the playlist tracks URL.

    Returns:
        A dict with 'track_ids' (unique track ids in playlist order) and
        'playlist_uri'. If a page request fails, the response body is printed
        and the ids gathered so far are returned.
    """
    playlist_info = {
        'track_ids': [],
        'playlist_uri': playlist_uri,
    }

    tracks_table = dynamodb.Table('SPOTIFY_TRACKS')

    headers = {
        'Authorization': f"Bearer {access_token}"
    }

    print(f"fetching playlist {playlist_uri}")
    next_url = f"https://api.spotify.com/v1/playlists/{playlist_uri}/tracks?market=US&limit=50"

    # Mirrors playlist_info['track_ids'] so duplicate checks are O(1).
    seen_track_ids = set()

    num = 0  # page counter, printed as a progress indicator
    while next_url is not None:
        print(num)
        response = requests.get(url=next_url, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)

        if response.status_code != 200:
            # Stop here: retrying the same URL (expired token, rate limit, ...)
            # would otherwise loop forever.
            print(response.text)
            break

        playlist_raw_json = response.json()

        page_track_ids = []
        for item in playlist_raw_json['items']:
            track = item['track']
            if track is None:
                continue
            track_id = track['id']
            # NOTE(review): ids can apparently be None (presumably local files -
            # confirm against the Spotify docs); they cannot be used as a table key.
            if track_id is None or track_id in seen_track_ids:
                continue
            seen_track_ids.add(track_id)
            page_track_ids.append(track_id)
            playlist_info['track_ids'].append(track_id)

        # Only fetch audio features for tracks that have not been stored yet.
        new_track_ids = _find_unstored_track_ids(page_track_ids)
        if new_track_ids:
            _store_audio_features(headers, new_track_ids, tracks_table)

        num += 1
        next_url = playlist_raw_json['next']

    return playlist_info

In [102]:
# Read the Spotify bearer token from the environment (load_dotenv() in the
# setup cell makes values from .env visible here) instead of pasting it into
# the notebook, where it would be saved and shared along with the file.
access_token = os.environ.get('SPOTIFY_ACCESS_TOKEN')
if not access_token:
    raise RuntimeError("SPOTIFY_ACCESS_TOKEN is not set; add it to .env or the environment")

tracks_table = dynamodb.Table('SPOTIFY_TRACKS')

headers = {
    'Authorization': f"Bearer {access_token}"
}

# Which slice of the current user's playlists to process in this run.
PLAYLIST_PAGE_LIMIT = 50
PLAYLIST_PAGE_OFFSET = 41

response = requests.get(
    url='https://api.spotify.com/v1/me/playlists',
    headers=headers,
    params={'limit': PLAYLIST_PAGE_LIMIT, 'offset': PLAYLIST_PAGE_OFFSET},
    timeout=30,  # fail instead of hanging if the network is unreachable
)

playlist_ids = []
if response.status_code == 200:
    raw_json = response.json()
    playlist_ids = [playlist['id'] for playlist in raw_json['items']]
else:
    print(response.text)

# Store audio features for every playlist and report how many unique tracks each has.
for num, playlist_id in enumerate(playlist_ids, start=1):
    print(f"fetching playlist {num}/{len(playlist_ids)}")

    print(len(get_track_ids_and_store_audio_features(access_token, playlist_id)['track_ids']))

fetching playlist 1/50
fetching playlist 2PUQuFW8Ddt3Q8VEXoqFlo
582
fetching playlist 2/50
fetching playlist 3VwDez4nzzmGd7nJcUkplw
24
fetching playlist 3/50
fetching playlist 6KmmrRdzoSi4Kug7GggExO
46
fetching playlist 4/50
fetching playlist 3Cm0dBJQAfuqZjtl9Jclmk
16
fetching playlist 5/50
fetching playlist 4AIO3pMMrBCV4TE1dutWyd
55
fetching playlist 6/50
fetching playlist 4guekvaMtO4VgM5GrVnnjk


ConnectionError: HTTPSConnectionPool(host='api.spotify.com', port=443): Max retries exceeded with url: /v1/playlists/4guekvaMtO4VgM5GrVnnjk/tracks?offset=250&limit=50&market=US (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000017D2A9A61D0>: Failed to establish a new connection: [WinError 10051] A socket operation was attempted to an unreachable network'))