# Notes

- Usually takes 6-7 minutes to run, depending on the amount of data being fetched.
- Gets Spotify data using the spotipy library.
- Saves data
  - locally
  - in AWS
- Does not display the collage in matplotlib.

In [1]:
# !export

#### Module installs

In [2]:
# !pip3 install git+https://github.com/plamere/spotipy.git --upgrade
# !pip3 install image
# !pip3 install Send2Trash

#### Imports

In [3]:
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

import os
import urllib.request

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image

import requests
from io import BytesIO
import boto3

from datetime import datetime

In [4]:
# Checkpoint timestamp; compare with the other 'time:' outputs to see how long each stage took.
print('time: ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

time: 2024-06-23 10:28:48


#### Spotipy setup

In [5]:
# Spotify app credentials and the target account, read from the environment
# so that no secrets are stored in the notebook (a missing variable raises KeyError).
client_id = os.environ['SPOTIFY_CLIENT_ID']
client_secret = os.environ['SPOTIFY_CLIENT_SECRET']
redirect_URI = os.environ['SPOTIFY_REDIRECT_URI']
username = os.environ['SPOTIFY_USERNAME']

# App-only (client-credentials) client; it is replaced below once a user token is obtained.
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# User-level permissions requested for the token.
scope = 'user-library-read user-read-currently-playing user-read-playback-state user-read-recently-played'
token = util.prompt_for_user_token(username, scope, client_id, client_secret, redirect_URI)

if not token:
    # Fail fast: the following cells call sp.current_user_saved_tracks(), which
    # works on the *current user's* library and therefore needs the user token.
    # Continuing with the app-only client would only fail later with a less
    # helpful API error.
    raise RuntimeError("Can't get token for " + username)

sp = spotipy.Spotify(auth=token)

In [6]:
# usually takes some time to execute...

# Collect one {id, name, url} record per saved track's album.
url_list = []

# C doubles as the page size of each request; C*R is the number of images
# uploaded later (see `cutoff`). Twice that many tracks are requested here --
# presumably to leave headroom for tracks sharing an album (TODO confirm).
C = 25
R = 9
limit = C
amount = C*R*2

for i in range(0, amount, limit):
    saved_tracks = sp.current_user_saved_tracks(limit=limit, offset=i)
    items = saved_tracks['items']
    if not items:
        # The library holds fewer than `amount` tracks: nothing left to fetch.
        break

    for t in items:
        track_album = t['track']['album']
        images = track_album['images']
        if not images:
            # Album without artwork: indexing [0] would raise IndexError, so skip it.
            continue
        url_list.append({
            'id': track_album['id'],
            'name': track_album['name'],
            # First listed image; make_array() later keeps only 640x640 covers.
            'url': images[0]['url'],
        })

    print('Progress: {}/{}'.format(i+limit, amount), end='\r')

Progress: 450/450

In [7]:
# One row per distinct album record: repeated (id, name, url) rows are dropped
# (first occurrence kept) and the index is renumbered from 0.
df = (
    pd.DataFrame(url_list)
    .drop_duplicates(keep='first')
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,id,name,url
0,6kZ42qRrzov54LcAk4onW9,Red (Taylor's Version),https://i.scdn.co/image/ab67616d0000b273318443...
1,3RE8NUULcBzFvVtCmlI4lb,Gettin' Ready (Expanded Edition),https://i.scdn.co/image/ab67616d0000b2739f0636...
2,7aJuG4TFXa2hmE4z1yxc3n,HIT ME HARD AND SOFT,https://i.scdn.co/image/ab67616d0000b27371d62e...
3,4UlGauD7ROb3YbVOFMgW5u,ANTI (Deluxe),https://i.scdn.co/image/ab67616d0000b27333c6b9...
4,6WW8YiLb4jM9GiSbPUPBfu,Anyone But You (Original Motion Picture Soundt...,https://i.scdn.co/image/ab67616d0000b273247f10...


In [8]:
# (rows, columns) of the de-duplicated album table.
df.shape

(411, 3)

In [9]:
# AWS credentials, read from the environment (a missing variable raises KeyError).
aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
aws_secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
# Target bucket and the key prefix ("folder") for the images. Object keys are
# later built as img_folder_name + '<n>.jpeg', so the value must already
# contain any trailing '/'.
bucket_name = os.environ['AWS_BUCKET_NAME']
img_folder_name = os.environ['AWS_BUCKET_FOLDER_NAME']

def create_client():
    """Return a boto3 S3 client authenticated with the module-level AWS keys."""
    credentials = {
        'aws_access_key_id': aws_access_key_id,
        'aws_secret_access_key': aws_secret_access_key,
    }
    return boto3.client('s3', **credentials)
    
def create_session():
    """Return a new boto3 ``Session`` built from the module-level AWS keys."""
    session = boto3.Session(
        aws_secret_access_key=aws_secret_access_key,
        aws_access_key_id=aws_access_key_id,
    )
    return session
    
def create_client_from_session():
    """Return an S3 client obtained from a freshly created boto3 session."""
    return create_session().client('s3')
    
def create_resource_from_session():
    """Return an S3 service resource obtained from a freshly created boto3 session."""
    return create_session().resource('s3')
    
def create_bucket(bucket_name):
    """Return a boto3 ``Bucket`` resource object for ``bucket_name``.

    Despite the name, no create call is issued here; the function only
    builds the resource handle from a new session-based S3 resource.
    """
    return create_resource_from_session().Bucket(bucket_name)

In [10]:
# Create the S3 "folder": an object with no body whose key is the folder prefix.
response = create_client().put_object(Key=img_folder_name, Bucket=bucket_name)
print(response)

# Bucket resource handle, used further down to list and to delete the uploaded images.
bucket = create_bucket(bucket_name)

{'ResponseMetadata': {'RequestId': 'AYA8MAMRG0JPE180', 'HostId': 'Zrzq7l3LSY6r/cfcufSMMCAwNZJaOcQ5roy8IAJSGGF0/vyku5rz7L+0grZwkAYjD96EV+S1eUiyIWnssgrnvA==', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': 'Zrzq7l3LSY6r/cfcufSMMCAwNZJaOcQ5roy8IAJSGGF0/vyku5rz7L+0grZwkAYjD96EV+S1eUiyIWnssgrnvA==', 'x-amz-request-id': 'AYA8MAMRG0JPE180', 'date': 'Sun, 23 Jun 2024 15:28:57 GMT', 'x-amz-server-side-encryption': 'AES256', 'etag': '"d41d8cd98f00b204e9800998ecf8427e"', 'server': 'AmazonS3', 'content-length': '0'}, 'RetryAttempts': 1}, 'ETag': '"d41d8cd98f00b204e9800998ecf8427e"', 'ServerSideEncryption': 'AES256'}


In [11]:
# Checkpoint timestamp taken just before the image upload stage.
print('time: ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

time: 2024-06-23 10:28:56


In [12]:
# usually takes some time to execute...

# Download each album cover and upload it to S3 as <img_folder_name><n>.jpeg,
# where n is the 1-based row number in df.

cutoff = C * R                # maximum number of images to upload
temp_image_name = 'img.jpeg'  # local scratch file, overwritten by every download

# One client for the whole loop rather than a new session + client per image.
s3 = create_client_from_session()

try:
    for index, row in df.iterrows():
        url = row['url']
        name = str(index+1)
        urllib.request.urlretrieve(url, temp_image_name)

        # The context manager closes the handle before the scratch file is
        # overwritten by the next download or removed at the end.
        with open(temp_image_name, 'rb') as data:
            s3.put_object(Body=data, Bucket=bucket_name, Key=img_folder_name + name + '.jpeg')

        print('{}/{} files added.'.format(name, cutoff), end='\r')

        # df's index was reset to 0..n-1, so this stops after `cutoff` uploads.
        if (index + 1) % cutoff == 0:
            break
finally:
    # Remove the scratch file even when a download or an upload failed.
    if os.path.exists(temp_image_name):
        os.remove(temp_image_name)

225/225 files added.

In [13]:
def collage(array, ncols=20):
    """Tile a stack of equally sized images into one grid image.

    Parameters
    ----------
    array : numpy.ndarray
        4-D stack shaped (n_images, height, width, channels).
    ncols : int, optional
        Number of images per grid row (default 20).

    Returns
    -------
    numpy.ndarray
        Array shaped (height * nrows, width * ncols, channels) in which image
        ``k`` occupies grid row ``k // ncols`` and grid column ``k % ncols``.

    Raises
    ------
    AssertionError
        If the number of images is not an exact multiple of ``ncols``.
    """
    count, tile_h, tile_w, channels = array.shape
    nrows, leftover = divmod(count, ncols)
    assert leftover == 0

    # (grid_row, grid_col, y, x, c) -> (grid_row, y, grid_col, x, c), so that
    # flattening the first two and the next two axes yields full pixel rows/columns.
    grid = array.reshape(nrows, ncols, tile_h, tile_w, channels)
    row_major = grid.transpose(0, 2, 1, 3, 4)
    return row_major.reshape(nrows * tile_h, ncols * tile_w, channels)
    
def make_array(bucket, bucket_name, max_images=200):
    """Download the 640x640 JPEG covers stored in an S3 bucket into one array.

    Objects are visited in the order ``bucket.objects.all()`` yields them
    (observed to be lexicographic by key: 1, 10, 100, 101, ...). Each image is
    fetched once over plain HTTPS without credentials, so the objects must be
    readable that way.

    Parameters
    ----------
    bucket
        boto3 ``Bucket`` resource whose objects are listed.
    bucket_name : str
        Bucket name, used for the region lookup and to build object URLs.
    max_images : int, optional
        Upper bound on the number of images returned (default 200).

    Returns
    -------
    numpy.ndarray
        The RGB images stacked along the first axis, i.e. shape
        (n, 640, 640, 3) when at least one image qualified; an empty 1-D
        array otherwise.
    """
    # The bucket's region is identical for every object, so look it up once.
    location = create_client().get_bucket_location(Bucket=bucket_name)['LocationConstraint']
    if location:
        base_url = 'https://s3-%s.amazonaws.com/%s' % (location, bucket_name)
    else:
        # LocationConstraint is null for buckets in us-east-1; formatting it
        # into the host name would yield the invalid host "s3-None".
        base_url = 'https://s3.amazonaws.com/%s' % bucket_name

    np_array_list = []
    for bucket_object in bucket.objects.all():
        if len(np_array_list) >= max_images:
            break  # enough images collected; skip the remaining downloads

        key = bucket_object.key
        if not key.endswith('.jpeg'):
            # Not an uploaded image (e.g. the folder placeholder key).
            continue

        print('first=', bucket_object) # displays lexicographically
        url = '%s/%s' % (base_url, key)
        response = requests.get(url)
        print(response.status_code)
        if response.status_code != 200:
            continue

        imc = Image.open(BytesIO(response.content)).convert('RGB')
        # Only full-size covers are kept so that every array has the same shape.
        if imc.size == (640, 640):
            np_array_list.append(np.asarray(imc))

    return np.array(np_array_list)

In [14]:
# Checkpoint timestamp taken after the uploads, before the images are read back.
print('time: ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

time: 2024-06-23 10:32:28


In [15]:
# usually takes some time to execute...
# make_array() walks the bucket's objects and downloads the images over HTTP,
# so the runtime grows with the number of stored objects.

arr = make_array(bucket, bucket_name)
print('shape', arr.shape)

first= s3.ObjectSummary(bucket_name='spotify-album-collage', key='collage-images/1.jpeg')
200
first= s3.ObjectSummary(bucket_name='spotify-album-collage', key='collage-images/10.jpeg')
200
first= s3.ObjectSummary(bucket_name='spotify-album-collage', key='collage-images/100.jpeg')
200
first= s3.ObjectSummary(bucket_name='spotify-album-collage', key='collage-images/101.jpeg')
200
first= s3.ObjectSummary(bucket_name='spotify-album-collage', key='collage-images/102.jpeg')
200
first= s3.ObjectSummary(bucket_name='spotify-album-collage', key='collage-images/103.jpeg')
200
first= s3.ObjectSummary(bucket_name='spotify-album-collage', key='collage-images/104.jpeg')
200
first= s3.ObjectSummary(bucket_name='spotify-album-collage', key='collage-images/105.jpeg')
200
first= s3.ObjectSummary(bucket_name='spotify-album-collage', key='collage-images/106.jpeg')
200
first= s3.ObjectSummary(bucket_name='spotify-album-collage', key='collage-images/107.jpeg')
200
first= s3.ObjectSummary(bucket_name='spotif

In [16]:
# Checkpoint timestamp taken once the image array has been built.
print('time: ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

time: 2024-06-23 10:35:30


In [17]:
# Stitch the image stack into a single grid, using collage()'s default of 20 columns.
result = collage(arr)

In [18]:
# Folder (relative to the working directory) that receives the finished collages.
collage_dir = './collages/'
# exist_ok=True makes this a no-op when the folder is already there and avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(collage_dir, exist_ok=True)

# Timestamp prefix (YYYYMMDDHHMMSS) keeps file names distinct and sortable by creation time.
current_time = datetime.now().strftime('%Y%m%d%H%M%S')

file_name = current_time + '_collage-aws-no-plt.jpeg'

file_path = collage_dir + file_name

print(file_path)

./collages/20240623103530_collage-aws-no-plt.jpeg


In [23]:
# Wrap the stitched pixel array in a PIL image and write it to file_path.
im = Image.fromarray(result)
im.save(file_path)

In [19]:
# Final checkpoint timestamp.
print('time: ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

time: 2024-06-23 10:35:30


In [22]:
# delete objects from AWS S3 bucket
# Removes every object whose key starts with img_folder_name, which includes
# the folder placeholder key itself. As the cell's last expression, the raw
# delete response(s) are displayed below.

bucket.objects.filter(Prefix=img_folder_name).delete()

[{'ResponseMetadata': {'RequestId': '1PZA8ZW9BV3XSMKM',
   'HostId': 'akKo0t7yYXqK9BNCfWY8ge/J18YOIMko/MHqLmnFspRjH3MGQLKbMTKqXhix6XYpk9wADubFp0kLJOln/l2l5pd/47xlBMywTerBQ24i+8E=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'akKo0t7yYXqK9BNCfWY8ge/J18YOIMko/MHqLmnFspRjH3MGQLKbMTKqXhix6XYpk9wADubFp0kLJOln/l2l5pd/47xlBMywTerBQ24i+8E=',
    'x-amz-request-id': '1PZA8ZW9BV3XSMKM',
    'date': 'Sun, 23 Jun 2024 15:35:31 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'collage-images/192.jpeg'},
   {'Key': 'collage-images/224.jpeg'},
   {'Key': 'collage-images/115.jpeg'},
   {'Key': 'collage-images/67.jpeg'},
   {'Key': 'collage-images/174.jpeg'},
   {'Key': 'collage-images/82.jpeg'},
   {'Key': 'collage-images/173.jpeg'},
   {'Key': 'collage-images/'},
   {'Key': 'collage-images/116.jpeg'},
   {'Key': 'collage-images/111.jpeg'},
   {'Key': 'col