# Spotify Playlist Personalisation

----

### 1. Set Environment Variables

Use `auth_example.env` as a template. Provide all three secrets configured for your app on the Spotify developer platform:

* SPOTIFY_CLIENT_ID=
* SPOTIFY_CLIENT_SECRET=
* SPOTIFY_REDIRECT_URI=


In [None]:
from dotenv import load_dotenv

# Load the variables defined in the local .env file into the process environment
load_dotenv()


### 2. Generate Token
Provide the callback link when prompted in order to generate a user token.

In [None]:
import tekore as tk
# Read the Spotify application credentials from the environment
conf = tk.config_from_environment()
# Request a user token; the credentials are unpacked as positional arguments
token = tk.prompt_for_user_token(*conf)


### 3. Retrieving Playlist & Track Data

See `util.py` for additional data-pull helpers.

The `tekore` package is used to interact with the Spotify API.


In [None]:
import util
import asyncio
import pandas as pd

# Spotify account whose playlists are analysed, and the matching user URI
userID = 'gingerale3'
uri = f'spotify:user:{userID}'

# Asynchronous tekore client authenticated with the user token
async_spotify = tk.Spotify(token=token, asynchronous=True)

In [None]:
# Re-import util so that edits made to util.py take effect in the running session
import importlib
importlib.reload(util)

In [None]:
a = await fetch_playlists(async_spotify, userID)

In [None]:
# Collect an (id, name) pair for every playlist in `a`.
# No owner filter is applied; to keep only playlists owned by `uri`,
# add the condition `if playlist.owner.uri == uri` to the comprehension.
playlist_ids = [(playlist.id, playlist.name) for playlist in a]

In [None]:
async def get_playlist_tracks_ids(spotify, playlistID: str, playlistName: str):
    """Fetch every item of a playlist, tagged with the playlist's ID and name.

    A single-item probe request reads the playlist's ``total``; the remaining
    items are then requested concurrently in pages of ``page_size`` items.

    Args:
        spotify: Client exposing an awaitable
            ``playlist_items(playlist_id, limit=..., offset=...)`` whose result
            has ``total`` and ``items`` attributes.
        playlistID: ID of the playlist to read.
        playlistName: Name stored alongside every returned item.

    Returns:
        A list of dicts with the keys ``"playlistID"``, ``"name"`` and
        ``"track"`` (the raw item taken from the response), probe item first
        and then page by page.
    """
    page_size = 50

    def tag(items):
        # Attach the playlist identity to each raw item.
        return [
            {"playlistID": playlistID, "name": playlistName, "track": item}
            for item in items
        ]

    first_page = await spotify.playlist_items(playlistID, limit=1, offset=0)
    playlist_tracks = tag(first_page.items)

    # The probe already returned item 0, so only ``total - 1`` items remain.
    # Ceiling division avoids an extra, empty request when the remainder is an
    # exact multiple of the page size (or zero).
    remaining = max(first_page.total - 1, 0)
    num_pages = -(-remaining // page_size)

    pages = await asyncio.gather(
        *(
            spotify.playlist_items(
                playlistID, limit=page_size, offset=1 + page_size * page_no
            )
            for page_no in range(num_pages)
        )
    )
    for page in pages:
        playlist_tracks.extend(tag(page.items))

    return playlist_tracks


In [None]:
playlistTracks = []
for i in range(len(playlist_ids)//3 + 1):
    tasks = [get_playlist_tracks_ids(async_spotify, id[0], id[1]) for id in playlist_ids[i*3:(i+1)*3]]
    responses = await asyncio.gather(*tasks)
    for response in responses:
        playlistTracks.extend(response)

In [None]:
# Number of playlist items fetched across all playlists
len(playlistTracks)

In [None]:
# Track id and corresponding playlist.
# Entries without a usable id are skipped: either the item carries no track
# object at all (which would otherwise raise AttributeError on `.id`), or the
# track's id is None.
trackId_playlist = [
    {
        'playlistID': entry['playlistID'],
        'playlistName': entry['name'],
        'id': entry['track'].track.id,
    }
    for entry in playlistTracks
    if entry['track'].track is not None and entry['track'].track.id is not None
]

In [None]:
# Count the distinct (playlist, track id) pairs. An entry whose track object is
# missing contributes a None id instead of raising AttributeError.
len({
    (entry['playlistID'], getattr(entry['track'].track, 'id', None))
    for entry in playlistTracks
})

In [None]:
# Get all features for tracks
track_features = await get_tracks_audio_features(async_spotify, tracks = [track['id'] for track in trackId_playlist])
track_features = [track.__dict__ for track in track_features if track is not None]
track_features_df = pd.DataFrame(track_features).drop_duplicates()
df = pd.DataFrame(trackId_playlist).merge(track_features_df, on = 'id', how = 'left')


In [None]:
# TODO: add playlist descriptions
# Number of playlists collected
len(playlist_ids)

In [None]:
# Create consolidated dataframe

In [None]:
# Audio-feature columns used as model inputs
features = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'speechiness',
    'tempo',
    'time_signature',
    'valence',
]

# Keep only rows with a complete feature set. The explicit copy makes df_clean
# an independent frame, so the column assignments on it do not write into a
# slice of `df` (which triggers pandas' SettingWithCopyWarning).
df_clean = df.dropna(subset=features).copy()

# Min-max scale each feature column so all features share a common range
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
df_clean[features] = scaler.fit_transform(df_clean[features])

### Modelling

In [None]:
# Display the cleaned and scaled dataframe
df_clean

In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Feature matrix handed to t-SNE
data_subset = df_clean[features].values

# Project the features onto two dimensions with t-SNE.
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version.
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
tsne_results = tsne.fit_transform(data_subset)

# Store the two embedding coordinates as dataframe columns
df_clean['tsne-2d-one'] = tsne_results[:, 0]
df_clean['tsne-2d-two'] = tsne_results[:, 1]

# One scatter series per playlist, drawn through the Axes interface
fig, ax = plt.subplots(figsize=(16, 10))
for name, members in df_clean.groupby('playlistName'):
    ax.scatter(members['tsne-2d-one'], members['tsne-2d-two'], alpha=0.8, label=name)

ax.legend()
ax.set_title('t-SNE plot colored by playlistName')
ax.set_xlabel('tsne-2d-one')
ax.set_ylabel('tsne-2d-two')


In [None]:
# Display the collected (playlist id, playlist name) pairs
playlist_ids

In [None]:
# Playlist to emphasise in the plot
# (original note: jazspazz + gingerale3 jason's chauffeur service)
highlighted = "Indie Favs"

plt.figure(figsize=(16, 10))

# Draw the highlighted playlist with large blue markers and fade all the others
for name, members in df_clean.groupby('playlistName'):
    if name == highlighted:
        marker_style = {'alpha': 0.8, 'color': 'blue', 's': 100}
    else:
        marker_style = {'alpha': 0.2}
    plt.scatter(members['tsne-2d-one'], members['tsne-2d-two'], label=name, **marker_style)

plt.legend()
plt.title('t-SNE plot colored by playlistName')
plt.xlabel('tsne-2d-one')
plt.ylabel('tsne-2d-two')

plt.show()
