In [186]:
#Load libraries and API authentication
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os
from dotenv import load_dotenv
import plotly.graph_objects as go
import numpy as np
from plotly.colors import n_colors
import plotly.express as px
import chart_studio
import chart_studio.plotly as py
import chart_studio.tools as tls

# Read the local .env file so the secrets below are available through os.getenv.
load_dotenv()

# Spotify application credentials, used to build the authenticated client `sp`.
cid = os.getenv('cid')
secret = os.getenv('secret')
client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Chart Studio account that the figures are published to at the end of the notebook.
# NOTE(review): on Windows the OS already defines USERNAME and load_dotenv() does not
# override existing variables by default, so 'username' may resolve to the login name
# instead of the .env value -- confirm on that platform.
username = os.getenv('username')
api_key = os.getenv('api_key')
tls.set_credentials_file(username=username, api_key=api_key)

In [155]:
# Spotify playlist links holding my own top songs, keyed by year (as a string)
my_urls = {
    "2016": "https://open.spotify.com/playlist/37i9dQZF1Cz2XHt3lOfJxi?si=dcbd842aaf4e4f7d",
    "2017": "https://open.spotify.com/playlist/37i9dQZF1E9KfeBAuCai3p?si=2de00b969b104899",
    "2018": "https://open.spotify.com/playlist/37i9dQZF1Eje9zS72t1M5m?si=461041194faa450a",
    "2019": "https://open.spotify.com/playlist/37i9dQZF1Etac3sI88VwlJ?si=faeb4bda8c664996",
    "2020": "https://open.spotify.com/playlist/37i9dQZF1EM0XmByuUVL4X?si=cd1c544eec334332",
    "2021": "https://open.spotify.com/playlist/3Qlny1jLSBW0uYUBbWH6tV?si=1d2d702e9a624284",
    "2022": "https://open.spotify.com/playlist/2xctbBFfjBkSN5BD4NGjTe?si=dc1fa6cd4c364ae0",
}

# Spotify playlist links holding the global top songs, keyed the same way
global_urls = {
    "2016": "https://open.spotify.com/playlist/37i9dQZF1DX8XZ6AUo9R4R?si=3889e1d8593d47e1",
    "2017": "https://open.spotify.com/playlist/37i9dQZF1DWTE7dVUebpUW?si=0dea96c43add4260",
    "2018": "https://open.spotify.com/playlist/37i9dQZF1DXe2bobNYDtW8?si=33f66f95570241e3",
    "2019": "https://open.spotify.com/playlist/37i9dQZF1DWVRSukIED0e9?si=23502d9cbee04513",
    "2020": "https://open.spotify.com/playlist/2fmTTbBkXi8pewbUvG3CeZ?si=45804932d9d64d2b",
    "2021": "https://open.spotify.com/playlist/5GhQiRkGuqzpWZSE7OU4Se?si=d9326c93d57e4947",
    "2022": "https://open.spotify.com/playlist/37i9dQZF1DX18jTM2l2fJY?si=5d7dccdf2e7f482c",
}

# Year labels "2016" .. "2022", matching the keys of both dictionaries above
years = [str(year) for year in range(2016, 2022 + 1)]

In [160]:
#Function to get tracks from playlists
def get_tracks(years, urls, no_tracks_per_playlist):
    """Collect the leading tracks of one playlist per year into a DataFrame.

    Parameters
    ----------
    years : iterable
        Keys of ``urls`` to process; rows are emitted in this order.
    urls : dict
        Maps each entry of ``years`` to the playlist reference handed to
        ``sp.playlist_tracks``.
    no_tracks_per_playlist : int
        Maximum number of playlist positions to read from each playlist.

    Returns
    -------
    pandas.DataFrame
        One row per track with the columns ``track``, ``artist``,
        ``track_id``, ``artist_id``, ``playlist_year`` and ``rank``, where
        ``rank`` is the 0-based position of the track inside its playlist.

    Notes
    -----
    Only the items returned by a single ``sp.playlist_tracks`` call are read
    (presumably at most 100, spotipy's default page size -- confirm before
    asking for more). A playlist shorter than ``no_tracks_per_playlist``
    yields fewer rows instead of raising ``IndexError``, and playlist entries
    without track data are skipped; their ``rank`` is simply absent.
    """
    columns = ["track", "artist", "track_id", "artist_id", "playlist_year", "rank"]

    all_tracks = []
    for year in years:
        items = sp.playlist_tracks(urls[year])["items"]
        # Slicing tolerates playlists that hold fewer items than requested.
        for rank, item in enumerate(items[:no_tracks_per_playlist]):
            result = item.get("track")
            if result is None:
                # Entry without track data: nothing to record, but the
                # following tracks keep their true playlist position.
                continue
            lead_artist = result["artists"][0]
            all_tracks.append({
                "track": result["name"],
                "artist": lead_artist["name"],
                "track_id": result["id"],
                "artist_id": lead_artist["id"],
                "playlist_year": year,
                "rank": rank,
            })

    # Passing the column list keeps the schema stable even when no track was found.
    tracks_df = pd.DataFrame(all_tracks, columns=columns)

    return tracks_df

In [161]:
#Function to get audio features of tracks using track ids

def get_audiofeatures(tracks_df):
    """Append Spotify audio features to a frame of tracks.

    Parameters
    ----------
    tracks_df : pandas.DataFrame
        Must contain a ``track_id`` column; one row per track.

    Returns
    -------
    pandas.DataFrame
        The columns of ``tracks_df`` followed by the audio-feature columns,
        row for row, on a fresh 0..n-1 index. Tracks for which the API
        returned no features keep their row with missing feature values.
    """
    track_ids = tracks_df["track_id"].tolist()

    audio_features = []
    # The ids are sent in batches of 100 per request.
    for start in range(0, len(track_ids), 100):
        batch = sp.audio_features(track_ids[start:start + 100])
        # A None entry (no features for that id) is replaced by an empty
        # record so every track still owns exactly one row; otherwise the
        # frame construction fails or the rows shift against tracks_df.
        audio_features.extend(
            features if features is not None else {} for features in batch
        )

    audio_features_df = pd.DataFrame(audio_features)

    # concat(axis=1) aligns on index labels, so the track frame is put on the
    # same positional 0..n-1 index as the freshly built feature frame.
    full_df = pd.concat(
        [tracks_df.reset_index(drop=True), audio_features_df], axis=1
    )

    return full_df

In [162]:
#Function to transform data for charts
def chart_data(full_df):
    """Reshape the valence column into a 2-D array for plotting.

    The frame is pivoted so that each ``playlist_year`` becomes a row and
    each ``rank`` a column; the result is returned as a plain NumPy array
    without the year labels.
    """
    valence_by_year = full_df.pivot(
        index="playlist_year",
        columns="rank",
        values="valence",
    )
    # Only the values are needed downstream, so the year index is discarded.
    unlabeled = valence_by_year.reset_index(drop=True)
    return unlabeled.to_numpy()

In [180]:
# My listening history: 100 tracks per yearly playlist -> audio features -> valence matrix
my_data = get_tracks(years=years, urls=my_urls, no_tracks_per_playlist=100)
my_data_af = get_audiofeatures(tracks_df=my_data)
my_data_chart = chart_data(full_df=my_data_af)

In [183]:
# Global top tracks: only 50 per playlist, because Spotify included just 50 songs for 2022
global_data = get_tracks(years, global_urls, 50)
global_data_af = get_audiofeatures(global_data)
# Reuse the features fetched on the previous line instead of requesting them
# from the API a second time.
global_data_chart = chart_data(global_data_af)

In [202]:
#Plot ridgeline chart for my listening history

# One colour per playlist year, interpolated between the two end colours.
# The count follows `years` rather than a hard-coded 7.
colors = n_colors('rgb(5, 200, 200)', 'rgb(200, 10, 10)', len(years), colortype='rgb')

aiken_fig = go.Figure()
# Each row of my_data_chart holds one year's valence values; rows are labelled
# from `years` (ascending, like the pivoted rows) instead of computing 2016+i.
for year, data_line, color in zip(years, my_data_chart, colors):
    aiken_fig.add_trace(go.Violin(x=data_line, line_color=color, name=str(year)))

# Horizontal half-violins without individual points give the ridgeline look.
aiken_fig.update_traces(orientation='h', side='positive', width=3, points=False)
aiken_fig.update_layout(xaxis_showgrid=False, 
                  xaxis_zeroline=False, 
                  autosize=False, 
                  width=1000,  
                  height=800,
                  title='Positiveness of My Spotify Listening History from 2016 to 2022',
                  showlegend=False,
                  xaxis=dict(title='Positiveness'),)

aiken_fig.show()

In [203]:
# Plot ridgeline chart for the global top tracks.
# `colors` is the per-year palette created in the cell that builds aiken_fig.
global_fig = go.Figure()
# Each row of global_data_chart holds one year's valence values; rows are labelled
# from `years` (ascending, like the pivoted rows) instead of computing 2016+i.
for year, data_line, color in zip(years, global_data_chart, colors):
    global_fig.add_trace(go.Violin(x=data_line, line_color=color, name=str(year)))

# Horizontal half-violins without individual points give the ridgeline look.
global_fig.update_traces(orientation='h', side='positive', width=3, points=False)
global_fig.update_layout(xaxis_showgrid=False, 
                  xaxis_zeroline=False, 
                  autosize=False, 
                  width=1000,  
                  height=800,
                  title='Positiveness of Global Top Tracks from 2016 to 2022',
                  showlegend=False,
                  xaxis=dict(title='Positiveness'),)

global_fig.show()

In [184]:
#Group by year for my data
# The first parameter of GroupBy.median is `numeric_only`, not a column name:
# the previous positional "valence" only worked because a non-empty string is
# truthy. Spelling out numeric_only=True keeps the same result (per-year median
# of every numeric column, valence included) without relying on that accident.
my_data_year = my_data_af.groupby("playlist_year", as_index=False).median(numeric_only=True)
#Add type -> "Me"
my_data_year["type"] = "Me"

#Group by year for global data
global_data_year = global_data_af.groupby("playlist_year", as_index=False).median(numeric_only=True)
#Add type -> "Global"
global_data_year["type"] = "Global"


# Combine datasets
# reset_index() keeps each frame's former row position in an extra `index` column.
combined_df = pd.concat([my_data_year, global_data_year], axis=0).reset_index()

In [199]:
# Line chart of the yearly median valence, one line (and marker symbol) per `type`
compare_fig = px.line(
    combined_df,
    x='playlist_year',
    y='valence',
    color='type',
    symbol="type",
)
# Custom hover text showing the median value and the year of each point
compare_fig.update_traces(
    hovertemplate='Median Positiveness: %{y}<br>Year: %{x}',
)
compare_fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Positiveness',
    title='Median Positiveness of My Vs Global Top Tracks from 2016 to 2022',
)
compare_fig.show()

In [204]:
#Save plots to plotly chart studio
# Each call publishes one of the three figures built above under the given
# filename, using the Chart Studio credentials configured in the first cell.
# auto_open=False presumably keeps the uploaded chart from opening in a browser.
# The last call is deliberately left as a bare expression: its return value
# (the URL string shown as this cell's output) is what the notebook displays.

py.plot(aiken_fig, filename = "aiken's spotify positiveness", auto_open=False)
py.plot(global_fig, filename = "global spotify positiveness", auto_open=False)
py.plot(compare_fig, filename = "aiken vs global spotify positiveness", auto_open=False)

'https://plotly.com/~aiken.ong/13/'