# My weekly listenings in Spotify
The `n` most-listened-to artists of each week are shown.

## 0. Settings before run

In [None]:
# Folder that holds the Spotify export directory named 'MyData'.
# Several such directories may sit side by side in this folder,
# and each of them may be renamed to anything you like.
file_path = '/Users/xxx/folder_name'

# Number of top artists to display for every week.
n = 3

## 1. Imports

In [2]:
import plotly.express as px
import plotly.graph_objects as go
from itertools import cycle
import pandas as pd
import numpy as np
import glob
import os
from datetime import timedelta

## 2. Data reading and transformation

In [3]:
# Read every StreamingHistory*.json found one directory level below
# file_path and stack them into a single dataframe.
json_pattern = os.path.join(file_path, '*/StreamingHistory*.json')
# sorted() makes the read order independent of the file system's listing order
file_list = sorted(glob.glob(json_pattern))

# Fail early with an actionable message; otherwise pd.concat would raise
# an opaque "No objects to concatenate" on the empty list.
if not file_list:
    raise FileNotFoundError(
        "No streaming history files match " + repr(json_pattern)
        + "; check 'file_path' in the settings cell"
    )

dfs = [pd.read_json(file, lines=False) for file in file_list]
data = pd.concat(dfs, ignore_index=True)

# the same record can appear in more than one export folder
data.drop_duplicates(inplace=True)

In [4]:
# Derive the calendar columns from the playback timestamp.
data["endTime"] = pd.to_datetime(data["endTime"])
played_at = data["endTime"]
data["year"] = played_at.dt.year.astype("int")
data["artist"] = data["artistName"]
data["month"] = played_at.dt.month.astype("int")

# Step back to the Monday of the timestamp's week (dayofweek is 0 on Monday),
# then forward six days, i.e. to that week's Sunday. 'period' keeps only the
# leading 'YYYY-MM-DD' part of the result.
offset_from_monday = pd.to_timedelta(played_at.dt.dayofweek, unit='d')
data["week"] = played_at - offset_from_monday + timedelta(days=6)
data['period'] = data['week'].astype(str).str[:10]

# Keep only tracks played for at least 0.5 min (30000 ms).
long_enough = data["msPlayed"].ge(30000)
data = data[long_enough]

# Count listenings per (artist, period), then keep the n most listened
# artists of every period.
listen_counts = data.groupby(['artist', 'period']).size().reset_index(name='count')
ranked = listen_counts.sort_values('count', ascending=False)
data = ranked.groupby('period').head(n)

### Adding missing periods to original data 
This creates breaks in the line plot where data is missing (instead of interpolating across the gap).

In [5]:
# Build a grid with one row per (artist, week) for every week between the
# first and last record. Weeks in which an artist is not among the top n end
# up with a missing count, which is what breaks the line in the plot.
if data.empty:
    raise ValueError("no listening data available to build the weekly grid")

# 'period' holds 'YYYY-MM-DD' strings, so string min/max is chronological
first_period = data['period'].min()
last_period = data['period'].max()
# freq='W' generates Sundays, matching the week-ending dates in 'period'
all_periods = pd.DataFrame({
    'key': 1,
    'period': pd.date_range(start=first_period, end=last_period, freq='W'),
})

# one row per distinct artist, indexed 0..k-1
all_artists = pd.DataFrame({
    'key': 1,
    'artist': data['artist'].drop_duplicates().reset_index(drop=True),
})

# Cross join on the constant 'key': every artist paired with every week.
# The all-None 'count' placeholder makes the merge below emit 'count_x'
# (placeholder) and 'count_y' (real counts), the names the plotting code reads.
full = (
    all_artists.merge(all_periods, on='key')
    .drop(columns='key')
    .assign(period=lambda df: df['period'].dt.strftime('%Y-%m-%d'),
            count=None)
)

# attach the number of listenings per artist for each period
full = full.merge(data, on=['artist', 'period'], how='left')

full = full.sort_values('period', ascending=True)
full['count_y'] = full['count_y'].astype('float')
full = full.where(pd.notnull(full), None)

## 3. Visualisation

In [None]:
# Colours are handed out in order and reused once the palette is exhausted.
palette = cycle(px.colors.qualitative.Bold)

fig = go.Figure(layout_title_text=f"Top {n} artists each week")

hover_template = (
    "<b>%{text}</b><br>"
    "# of listenings: %{y}"
    "<extra></extra>"
)

# One trace per artist; connectgaps=False leaves gaps at missing counts.
for artist, colour in zip(all_artists['artist'], palette):
    artist_rows = full[full["artist"] == artist]
    trace = go.Scatter(
        x=artist_rows['period'],
        y=artist_rows['count_y'],
        name=artist,
        text=artist_rows['artist'],
        mode='lines+markers',
        line_shape='spline',
        marker_color=colour,
        connectgaps=False,
        showlegend=False,
        hovertemplate=hover_template,
    )
    fig.add_trace(trace)

# Shared marker styling for all traces.
marker_style = {
    "size": 7,
    "line": {"width": 1, "color": 'DarkSlateGrey'},
}
fig.update_traces(marker=marker_style)

fig.update_layout(
    hoverlabel={"font_size": 16},
    xaxis={"showgrid": False},
    yaxis={"gridcolor": 'white'},
)

fig.show()