In [10]:
import pandas as pd
import json
import numpy as np
import os
from pathlib import Path

### 1. Setting up the dataframe

In [11]:
# Lift the row limit on displayed frames: later cells show up to 100 rows,
# which pandas would otherwise truncate.
pd.set_option('display.max_rows', None)

# change direc_name to be the name of the folder that you downloaded from Spotify
# it is usually "MyData"
direc_name = "MyData220527/"

# The history is split over numbered files (StreamingHistory0.json,
# StreamingHistory1.json, ...). Accept a numeric suffix of any length so that
# exports with ten or more files are read completely, and ignore anything that
# is not a .json file.
history_prefix = 'StreamingHistory'
numbered_files = []
for x in os.listdir(direc_name):
    stem, extension = os.path.splitext(x)
    number = stem[len(history_prefix):]
    if (stem.startswith(history_prefix)
            and extension.lower() == '.json'
            and number.isdecimal()):
        numbered_files.append((int(number), x))

# os.listdir returns names in arbitrary order; sort by file number so the rows
# are concatenated in the same order on every run. os.path.join works whether
# or not direc_name ends with a slash.
file_list = [os.path.join(direc_name, x) for _, x in sorted(numbered_files)]
if not file_list:
    raise FileNotFoundError(
        f"No StreamingHistory<N>.json files found in {direc_name!r}; "
        "check that direc_name points at the folder downloaded from Spotify."
    )

dfs = [pd.read_json(file) for file in file_list]   # one data frame per json file
df = pd.concat(dfs, ignore_index=True)              # stack them into a single frame
df['endTime'] = pd.to_datetime(df['endTime'])       # parse timestamps so they can be filtered by date
df.head(5)

Unnamed: 0,endTime,artistName,trackName,msPlayed
0,2022-04-29 17:15:00,HAIM,Up From A Dream,15410
1,2022-04-29 17:15:00,Clubhouse,NO WAY!,106460
2,2022-04-29 18:13:00,HAIM,Don't Wanna,244360
3,2022-04-29 18:16:00,aespa,YEPPI YEPPI,132610
4,2022-04-29 20:34:00,aespa,YEPPI YEPPI,80949


### 2. Streaming data for the past year

In [12]:
# Keep only the plays longer than 30 seconds (msPlayed above 30000 ms), since
# those are the ones that are counted by Spotify as streams.
long_enough = df['msPlayed'].gt(30000)
valid_streams = df.loc[long_enough]

# Sum the play time of the valid streams and convert ms -> s -> min -> h.
total = valid_streams['msPlayed'].sum()
seconds_played = total / 1000.0
minutes_played = seconds_played / 60.0
print("Hours listened:", minutes_played / 60.0)

Hours listened: 909.8294341666666


In [27]:
# Count the valid streams of every (artist, track) pair over the entire
# dataset (the past year), ordered from most to least streamed.
per_track = valid_streams.groupby(['artistName', 'trackName']).size()
stream_counts = (
    per_track
    .reset_index(name='Streams')
    .sort_values(by=['Streams'], ascending=False)
)

# Write the full ranking to entire_year.csv in the current working directory.
path = str(Path().absolute())
stream_counts.to_csv(path + '/entire_year.csv')

# Display the 50 most-streamed tracks, numbered from 1 so the index reads as a rank.
top_tracks = stream_counts.head(50)
top_tracks.set_index(np.arange(1, len(top_tracks) + 1))

Unnamed: 0,artistName,trackName,Streams
1,TAEYEON,INVU,462
2,TAEYEON,Can't Control Myself,390
3,Kim Kyung Rok,Sand Castle,259
4,TAEYEON,Toddler,182
5,TAEYEON,Some Nights,178
6,TAEYEON,Siren,161
7,TAEYEON,Timeless,153
8,TAEYEON,Cold As Hell,148
9,TAEYEON,Set Myself On Fire,138
10,HEIZE,HAPPEN,137


### 3. Streaming data for 2022

In [14]:
# get top 10 artists from 2022 and the number of streams
# Filter on the calendar year of endTime so that all of 2022 is kept --
# including a stream that ended at exactly 00:00 on 1 January -- and streams
# from any later year present in the export are left out.
# (endTime was converted to datetime when the data frame was built.)
streams_2022 = valid_streams[valid_streams['endTime'].dt.year == 2022]

# value_counts() returns the artists ordered by number of streams, highest first.
artist_counts = streams_2022['artistName'].value_counts()
artist_counts.head(10)

TAEYEON          2817
Kenshi Yonezu     389
Glen Check        163
Taylor Swift      144
10cm              134
October           114
FINNEAS           108
mol-74             97
IU                 90
Michael Tai        63
Name: artistName, dtype: int64

In [15]:
# Per-track stream counts restricted to 2022 instead of the past year,
# ordered from most to least streamed.
track_sizes_2022 = streams_2022.groupby(['artistName', 'trackName']).size()
streams_2022_counts = (
    track_sizes_2022
    .reset_index(name='Streams')
    .sort_values(by=['Streams'], ascending=False)
)

# output 2022 stream data into 2022.csv in the current working directory
path = str(Path().absolute())
streams_2022_counts.to_csv(path + '/2022.csv')

In [16]:
# Look up a single track in the 2022 counts; the title must match trackName exactly.
song_name = 'INVU'

title_matches = streams_2022_counts['trackName'].eq(song_name)
song_stream_count = streams_2022_counts.loc[title_matches]
song_stream_count.head(10)

Unnamed: 0,artistName,trackName,Streams
1339,TAEYEON,INVU,462


In [26]:
# List the 2022 track counts of one artist; the name must match artistName exactly.
artist_name = 'TAEYEON'

by_artist = streams_2022_counts['artistName'].eq(artist_name)
artist_stream_counts = streams_2022_counts.loc[by_artist]

# Show at most 50 tracks, numbered from 1 so the index reads as a rank.
top_artist_tracks = artist_stream_counts.head(50)
top_artist_tracks.set_index(np.arange(1, len(top_artist_tracks) + 1))

Unnamed: 0,artistName,trackName,Streams
1,TAEYEON,INVU,462
2,TAEYEON,Can't Control Myself,390
3,TAEYEON,Toddler,182
4,TAEYEON,Some Nights,178
5,TAEYEON,Siren,161
6,TAEYEON,Timeless,153
7,TAEYEON,Cold As Hell,148
8,TAEYEON,Set Myself On Fire,138
9,TAEYEON,Ending Credits,135
10,TAEYEON,Heart,115


### 4. 2022 Spotify Wrapped (so far)

In [24]:
# see your Spotify Wrapped so far: the 100 most-streamed tracks of 2022,
# numbered from 1 so the index reads as a rank
wrapped_top = streams_2022_counts.head(100)
wrapped_top.set_index(np.arange(1, len(wrapped_top) + 1))

Unnamed: 0,artistName,trackName,Streams
1,TAEYEON,INVU,462
2,TAEYEON,Can't Control Myself,390
3,TAEYEON,Toddler,182
4,TAEYEON,Some Nights,178
5,TAEYEON,Siren,161
6,TAEYEON,Timeless,153
7,TAEYEON,Cold As Hell,148
8,TAEYEON,Set Myself On Fire,138
9,TAEYEON,Ending Credits,135
10,TAEYEON,Heart,115
