In [18]:
import pandas as pd
import plotly.express as px

In [3]:
# Load the three streaming-history JSON exports and stack them into one frame.
# `DataFrame.append` is deprecated (and removed in pandas 2.0), so the parts
# are combined with a single `pd.concat` call instead of chained appends.
history_files = [
    '../MyData/StreamingHistory0.json',
    '../MyData/StreamingHistory1.json',
    '../MyData/StreamingHistory2.json',
]
temp0, temp1, temp2 = (pd.read_json(path) for path in history_files)

# combining all streams (each part keeps its own row labels, exactly as
# the chained appends did)
df_streams = pd.concat([temp0, temp1, temp2])
df_streams.shape

(22693, 4)

In [4]:
# renaming columns to snake_case
# (the former 'minutes_played' -> 'minutes_played' entry was a no-op and is dropped)
columns_name = {'endTime': 'end_time', 'artistName': 'artist_name', 'trackName': 'track_name',
                'msPlayed': 'milliseconds_played'}

# A non-inplace rename ignores labels that are already renamed, and the minutes
# column is derived from the renamed column, so this cell can be re-run safely
# (the original raised KeyError on 'msPlayed' when executed a second time).
df_streams = (
    df_streams
    .rename(columns=columns_name)
    # converting the milliseconds values to minutes to improve readability
    .assign(minutes_played=lambda frame: frame['milliseconds_played'].divide(60000))
)

In [60]:
# Preview the first five rows of the combined streaming history.
# NOTE(review): the saved output below already shows the date/time/day/month
# columns that are only created in the following cell, so this cell was last
# executed out of order; a fresh top-to-bottom run will not show them here.
df_streams.head(n=5)

Unnamed: 0,end_time,artist_name,track_name,milliseconds_played,minutes_played,date,time,day,month
0,2020-05-14 21:52:00,OneRepublic,Apologize,188270,3.137833,2020-05-14,21:52:00,Thursday,May
1,2020-05-14 21:52:00,AC/DC,Highway to Hell,1690,0.028167,2020-05-14,21:52:00,Thursday,May
2,2020-05-14 21:52:00,Imagine Dragons,Believer,940,0.015667,2020-05-14,21:52:00,Thursday,May
3,2020-05-14 21:55:00,Y2K,Lalala,160626,2.6771,2020-05-14,21:55:00,Thursday,May
4,2020-05-14 21:56:00,Backstreet Boys,Shape of My Heart,930,0.0155,2020-05-14,21:56:00,Thursday,May


In [58]:
# separating date and time
# The `.dt` accessor derives every calendar field in one vectorized pass,
# replacing the per-row Python loops and the repeated DatetimeIndex construction.
end_times = pd.to_datetime(df_streams['end_time'])

df_streams = df_streams.assign(
    end_time=end_times,
    date=end_times.dt.date,            # datetime.date objects
    time=end_times.dt.time,            # datetime.time objects
    day=end_times.dt.day_name(),       # weekday name, e.g. 'Thursday'
    month=end_times.dt.month_name(),   # month name, e.g. 'May'
)

In [59]:
# Print a concise summary of the frame: index range, column names,
# non-null counts, dtypes and approximate memory usage.
df_streams.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 22693 entries, 0 to 2692
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   end_time             22693 non-null  datetime64[ns]
 1   artist_name          22693 non-null  object        
 2   track_name           22693 non-null  object        
 3   milliseconds_played  22693 non-null  int64         
 4   minutes_played       22693 non-null  float64       
 5   date                 22693 non-null  object        
 6   time                 22693 non-null  object        
 7   day                  22693 non-null  object        
 8   month                22693 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(6)
memory usage: 1.7+ MB


In [9]:
# Ten artists with the most total listening time (in minutes), highest first.
(
    df_streams
    .groupby(by='artist_name')['minutes_played']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

artist_name
Arijit Singh     740.679883
Pritam           661.861550
A.R. Rahman      368.314833
Post Malone      306.676417
Prateek Kuhad    303.163333
Ed Sheeran       302.120150
John Williams    291.878367
Darshan Raval    277.400567
Hans Zimmer      274.062167
Eminem           243.227600
Name: minutes_played, dtype: float64

In [10]:
# Ten calendar dates with the most total listening time (in minutes), highest first.
(
    df_streams
    .groupby(by='date')['minutes_played']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

date
2021-01-11    703.297533
2021-02-10    372.407950
2020-10-31    323.800467
2020-08-29    288.140083
2020-08-24    287.289667
2020-10-30    268.640467
2020-09-20    264.200167
2020-10-09    241.502967
2020-07-17    233.641983
2020-09-13    233.403500
Name: minutes_played, dtype: float64

In [39]:
# Total minutes streamed on each calendar date, as a two-column frame
# ('date', 'minutes_played') ready for plotting.
grp_date = (
    df_streams
    .groupby(by='date')['minutes_played']
    .sum()
    .reset_index()
)

# Line chart of daily listening time across the whole history.
fig = px.line(
    grp_date,
    x='date',
    y='minutes_played',
    title='Songs streamed in a year (May-2020 to May 2021)',
)
fig.show()

Peak day: January 11, 2021, with about 703.30 minutes of listening time.

In [62]:
# Total minutes streamed per month name, rounded to two decimals for labelling.
grp_month = (
    df_streams
    .groupby(by='month')['minutes_played']
    .sum()
    .round(2)
    .reset_index()
)

# Bar Plot
# The title previously read 'Songs streamed by day', copied from the weekday
# chart; this figure is grouped by month, so the title now says so.
fig = px.bar(grp_month, x='month', y='minutes_played', text='minutes_played', title='Songs streamed by month')
# Show each bar's value above it, abbreviated to two significant digits.
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
# Hide labels that would have to shrink below size 8 to fit.
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
# Order the bars from the largest monthly total to the smallest.
fig.update_xaxes(categoryorder='total descending')
fig.show()

# Pie Chart
#fig = px.pie(grp_month, values='minutes_played', names='month', title='Songs played by Month')
#fig.update_traces(textposition='inside', textinfo='percent+label')
#fig.show()

In [57]:
# Total minutes streamed per weekday name, rounded to two decimals.
minutes_by_day = df_streams.groupby(by='day')['minutes_played'].sum()
grp_day = pd.DataFrame(minutes_by_day).reset_index()
grp_day['minutes_played'] = grp_day['minutes_played'].round(2)

# Pie Chart: share of listening time per weekday, with the percentage and
# the weekday name drawn inside each slice. (A bar-chart variant would mirror
# the monthly bar plot, using grp_day and x='day'.)
fig = px.pie(
    grp_day,
    values='minutes_played',
    names='day',
    title='Songs played by Day',
)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [12]:
# Ten tracks with the most total listening time (in minutes), highest first.
(
    df_streams
    .groupby(by='track_name')['minutes_played']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

track_name
Modern Loneliness - Ritviz Remix                          125.250317
Emptiness and Aitebar - Live                              121.155250
Julali Gaath Ga                                           110.668733
Nervous                                                   108.964483
VIDA LOCA                                                 107.885300
Do You?                                                   107.411133
Kalank (Title Track)                                      107.183900
HIGHEST IN THE ROOM (feat. ROSALÍA & Lil Baby) - REMIX    107.099100
Ek Tarfa                                                  104.361417
Teri Meri Ladayi                                          103.352350
Name: minutes_played, dtype: float64

In [16]:
# Span of the listening history: earliest and latest stream end times.
# Series.min()/max() are vectorized and skip missing timestamps, whereas the
# Python built-ins iterate element by element and compare unreliably with NaT.
min_date = df_streams['end_time'].min()
max_date = df_streams['end_time'].max()

# Displayed as a Timedelta covering the whole export.
max_date - min_date

Timedelta('366 days 01:59:00')