In [65]:
pip install dash

Collecting dash
  Downloading dash-2.0.0-py3-none-any.whl (7.3 MB)
[K     |████████████████████████████████| 7.3 MB 1.5 MB/s eta 0:00:01
[?25hCollecting flask-compress
  Downloading Flask_Compress-1.10.1-py3-none-any.whl (7.9 kB)
Collecting dash-core-components==2.0.0
  Downloading dash_core_components-2.0.0.tar.gz (3.4 kB)
Collecting dash-table==5.0.0
  Downloading dash_table-5.0.0.tar.gz (3.4 kB)
Collecting dash-html-components==2.0.0
  Downloading dash_html_components-2.0.0.tar.gz (3.8 kB)
Collecting brotli
  Downloading Brotli-1.0.9-cp38-cp38-macosx_10_9_x86_64.whl (421 kB)
[K     |████████████████████████████████| 421 kB 3.9 MB/s eta 0:00:01
Building wheels for collected packages: dash-core-components, dash-table, dash-html-components
  Building wheel for dash-core-components (setup.py) ... [?25ldone
[?25h  Created wheel for dash-core-components: filename=dash_core_components-2.0.0-py3-none-any.whl size=3804 sha256=82df18d62f289e819a7f265c4fac5eafa347bba40205ed9b7c0b0e8024d5e

## PERSONAL SPOTIFY DATA

In [67]:
import pandas as pd
import json
import datetime
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import time 
from functools import reduce
from datetime import timedelta
import plotly.express as px
from wordcloud import WordCloud 
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

In [23]:
def load_myspotify_data():
    data = pd.read_json('data/StreamingHistory.json')
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis='columns', inplace=True)
    data['endtime'] = pd.to_datetime(data['endtime'])
    data['duration'] = pd.to_timedelta(data['msplayed'], 'ms')
    data['minplayed'] = data['msplayed'] / 1000 / 60
    data['starttime'] = data['endtime'] - data['duration']
    data['year'] = data['starttime'].dt.year
    data['month'] = data['starttime'].dt.month
    data['week'] = data['starttime'].dt.isocalendar().week
    data['dayofweek'] = data['starttime'].dt.dayofweek
    data['day'] = data['starttime'].dt.day
    data['hour'] = data['starttime'].dt.hour
    data['minute'] = data['starttime'].dt.minute
    data['quarter'] = data['starttime'].dt.quarter
    return data

In [3]:
def extract_myartist(df):
    my_artist = list(set(df.artistname))
    my_artist.remove("")
    return pd.DataFrame(my_artist)

In [4]:
def extract_mytrack(df):
    my_track = list(set(df.trackname))
    my_track.remove("")
    return pd.DataFrame(my_track)

In [21]:
def get_quarter(df,quarter):
    if quarter == 'Q1':
        return df[df.quarter == 1] 
    elif quarter == 'Q2':
        return df[df.quarter == 2] 
    elif quarter == 'Q3':
        return df[df.quarter == 3]    
    elif quarter == 'Q4':
        return df[df.quarter == 4]
    else :
        return df     

In [57]:
def get_total_duration_by_(df,column):
    new_df = df.groupby(column).sum()['minplayed'].reset_index().sort_values('minplayed', ascending=False).rename(columns={'minplayed': 'amount'})
    return new_df

In [58]:
def get_moy_duration_by_(df,column):
    new_df = df.groupby(column).mean()['minplayed'].reset_index().sort_values('minplayed', ascending=False).rename(columns={'minplayed': 'amount'})
    return new_df

In [63]:
def get_top10_(df,column):
    new_df = df.groupby(column).sum()['minplayed'].reset_index().sort_values('minplayed', ascending=False).rename(columns={'minplayed': 'amount'}).head(10)

## SPOTIFY API

In [5]:
client_id = 'ef0d92753d4e44658a3a28ce21de6845'
client_secret = '828972dcf8b84b9f8fe76e27cd570c6d'

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [6]:
def get_features(artist,track):  
    
    columns = ["artistname","followers","genres","popularity","artistType",
               "trackname","danceability","energy","loudness","speechiness","instrumentalness",
              "liveness","valence","tempo","duration_ms"]
    
    df = pd.DataFrame([[artist,"","","","",track,"","","","","","","","",""]],columns = columns )
    
    results = sp.search(q='artist:' + artist, type='artist')
    items = results['artists']['items']
    
    if len(items) > 0:
        #get artist features
        features = items[0]
        df.followers[0] = features['followers']['total']
        df.genres[0] = features['genres']
        df.popularity[0] = features['popularity']
        df.artistType[0] = features['type'] 
    
    results = sp.search(q='artist:'+artist+' track:'+track,type='track')
    items = results['tracks']['items']   
    
    if len(items) > 0:
        features = sp.audio_features(items[0]['id'])[0]
        if(features is not None):
            df.danceability[0] = features['danceability']
            df.energy[0] = features['energy']
            df.loudness[0] = features['loudness']
            df.speechiness[0] = features['speechiness']
            df.instrumentalness[0] = features['instrumentalness']
            df.liveness[0] = features['liveness']
            df.valence[0] = features['valence']
            df.tempo[0] = features['tempo']
            df.duration_ms[0] = features['duration_ms']                

    return df

In [7]:
def create_features_df(df): 
    data = df.drop_duplicates(subset=['artistname', 'trackname'], keep=False)
    inter_df = data.apply(lambda row : get_features(row.artistname, row.trackname),axis=1)
    return reduce(lambda df1,df2 : df1.append(df2), inter_df)

In [8]:
def get_list_genres(df) :
    liste = df.genres.apply(pd.Series).reset_index().melt(id_vars='index').dropna()[['index', 'value']]
    return liste

# ANALYSIS

## Personal data

In [29]:
my_data = load_myspotify_data()
my_data

Unnamed: 0,endtime,artistname,trackname,msplayed,duration,minplayed,starttime,year,month,week,dayofweek,day,hour,minute,quarter
0,2020-12-04 18:21:00,GIMS,YOLO,62314,0 days 00:01:02.314000,1.038567,2020-12-04 18:19:57.686,2020,12,49,4,4,18,19,4
1,2020-12-05 00:01:00,Ya Levis,Lokesha,175885,0 days 00:02:55.885000,2.931417,2020-12-04 23:58:04.115,2020,12,49,4,4,23,58,4
2,2020-12-05 15:53:00,Tayc,African Sugar (avec Tiwa Savage),2304,0 days 00:00:02.304000,0.038400,2020-12-05 15:52:57.696,2020,12,49,5,5,15,52,4
3,2020-12-05 15:53:00,Axel Tony,Miel,2538,0 days 00:00:02.538000,0.042300,2020-12-05 15:52:57.462,2020,12,49,5,5,15,52,4
4,2020-12-06 13:22:00,Burna Boy,African Giant,6997,0 days 00:00:06.997000,0.116617,2020-12-06 13:21:53.003,2020,12,49,6,6,13,21,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6802,2021-12-05 17:25:00,Ninho,No Life,4522,0 days 00:00:04.522000,0.075367,2021-12-05 17:24:55.478,2021,12,48,6,5,17,24,4
6803,2021-12-05 17:25:00,Ninho,No Life,108566,0 days 00:01:48.566000,1.809433,2021-12-05 17:23:11.434,2021,12,48,6,5,17,23,4
6804,2021-12-05 17:28:00,Ninho,RER D,223101,0 days 00:03:43.101000,3.718350,2021-12-05 17:24:16.899,2021,12,48,6,5,17,24,4
6805,2021-12-05 17:29:00,Ninho,YSL,15874,0 days 00:00:15.874000,0.264567,2021-12-05 17:28:44.126,2021,12,48,6,5,17,28,4


In [35]:
my_data.describe()

Unnamed: 0,msplayed,duration,minplayed,year,month,week,dayofweek,day,hour,minute,quarter
count,6807.0,6807,6807.0,6807.0,6807.0,6807.0,6807.0,6807.0,6807.0,6807.0,6807.0
mean,152962.5,0 days 00:02:32.962542089,2.549376,2020.950198,6.59527,26.793595,3.245923,15.306596,13.876891,29.058763,2.518143
std,170078.7,0 days 00:02:50.078699632,2.834645,0.217551,3.708326,16.191828,1.865848,9.504157,5.132148,17.213652,1.217556
min,0.0,0 days 00:00:00,0.0,2020.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0
25%,47537.0,0 days 00:00:47.537000,0.792283,2021.0,3.0,10.0,2.0,5.0,10.0,14.0,1.0
50%,173333.0,0 days 00:02:53.333000,2.888883,2021.0,7.0,28.0,3.0,16.0,15.0,29.0,3.0
75%,202706.0,0 days 00:03:22.706000,3.378433,2021.0,10.0,42.0,5.0,24.0,18.0,43.0,4.0
max,5290736.0,0 days 01:28:10.736000,88.178933,2021.0,12.0,53.0,6.0,31.0,23.0,59.0,4.0


In [30]:
#Total number of songs

len(my_data)

6807

In [31]:
# Number of gathered songs

my_data['trackname'].nunique()

2022

In [32]:
# Number of gathered artist
my_data['artistname'].nunique()

856

In [33]:
get_quarter(my_data,'Q1')

Unnamed: 0,endtime,artistname,trackname,msplayed,duration,minplayed,starttime,year,month,week,dayofweek,day,hour,minute,quarter
339,2021-01-02 18:28:00,Rudeboy,Reason With Me,30079,0 days 00:00:30.079000,0.501317,2021-01-02 18:27:29.921,2021,1,53,5,2,18,27,1
340,2021-01-02 18:28:00,Ninho,Lettre à une femme,576,0 days 00:00:00.576000,0.009600,2021-01-02 18:27:59.424,2021,1,53,5,2,18,27,1
341,2021-01-02 18:29:00,Rudeboy,Reason With Me,6575,0 days 00:00:06.575000,0.109583,2021-01-02 18:28:53.425,2021,1,53,5,2,18,28,1
342,2021-01-02 18:29:00,Ninho,Lettre à une femme,1489,0 days 00:00:01.489000,0.024817,2021-01-02 18:28:58.511,2021,1,53,5,2,18,28,1
343,2021-01-02 18:29:00,Ninho,Lettre à une femme,190,0 days 00:00:00.190000,0.003167,2021-01-02 18:28:59.810,2021,1,53,5,2,18,28,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2443,2021-03-31 14:34:00,WizKid,Ginger (feat. Burna Boy),5503,0 days 00:00:05.503000,0.091717,2021-03-31 14:33:54.497,2021,3,13,2,31,14,33,1
2444,2021-03-31 14:34:00,Rema,Ginger Me,18730,0 days 00:00:18.730000,0.312167,2021-03-31 14:33:41.270,2021,3,13,2,31,14,33,1
2445,2021-03-31 14:35:00,Omah Lay,Lo Lo,18325,0 days 00:00:18.325000,0.305417,2021-03-31 14:34:41.675,2021,3,13,2,31,14,34,1
2446,2021-03-31 14:37:00,Omah Lay,Godly,175215,0 days 00:02:55.215000,2.920250,2021-03-31 14:34:04.785,2021,3,13,2,31,14,34,1


In [59]:
get_total_duration_by_(my_data,'month')

Unnamed: 0,month,amount
2,3,1929.805183
0,1,1804.386533
8,9,1706.807517
10,11,1646.570067
4,5,1588.499917
9,10,1537.396183
1,2,1536.882467
11,12,1419.9909
7,8,1217.961767
6,7,1079.87565


In [60]:
get_moy_duration_by_(my_data,'dayofweek')

Unnamed: 0,dayofweek,amount
2,2,2.920696
5,5,2.74976
3,3,2.693855
6,6,2.49769
1,1,2.412191
4,4,2.302944
0,0,2.24949


In [61]:
get_total_duration_by_(my_data,'hour')

Unnamed: 0,hour,amount
17,17,1453.888567
16,16,1447.05235
15,15,1267.780383
19,19,1211.263117
11,11,1187.580333
18,18,1171.259017
10,10,1159.919333
14,14,1055.738817
13,13,869.94865
12,12,856.952633


In [34]:
get_top10_artist(my_data)

Unnamed: 0,artistname,amount
690,Si Maman M'avait Dit,678.358
386,Kanye West,462.018583
627,Rema,440.994367
98,Booba,400.493167
382,Kalash,389.77325
301,Hillsong Worship,385.027467
814,William McDowell,372.581233
818,WizKid,370.765333
740,Tayc,350.24555
240,Fally Ipupa,312.915317


In [None]:
#the most appearing artists
my_data \
    .groupby('artistname') \
    .sum()['msplayed'] \
    .reset_index() \
    .sort_values('msplayed', ascending=False) \
    .rename(columns={'msplayed': 'amount'}) \
    .head(10)

In [None]:
#the most appearing artists
my_data \
    .groupby('trackname') \
    .sum()['msplayed'] \
    .reset_index() \
    .sort_values('msplayed', ascending=False) \
    .rename(columns={'msplayed': 'amount'}) \
    .head(10)

In [None]:
#Top five artists from each year

counted_quarter_df = my_data \
    .assign(quarter_added=my_data.quarter) \
    .groupby(['artistname', 'quarter']) \
    .sum()['msplayed'] \
    .reset_index() \
    .sort_values('msplayed', ascending=False)

in_top_5_quarter_artist = counted_quarter_df \
    .groupby('quarter') \
    .head(5) \
    .artistname \
    .unique()


counted_quarter_df \
    [counted_quarter_df.artistname.isin(in_top_5_quarter_artist)] \
    .pivot('artistname', 'quarter', 'msplayed') \
    .fillna(0) \
    .style.background_gradient()

## Features added

In [38]:
features = pd.read_csv("data/features.csv")

In [39]:
features

Unnamed: 0.1,Unnamed: 0,artistname,followers,genres,popularity,artistType,trackname,danceability,energy,loudness,speechiness,instrumentalness,liveness,valence,tempo,duration_ms
0,0,Axel Tony,51388.0,"['francoton', 'zouk riddim']",40.0,artist,Miel,0.596,0.662,-9.055,0.0451,0.000014,0.1320,0.431,153.908,213368.0
1,1,Burna Boy,1605857.0,"['afro dancehall', 'dancehall', 'nigerian hip ...",78.0,artist,Show & Tell (feat. Future),0.623,0.724,-5.238,0.2440,0.000003,0.0900,0.615,127.346,162951.0
2,2,Burna Boy,1605857.0,"['afro dancehall', 'dancehall', 'nigerian hip ...",78.0,artist,Wetin Man Go Do,0.857,0.787,-6.122,0.0735,0.000003,0.3890,0.870,98.222,188181.0
3,3,Burna Boy,1605857.0,"['afro dancehall', 'dancehall', 'nigerian hip ...",78.0,artist,Dangote,0.539,0.651,-5.017,0.4030,0.000002,0.0802,0.800,67.169,225405.0
4,4,13 Organisé,151324.0,"['francoton', 'french hip hop', 'pop urbaine']",61.0,artist,Bande organisée,0.901,0.939,-2.762,0.2740,0.000000,0.0643,0.805,142.948,356347.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1043,1043,Ninho,5261227.0,"['french hip hop', 'pop urbaine']",86.0,artist,Outro,0.452,0.454,-11.197,0.0838,0.000000,0.1560,0.402,96.186,215756.0
1044,1044,Ninho,5261227.0,"['french hip hop', 'pop urbaine']",86.0,artist,La maison que je voulais,0.739,0.590,-7.528,0.4130,0.000000,0.1410,0.393,124.098,221782.0
1045,1045,Ninho,5261227.0,"['french hip hop', 'pop urbaine']",86.0,artist,RER D,0.632,0.720,-7.163,0.3450,0.000050,0.3030,0.165,183.003,223101.0
1046,1046,Ninho,5261227.0,"['french hip hop', 'pop urbaine']",86.0,artist,YSL,0.712,0.801,-6.384,0.3310,0.000000,0.2270,0.281,121.024,218596.0


In [None]:
tracks_with_features_df = my_data.merge(features, on=['artistname','trackname'], how='left')

In [None]:
tracks_with_features_df 

In [None]:
get_list_genres(artists).value

In [None]:
text1 = ' '.join(get_list_genres(artists).value)
wordcloud2 = WordCloud().generate(text1)
# Generate plot
plt.imshow(wordcloud2)
plt.axis("off")
plt.show()

In [68]:
app = dash.Dash(__name__)

app.layout = html.Div([
    dcc.Graph(id='graph-with-slider'),
    dcc.Slider(
        id='quarter-slider',
        min=my_data['quarter'].min(),
        max=my_data['quarter'].max(),
        value=my_data['quarter'].min(),
        marks={str(quarter): str(quarter) for quarter in my_data['quarter'].unique()},
        step=None
    )
])


@app.callback(
    Output('graph-with-slider', 'figure'),
    Input('quarter-slider', 'value'))
def update_figure(selected_month):
    filtered_df = my_data[my_data.month == selected_month]

    fig = px.scatter(filtered_df, x="month", y="minplayed",
                     log_x=True, size_max=55)

    fig.update_layout(transition_duration=500)

    return fig


if __name__ == '__main__':
    app.run_server(debug=True)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
