In [None]:
pip install dash

## PERSONAL SPOTIFY DATA

In [1]:
import pandas as pd
import json
import datetime
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import time 
from functools import reduce
from datetime import timedelta
import plotly.express as px
from wordcloud import WordCloud 
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

In [2]:
def load_myspotify_data():
    data = pd.read_json('data/StreamingHistory.json')
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis='columns', inplace=True)
    data['endtime'] = pd.to_datetime(data['endtime'])
    data['duration'] = pd.to_timedelta(data['msplayed'], 'ms')
    data['minplayed'] = data['msplayed'] / 1000 / 60
    data['starttime'] = data['endtime'] - data['duration']
    data['date'] = data['starttime'].dt.date
    data['year'] = data['starttime'].dt.year
    data['month'] = data['starttime'].dt.month
    data['week'] = data['starttime'].dt.isocalendar().week
    data['dayofweek'] = data['starttime'].dt.dayofweek
    data['day'] = data['starttime'].dt.day
    data['hour'] = data['starttime'].dt.hour
    data['minute'] = data['starttime'].dt.minute
    data['quarter'] = data['starttime'].dt.quarter
    return data

In [3]:
def extract_myartist(df):
    my_artist = list(set(df.artistname))
    my_artist.remove("")
    return pd.DataFrame(my_artist)

In [4]:
def extract_mytrack(df):
    my_track = list(set(df.trackname))
    my_track.remove("")
    return pd.DataFrame(my_track)

In [5]:
def get_quarter(df,quarter):
    if quarter == 'Q1':
        return df[df.quarter == 1] 
    elif quarter == 'Q2':
        return df[df.quarter == 2] 
    elif quarter == 'Q3':
        return df[df.quarter == 3]    
    elif quarter == 'Q4':
        return df[df.quarter == 4]
    else :
        return df     

In [6]:
def get_total_duration_by_(df,column):
    new_df = df.groupby(column).sum()['minplayed'].reset_index()
    return new_df

In [7]:
def get_moy_duration_by_(df,column):
    new_df = df.groupby(column).mean()['minplayed'].reset_index().sort_values('minplayed', ascending=False).rename(columns={'minplayed': 'amount'})
    return new_df

In [8]:
def get_tracks_by_date(df,date):
    new_df = df[df.date == date]
    return new_df[['trackname','artistname','minplayed']]

In [78]:
def get_top10_by_(df,group,column):
    if group =='':
         return df.groupby(column).agg({'minplayed':'sum','msplayed' : 'count'}).reset_index().sort_values(['minplayed','msplayed'], ascending=False).rename(columns={'minplayed': 'duration_min','msplayed':'total_count'}).head(10)    
                    
    else :    
        subgroup = df[group].drop_duplicates()
        new_df = subgroup.apply(lambda var : df[df[group] == var].groupby(column).agg({'minplayed':'sum','msplayed' : 'count'}).reset_index().sort_values(['minplayed','msplayed'], ascending=False).rename(columns={'minplayed': 'duration_min','msplayed':'total_count'}).head(10))    
        return reduce(lambda df1,df2 : df1.append(df2), new_df).sort_values([group,'duration_min'], ascending=(True,False))

In [84]:
def get_list_features_by_track(df,features):
    return df.merge(features, how='left', on='trackname')
    

In [82]:
var = get_top10_by_(my_data,'','trackname')

In [85]:
get_list_features_by_track(var,features)

Unnamed: 0,trackname,duration_min,total_count,artistname,followers,genres,popularity,artistType,danceability,energy,loudness,speechiness,instrumentalness,liveness,valence,tempo,duration_ms
0,Ginger Me,174.27255,59,,,,,,,,,,,,,,
1,Withholding Nothing Medley (Live),154.220167,16,,,,,,,,,,,,,,
2,King Of Kings,118.852667,34,,,,,,,,,,,,,,
3,What A Beautiful Name,114.08755,37,,,,,,,,,,,,,,
4,Bruxelles,111.270833,56,,,,,,,,,,,,,,
5,Le temps,109.2955,41,,,,,,,,,,,,,,
6,Beamer (Bad Boys),100.448283,55,,,,,,,,,,,,,,
7,Here I Am To Worship,96.789417,32,,,,,,,,,,,,,,
8,Ginger (feat. Burna Boy),96.644967,38,,,,,,,,,,,,,,
9,4 croisees,95.905583,27,,,,,,,,,,,,,,


## SPOTIFY API

In [10]:
client_id = 'ef0d92753d4e44658a3a28ce21de6845'
client_secret = '828972dcf8b84b9f8fe76e27cd570c6d'

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [101]:
def get_features(artist,track):  
    
    columns = ["artistname","followers","genres","popularity","artistType",
               "trackname","danceability","energy","loudness","speechiness","instrumentalness",
              "liveness","valence","tempo","duration_ms"]
    
    df = pd.DataFrame([[artist,"","","","",track,"","","","","","","","",""]],columns = columns )
    
    results = sp.search(q='artist:' + artist, type='artist')
    items = results['artists']['items']
    
    if len(items) > 0:
        #get artist features
        features = items[0]
        df.followers[0] = features['followers']['total']
        df.genres[0] = features['genres']
        df.popularity[0] = features['popularity']
        df.artistType[0] = features['type'] 
    
    results = sp.search(q='artist:'+artist+' track:'+track,type='track')
    items = results['tracks']['items']   
    
    if len(items) > 0:
        features = sp.audio_features(items[0]['id'])[0]
        if(features is not None):
            df.danceability[0] = features['danceability']
            df.energy[0] = features['energy']
            df.loudness[0] = features['loudness']
            df.speechiness[0] = features['speechiness']
            df.instrumentalness[0] = features['instrumentalness']
            df.liveness[0] = features['liveness']
            df.valence[0] = features['valence']
            df.tempo[0] = features['tempo']
            df.duration_ms[0] = features['duration_ms']                

    return df

In [102]:
def create_features_df(df): 
    data = df.drop_duplicates(subset=['artistname', 'trackname'], keep="first")
    inter_df = data.apply(lambda row : get_features(row.artistname, row.trackname),axis=1)
    return reduce(lambda df1,df2 : df1.append(df2), inter_df)

In [13]:
def get_list_genres(df) :
    liste = df.genres.apply(pd.Series).reset_index().melt(id_vars='index').dropna()[['index', 'value']]
    return liste

# ANALYSIS

## Personal data

In [52]:
my_data = load_myspotify_data()
my_data

Unnamed: 0,endtime,artistname,trackname,msplayed,duration,minplayed,starttime,date,year,month,week,dayofweek,day,hour,minute,quarter
0,2020-12-04 18:21:00,GIMS,YOLO,62314,0 days 00:01:02.314000,1.038567,2020-12-04 18:19:57.686,2020-12-04,2020,12,49,4,4,18,19,4
1,2020-12-05 00:01:00,Ya Levis,Lokesha,175885,0 days 00:02:55.885000,2.931417,2020-12-04 23:58:04.115,2020-12-04,2020,12,49,4,4,23,58,4
2,2020-12-05 15:53:00,Tayc,African Sugar (avec Tiwa Savage),2304,0 days 00:00:02.304000,0.038400,2020-12-05 15:52:57.696,2020-12-05,2020,12,49,5,5,15,52,4
3,2020-12-05 15:53:00,Axel Tony,Miel,2538,0 days 00:00:02.538000,0.042300,2020-12-05 15:52:57.462,2020-12-05,2020,12,49,5,5,15,52,4
4,2020-12-06 13:22:00,Burna Boy,African Giant,6997,0 days 00:00:06.997000,0.116617,2020-12-06 13:21:53.003,2020-12-06,2020,12,49,6,6,13,21,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6802,2021-12-05 17:25:00,Ninho,No Life,4522,0 days 00:00:04.522000,0.075367,2021-12-05 17:24:55.478,2021-12-05,2021,12,48,6,5,17,24,4
6803,2021-12-05 17:25:00,Ninho,No Life,108566,0 days 00:01:48.566000,1.809433,2021-12-05 17:23:11.434,2021-12-05,2021,12,48,6,5,17,23,4
6804,2021-12-05 17:28:00,Ninho,RER D,223101,0 days 00:03:43.101000,3.718350,2021-12-05 17:24:16.899,2021-12-05,2021,12,48,6,5,17,24,4
6805,2021-12-05 17:29:00,Ninho,YSL,15874,0 days 00:00:15.874000,0.264567,2021-12-05 17:28:44.126,2021-12-05,2021,12,48,6,5,17,28,4


In [15]:
my_data.describe()

Unnamed: 0,msplayed,duration,minplayed,year,month,week,dayofweek,day,hour,minute,quarter
count,6807.0,6807,6807.0,6807.0,6807.0,6807.0,6807.0,6807.0,6807.0,6807.0,6807.0
mean,152962.5,0 days 00:02:32.962542089,2.549376,2020.950198,6.59527,26.793595,3.245923,15.306596,13.876891,29.058763,2.518143
std,170078.7,0 days 00:02:50.078699632,2.834645,0.217551,3.708326,16.191828,1.865848,9.504157,5.132148,17.213652,1.217556
min,0.0,0 days 00:00:00,0.0,2020.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0
25%,47537.0,0 days 00:00:47.537000,0.792283,2021.0,3.0,10.0,2.0,5.0,10.0,14.0,1.0
50%,173333.0,0 days 00:02:53.333000,2.888883,2021.0,7.0,28.0,3.0,16.0,15.0,29.0,3.0
75%,202706.0,0 days 00:03:22.706000,3.378433,2021.0,10.0,42.0,5.0,24.0,18.0,43.0,4.0
max,5290736.0,0 days 01:28:10.736000,88.178933,2021.0,12.0,53.0,6.0,31.0,23.0,59.0,4.0


In [16]:
#Total number of songs

len(my_data)

6807

In [17]:
# Number of gathered songs

my_data['trackname'].nunique()

2022

In [18]:
# Number of gathered artist
my_data['artistname'].nunique()

856

In [19]:
get_quarter(my_data,'Q1')

Unnamed: 0,endtime,artistname,trackname,msplayed,duration,minplayed,starttime,date,year,month,week,dayofweek,day,hour,minute,quarter
339,2021-01-02 18:28:00,Rudeboy,Reason With Me,30079,0 days 00:00:30.079000,0.501317,2021-01-02 18:27:29.921,2021-01-02,2021,1,53,5,2,18,27,1
340,2021-01-02 18:28:00,Ninho,Lettre à une femme,576,0 days 00:00:00.576000,0.009600,2021-01-02 18:27:59.424,2021-01-02,2021,1,53,5,2,18,27,1
341,2021-01-02 18:29:00,Rudeboy,Reason With Me,6575,0 days 00:00:06.575000,0.109583,2021-01-02 18:28:53.425,2021-01-02,2021,1,53,5,2,18,28,1
342,2021-01-02 18:29:00,Ninho,Lettre à une femme,1489,0 days 00:00:01.489000,0.024817,2021-01-02 18:28:58.511,2021-01-02,2021,1,53,5,2,18,28,1
343,2021-01-02 18:29:00,Ninho,Lettre à une femme,190,0 days 00:00:00.190000,0.003167,2021-01-02 18:28:59.810,2021-01-02,2021,1,53,5,2,18,28,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2443,2021-03-31 14:34:00,WizKid,Ginger (feat. Burna Boy),5503,0 days 00:00:05.503000,0.091717,2021-03-31 14:33:54.497,2021-03-31,2021,3,13,2,31,14,33,1
2444,2021-03-31 14:34:00,Rema,Ginger Me,18730,0 days 00:00:18.730000,0.312167,2021-03-31 14:33:41.270,2021-03-31,2021,3,13,2,31,14,33,1
2445,2021-03-31 14:35:00,Omah Lay,Lo Lo,18325,0 days 00:00:18.325000,0.305417,2021-03-31 14:34:41.675,2021-03-31,2021,3,13,2,31,14,34,1
2446,2021-03-31 14:37:00,Omah Lay,Godly,175215,0 days 00:02:55.215000,2.920250,2021-03-31 14:34:04.785,2021-03-31,2021,3,13,2,31,14,34,1


In [20]:
get_total_duration_by_(my_data,'month')

Unnamed: 0,month,minplayed
0,1,1804.386533
1,2,1536.882467
2,3,1929.805183
3,4,905.068533
4,5,1588.499917
5,6,980.355683
6,7,1079.87565
7,8,1217.961767
8,9,1706.807517
9,10,1537.396183


In [21]:
get_total_duration_by_(my_data,['day','month','year'])

Unnamed: 0,day,month,year,minplayed
0,1,4,2021,17.869217
1,1,5,2021,21.284900
2,1,6,2021,12.584233
3,1,7,2021,17.264567
4,1,9,2021,157.710167
...,...,...,...,...
220,31,1,2021,86.860300
221,31,3,2021,14.789967
222,31,7,2021,34.475533
223,31,8,2021,51.184217


In [22]:
get_moy_duration_by_(my_data,'dayofweek')

Unnamed: 0,dayofweek,amount
2,2,2.920696
5,5,2.74976
3,3,2.693855
6,6,2.49769
1,1,2.412191
4,4,2.302944
0,0,2.24949


In [23]:
get_total_duration_by_(my_data,'hour')

Unnamed: 0,hour,minplayed
0,0,243.450033
1,1,139.28405
2,2,194.985333
3,3,150.993067
4,4,153.539067
5,5,171.866283
6,6,334.186217
7,7,702.86165
8,8,674.803883
9,9,659.84595


In [27]:
#the most appearing artists
my_data \
    .groupby('artistname') \
    .sum()['msplayed'] \
    .reset_index() \
    .sort_values('msplayed', ascending=False) \
    .rename(columns={'msplayed': 'amount'}) \
    .head(10)

Unnamed: 0,artistname,amount
690,Si Maman M'avait Dit,40701480
386,Kanye West,27721115
627,Rema,26459662
98,Booba,24029590
382,Kalash,23386395
301,Hillsong Worship,23101648
814,William McDowell,22354874
818,WizKid,22245920
740,Tayc,21014733
240,Fally Ipupa,18774919


In [28]:
#the most appearing artists
my_data \
    .groupby('trackname') \
    .sum()['msplayed'] \
    .reset_index() \
    .sort_values('msplayed', ascending=False) \
    .rename(columns={'msplayed': 'amount'}) \
    .head(10)

Unnamed: 0,trackname,amount
722,Ginger Me,10456353
1953,Withholding Nothing Medley (Live),9253210
976,King Of Kings,7131160
1929,What A Beautiful Name,6845253
324,Bruxelles,6676250
1044,Le temps,6557730
237,Beamer (Bad Boys),6026897
793,Here I Am To Worship,5807365
721,Ginger (feat. Burna Boy),5798698
34,4 croisees,5754335


In [29]:
#Top five artists from each year

counted_quarter_df = my_data \
    .assign(quarter_added=my_data.quarter) \
    .groupby(['artistname', 'quarter']) \
    .sum()['msplayed'] \
    .reset_index() \
    .sort_values('msplayed', ascending=False)

in_top_5_quarter_artist = counted_quarter_df \
    .groupby('quarter') \
    .head(5) \
    .artistname \
    .unique()


counted_quarter_df \
    [counted_quarter_df.artistname.isin(in_top_5_quarter_artist)] \
    .pivot('artistname', 'quarter', 'msplayed') \
    .fillna(0) \
    .style.background_gradient()

quarter,1,2,3,4
artistname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Beyoncé,8075898.0,1702893.0,682714.0,0.0
Booba,17547819.0,387962.0,671052.0,5422757.0
Burna Boy,7534602.0,2526331.0,2813454.0,4849321.0
Drake,0.0,811857.0,10529465.0,11593.0
Fally Ipupa,5534798.0,5786728.0,1425688.0,6027705.0
Hillsong Worship,1604192.0,7780871.0,1631942.0,12084643.0
Kalash,2847654.0,329204.0,1636282.0,18573255.0
Kanye West,0.0,169676.0,18469641.0,9081798.0
Lady Gaga,220626.0,6734854.0,0.0,0.0
Maahlox Le Vibeur,0.0,0.0,1176683.0,8587301.0


## Features added

In [None]:
features = create_features_df(my_data)

In [None]:
features.to_csv("data/features.csv")

In [None]:
features = pd.read_csv("data/features.csv")

In [None]:
features

In [None]:
features = features.drop("Unnamed: 0",axis = 1)

In [34]:
tracks_with_features_df = my_data.merge(features, on=['artistname','trackname'], how='left')

In [35]:
tracks_with_features_df 

Unnamed: 0,endtime,artistname,trackname,msplayed,duration,minplayed,starttime,date,year,month,...,artistType,danceability,energy,loudness,speechiness,instrumentalness,liveness,valence,tempo,duration_ms
0,2020-12-04 18:21:00,GIMS,YOLO,62314,0 days 00:01:02.314000,1.038567,2020-12-04 18:19:57.686,2020-12-04,2020,12,...,,,,,,,,,,
1,2020-12-05 00:01:00,Ya Levis,Lokesha,175885,0 days 00:02:55.885000,2.931417,2020-12-04 23:58:04.115,2020-12-04,2020,12,...,,,,,,,,,,
2,2020-12-05 15:53:00,Tayc,African Sugar (avec Tiwa Savage),2304,0 days 00:00:02.304000,0.038400,2020-12-05 15:52:57.696,2020-12-05,2020,12,...,,,,,,,,,,
3,2020-12-05 15:53:00,Axel Tony,Miel,2538,0 days 00:00:02.538000,0.042300,2020-12-05 15:52:57.462,2020-12-05,2020,12,...,artist,0.596,0.662,-9.055,0.0451,0.000014,0.132,0.431,153.908,213368.0
4,2020-12-06 13:22:00,Burna Boy,African Giant,6997,0 days 00:00:06.997000,0.116617,2020-12-06 13:21:53.003,2020-12-06,2020,12,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6802,2021-12-05 17:25:00,Ninho,No Life,4522,0 days 00:00:04.522000,0.075367,2021-12-05 17:24:55.478,2021-12-05,2021,12,...,,,,,,,,,,
6803,2021-12-05 17:25:00,Ninho,No Life,108566,0 days 00:01:48.566000,1.809433,2021-12-05 17:23:11.434,2021-12-05,2021,12,...,,,,,,,,,,
6804,2021-12-05 17:28:00,Ninho,RER D,223101,0 days 00:03:43.101000,3.718350,2021-12-05 17:24:16.899,2021-12-05,2021,12,...,artist,0.632,0.720,-7.163,0.3450,0.000050,0.303,0.165,183.003,223101.0
6805,2021-12-05 17:29:00,Ninho,YSL,15874,0 days 00:00:15.874000,0.264567,2021-12-05 17:28:44.126,2021-12-05,2021,12,...,artist,0.712,0.801,-6.384,0.3310,0.000000,0.227,0.281,121.024,218596.0


In [36]:
get_list_genres(artists).value

NameError: name 'artists' is not defined