# Spotify Data Project

The goal of this project is to transform my Spotify data into a series of visual graphics that can tell me about my listening habits, including who I listen to, when, and how often.

In [1]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

import json
from pprint import pprint

import plotly
plotly.tools.set_credentials_file(username=(your_username), api_key=(your_api_key))
import plotly.plotly as py
import plotly.graph_objs as go

pd.set_option('display.max_rows', 500)

In [51]:
# Load the exported streaming history from JSON and preview the first five records

history_path = 'C:\\Users\\jbean\\Dropbox\\Other\\Python\\Spotify\\StreamingHistory.json'

with open(history_path, encoding='utf8') as f:
    data = json.load(f)

pprint(data[:5])

[{'artistName': 'Ludovico Einaudi',
  'endTime': '2018-08-08 00:03',
  'msPlayed': 357706,
  'trackName': 'Nuvole bianche'},
 {'artistName': 'Frédéric Chopin',
  'endTime': '2018-08-08 00:49',
  'msPlayed': 269000,
  'trackName': 'Nocturne No.2 In E Flat, Op.9 No.2'},
 {'artistName': 'Joep Beving',
  'endTime': '2018-08-08 00:54',
  'msPlayed': 288280,
  'trackName': 'Ab Ovo'},
 {'artistName': 'Claude Debussy',
  'endTime': '2018-08-08 00:56',
  'msPlayed': 136320,
  'trackName': 'Beau Soir, L. 6'},
 {'artistName': 'John Rutter',
  'endTime': '2018-08-08 01:01',
  'msPlayed': 289120,
  'trackName': 'A Gaelic Blessing: Meditation'}]


In [3]:
# Convert from JSON to a data frame

spotify_data = pd.DataFrame()

def extract_json_value(column_name, data_source=None):
    """
    Collect one field from every record in a list of JSON records.

    Parameters
    ----------
    column_name : str
        Key to read from each record (e.g. 'artistName').
    data_source : list of dict, optional
        Records to read from. When omitted, the notebook-level `data` loaded from
        the streaming-history file is used.

    Returns
    -------
    list
        The value of `column_name` for each record, in record order.

    Raises
    ------
    KeyError
        If any record lacks `column_name`.
    """
    if data_source is None:
        # Looked up at call time. Later cells in this notebook rebind `data`, so
        # callers that need to be safe against that should pass the records explicitly.
        data_source = data

    return [record[column_name] for record in data_source]

# Fill the frame one column at a time: (data frame column, key in the JSON records)
for frame_column, json_key in (
    ('artist_name', 'artistName'),
    ('end_time', 'endTime'),
    ('ms_played', 'msPlayed'),
    ('track_name', 'trackName'),
):
    spotify_data[frame_column] = extract_json_value(json_key)

In [4]:
spotify_data.sample(10)

Unnamed: 0,artist_name,end_time,ms_played,track_name
3805,Luke Bryan,2018-11-04 16:36,199853,Way Way Back
2181,Ari Afsar,2018-09-21 16:26,182827,First Burn
1450,Lukas Graham,2018-09-05 19:25,8085,7 Years
3599,for KING & COUNTRY,2018-10-29 00:52,3297,Burn The Ships
2504,Kevin Olusola,2018-10-03 20:44,217580,As Long as You Love Me
290,Parachute,2018-08-16 11:22,125699,Forever And Always
3555,Cody Simpson,2018-10-28 13:34,98960,I Fall in Love Too Easily
1442,Post Malone,2018-09-05 19:12,30209,rockstar (feat. 21 Savage)
896,Johannes Bornlöf,2018-08-25 20:19,152746,Reminiscence
3053,Bebe Rexha,2018-10-14 14:59,195519,I'm a Mess


In [5]:
spotify_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3896 entries, 0 to 3895
Data columns (total 4 columns):
artist_name    3896 non-null object
end_time       3896 non-null object
ms_played      3896 non-null int64
track_name     3896 non-null object
dtypes: int64(1), object(3)
memory usage: 121.8+ KB


# Format columns and add variables

In [6]:
# Convert the timestamp column to datetime

spotify_data['end_time'] = pd.to_datetime(spotify_data['end_time'])

In [7]:
# Express play time in seconds and minutes, then discard the raw millisecond column

spotify_data['seconds_played'] = spotify_data['ms_played'] / 1000
spotify_data['minutes_played'] = spotify_data['seconds_played'] / 60

spotify_data.drop(columns='ms_played', inplace=True)

In [8]:
spotify_data.sample(10)

Unnamed: 0,artist_name,end_time,track_name,seconds_played,minutes_played
2667,Griffin Peterson,2018-10-07 13:49:00,Hold on to You (Acoustic),228.642,3.8107
1625,Alessia Cara,2018-09-08 14:20:00,Growing Pains,193.68,3.228
3282,Robinson,2018-10-19 01:33:00,Medicine,121.957,2.032617
3567,Jess Ray,2018-10-28 14:11:00,What Have We Found Ourselves In,36.884,0.614733
2325,The Black Eyed Peas,2018-09-27 02:17:00,Where Is The Love?,272.533,4.542217
964,Dan + Shay,2018-08-26 15:30:00,Speechless - The Vocals,206.911,3.448517
2511,Liam Payne,2018-10-03 21:07:00,For You (Fifty Shades Freed) (& Rita Ora),245.453,4.090883
790,BYU Noteworthy,2018-08-25 15:42:00,When You Believe,0.0,0.0
105,Caleb and Kelsey,2018-08-11 15:11:00,Shout to the Lord / God of Wonders,232.079,3.867983
3335,The Willis Clan,2018-10-19 20:06:00,Ring of Fire,1.253,0.020883


# Most popular artists and songs

In [9]:
# Rank artists by number of plays and keep the 25 most played

plays_per_artist = spotify_data.groupby(by='artist_name')['track_name'].count()
most_popular_artists_by_count = plays_per_artist.sort_values(ascending=False).head(25)

print('The most played artists by count were: \n\n{}'.format(most_popular_artists_by_count))

The most played artists by count were: 

artist_name
The Script             114
Pentatonix             111
Olly Murs               64
Lady Gaga               48
Kurt Hugo Schneider     46
Imagine Dragons         44
for KING & COUNTRY      39
Hans Zimmer             37
Timeflies               37
Calum Scott             34
James Arthur            28
OneRepublic             28
Dan + Shay              26
Jason Derulo            26
James TW                26
MKTO                    25
NF                      24
Anthem Lights           23
Mat Kearney             23
Jason Mraz              23
Clean Bandit            22
Niall Horan             22
Andy Grammer            20
MAX                     19
Tori Kelly              19
Name: track_name, dtype: int64


In [10]:
# Visualize the most popular artists with a standard bar chart

# The traces get their own name so that this cell does not rebind `data`, which
# holds the raw streaming-history records that extract_json_value reads.
artist_bar_traces = [
    go.Bar(
        x=most_popular_artists_by_count.index,
        y=most_popular_artists_by_count,
        text=most_popular_artists_by_count,   # show the play count on each bar
        textposition='auto',
        opacity=0.75
    )
]

layout = go.Layout(
    title='Popularity of Artists by Count',
    yaxis=dict(
        title='Number of Times Played',
        gridcolor='rgb(255, 255, 255)',
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
        titlefont=dict(size=15)
    )
)

fig = go.Figure(data=artist_bar_traces, layout=layout)

py.iplot(fig, filename='popular_artists')

In [11]:
# Rank artists by total minutes played and keep the top 20

minutes_per_artist = spotify_data.groupby(by='artist_name')['minutes_played'].sum()
most_popular_artists_by_time = minutes_per_artist.sort_values(ascending=False).head(20)

most_popular_artists_by_time

artist_name
The Script                      334.987650
Pentatonix                      279.596567
for KING & COUNTRY              125.981867
Kurt Hugo Schneider             125.540300
Olly Murs                       125.466100
Lady Gaga                       122.574817
Imagine Dragons                 118.277117
Timeflies                       117.157033
Calum Scott                     112.745650
Carl Ditters von Dittersdorf     86.662800
James Arthur                     82.378333
Ed Sheeran                       72.605350
James TW                         71.762767
Jason Derulo                     70.092317
Jason Mraz                       68.385117
OneRepublic                      66.710133
NF                               64.937500
MAX                              63.762217
MKTO                             62.674317
Clean Bandit                     60.686250
Name: minutes_played, dtype: float64

In [12]:
# Count plays per track title (artist is not part of the key) and keep the 20 most played

track_play_counts = spotify_data['track_name'].value_counts()
most_popular_songs = track_play_counts.sort_values(ascending=False).head(20)

most_popular_songs

Perfect                                                                31
Goodbye (feat. Nicki Minaj & Willy William)                            21
Seeing Blind                                                           15
Always Remember Us This Way                                            15
Rockabye (feat. Sean Paul & Anne-Marie)                                14
joy.                                                                   14
Zero - From the Original Motion Picture "Ralph Breaks The Internet"    13
If You Were Mine                                                       13
Si No Vuelves                                                          13
High Hopes                                                             13
Heartstrings                                                           12
Amen                                                                   12
Anywhere With You Is Home                                              12
Have It All                           

# Time of day to listen

In [13]:
# Create time of day variable

def time_of_day(datetime_column, df=None):
    
    """
    Classify each timestamp in a datetime column as 'morning', 'afternoon' or 'night'.
    
    Hours 0-11 are 'morning', hours 12-16 are 'afternoon', and hours 17-23 are 'night'.
    
    Parameters
    ----------
    datetime_column : str
        Name of a column in `df` whose values expose an `.hour` attribute.
    df : pandas.DataFrame, optional
        Frame to read the column from. Defaults to the notebook-level `spotify_data`,
        looked up when the function is called.
    
    Returns
    -------
    pandas.Categorical
        One label per row, ordered morning < afternoon < night.
    """
    
    if df is None:
        df = spotify_data
    
    labels = []
    
    for timestamp in df[datetime_column]:
        
        hour = timestamp.hour
        
        # Strictly before noon: 12:00-12:59 belongs to the afternoon
        if hour < 12:
            labels.append('morning')
        elif hour < 17:
            labels.append('afternoon')
        else:
            labels.append('night')
    
    return pd.Categorical(labels, categories=['morning','afternoon','night'], ordered=True)

In [14]:
from datetime import datetime
import time

def datetime_from_utc_to_local(utc_datetime):
    
    """
    Converts naive UTC timestamps to naive local time in the machine's timezone.
    
    The UTC offset is looked up separately for every timestamp, so values on either
    side of a daylight-saving change each receive the offset that was in force at
    that moment (rather than the offset in force when the notebook is run).
    
    Parameters
    ----------
    utc_datetime : pandas.Series, pandas.Index, pandas.Timestamp or datetime.datetime
        Timezone-naive value(s) interpreted as UTC.
    
    Returns
    -------
    Same kind of object as the input, shifted to local wall-clock time. Missing
    values (NaT) stay missing.
    """
    # Function-scope import: the notebook cell only imports the `datetime` class
    from datetime import timezone
    
    def _utc_offset_at(moment):
        # Offset of the machine's timezone from UTC at this particular instant
        if pd.isna(moment):
            return pd.NaT
        aware_utc = pd.Timestamp(moment).to_pydatetime(warn=False).replace(tzinfo=timezone.utc)
        # astimezone() with no argument converts to the system local timezone
        return aware_utc.astimezone().utcoffset()
    
    if isinstance(utc_datetime, (pd.Series, pd.Index)):
        offsets = pd.to_timedelta([_utc_offset_at(moment) for moment in utc_datetime])
        if isinstance(utc_datetime, pd.Series):
            # Re-attach index and name so the addition aligns row-for-row and keeps the name
            offsets = pd.Series(offsets, index=utc_datetime.index, name=utc_datetime.name)
        return utc_datetime + offsets
    
    return utc_datetime + _utc_offset_at(utc_datetime)

In [15]:
## Shift the UTC end times to this machine's local time, then label each play's time of day

spotify_data['local_time'] = datetime_from_utc_to_local(spotify_data['end_time'])
spotify_data['local_time_of_day'] = time_of_day(datetime_column='local_time')

In [16]:
# Label each play with its local weekday, stored as an ordered Monday-to-Sunday category

weekday_order = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday', 'Sunday']

spotify_data['local_day_of_week'] = pd.Categorical(
    spotify_data['local_time'].dt.day_name(),
    categories=weekday_order,
    ordered=True,
)

In [17]:
## Total minutes for every (weekday, time of day) pair, then scaled to a per-week figure

time_of_day_local_pivot = spotify_data.pivot_table(
    values='minutes_played',
    index='local_day_of_week',
    columns='local_time_of_day',
    aggfunc=np.sum,
)

# Span of the data in weeks: whole days between the first and last play, divided by 7
start_date = spotify_data['local_time'].min()
end_date = spotify_data['local_time'].max()
difference_in_weeks = (end_date - start_date).days / (7)

# Despite its name, this table is computed from the local-time pivot above
time_of_day_utc_pivot = time_of_day_local_pivot / difference_in_weeks

In [18]:
# Heatmap of minutes per week: weekday rows against time-of-day columns
trace = go.Heatmap(
    z=time_of_day_utc_pivot.values,
    x=time_of_day_utc_pivot.columns,
    y=time_of_day_utc_pivot.index,
    colorscale='Greens',
    reversescale=True,
)

# The trace list is passed inline rather than assigned to `data`, so this cell
# does not rebind the notebook-level `data` name used by the JSON loading cells.
py.iplot([trace], filename='spotify_heatmap')

# Higher likelihood to play a song

In [19]:
## Read in playlist data

playlist_path = 'C:\\Users\\jbean\\Dropbox\\Other\\Python\\Spotify\\Playlist.json'

with open(playlist_path, encoding='utf8') as f:
    playlist_data = json.load(f)

In [20]:
my_playlist = pd.DataFrame()

def extract_json_value_playlist(column_name, data_source=None):
    
    """
    Collects one field from every track of every playlist in the playlist export.
    
    Parameters
    ----------
    column_name : str
        Key to read from each track entry (e.g. 'artistName').
    data_source : dict, optional
        Parsed playlist JSON with a top-level 'playlists' list, where each playlist
        may carry a 'tracks' list of dicts. Defaults to the notebook-level
        `playlist_data`, looked up when the function is called.
    
    Returns
    -------
    list
        Values of `column_name`, in playlist order and then track order. Playlists
        without a usable 'tracks' entry, and tracks without `column_name`, are skipped.
        Note that skipping means two calls with different column names can return
        lists of different lengths if the tracks are not uniform.
    """
    
    if data_source is None:
        data_source = playlist_data
    
    column_values = []
    
    # Walk every playlist. The loop bound must be the number of playlists,
    # not the number of keys in the first playlist entry.
    for playlist in data_source['playlists']:
        
        try:
            tracks = playlist['tracks']
        except (KeyError, TypeError):
            # Best effort: a playlist with no track listing contributes nothing
            continue
        
        for track in tracks or []:
            try:
                column_values.append(track[column_name])
            except (KeyError, TypeError):
                continue

    return column_values

# (data frame column, key in each playlist track entry)
for playlist_column, json_key in (('artist_name', 'artistName'), ('track_name', 'trackName')):
    my_playlist[playlist_column] = extract_json_value_playlist(json_key)

In [21]:
my_playlist.sample(10)

Unnamed: 0,artist_name,track_name
57,BYU Noteworthy,When You Believe
91,Mat Kearney,Learning To Love Again
75,P!nk,Just Like Fire (From the Original Motion Pictu...
106,Rudimental,"These Days (feat. Jess Glynne, Macklemore & Da..."
46,The Nor'easters,Rise Up
20,The Piano Guys,In My Blood / Swan Lake
160,Jack & Jack,Flights
162,Family of the Year,Hero
139,Nelly,Die a Happy Man
49,Olly Murs,Never Been Better


In [22]:
# Load the saved-library export from JSON

library_path = 'C:\\Users\\jbean\\Dropbox\\Other\\Python\\Spotify\\YourLibrary.json'

with open(library_path, encoding='utf8') as f:
    library_data = json.load(f)

In [23]:
library_df = pd.DataFrame()

def extract_json_value_library(column_name, data_source=library_data):
    
    """
    Reads one field from every entry under the 'tracks' key of the library JSON
    and returns the values as a list, in the order the entries appear.
    """
    
    extracted = []
    
    for track in data_source['tracks']:
        extracted.append(track[column_name])
    
    return extracted

# (data frame column, key in each library track entry)
for library_column, json_key in (
    ('artist_name', 'artist'),
    ('album', 'album'),
    ('track_name', 'track'),
):
    library_df[library_column] = extract_json_value_library(json_key)

In [24]:
library_df.sample(10)

Unnamed: 0,artist_name,album,track_name
7,Hans Zimmer,Inception,528491
139,Gente De Zona,Si No Vuelves,Si No Vuelves
93,QUEEVA,Who You Really Are,Who You Really Are
37,Liam Payne,Fifty Shades Freed,For You (Fifty Shades Freed) (& Rita Ora)
48,AC.jR & BradyJames,If I Lose Myself,If I Lose Myself
73,Mass Anthem,Wait for You / Apologize / How to Save a Life,Wait for You / Apologize / How to Save a Life
157,Pentatonix,"PTX Presents: Top Pop, Vol. I",New Rules x Are You That Somebody?
122,Gavin DeGraw,Something Worth Saving,Something Worth Saving
1,Ziv Zaifman,The Greatest Showman (Original Motion Picture ...,A Million Dreams
105,Nico Santos,Rooftop,Rooftop


In [25]:
## Build (artist, track) tuples for each source so songs can be matched across them

artist_name_track_name_all = list(zip(spotify_data['artist_name'], spotify_data['track_name']))
artist_name_track_name_playlist = list(zip(my_playlist['artist_name'], my_playlist['track_name']))
artist_name_track_name_library = list(zip(library_df['artist_name'], library_df['track_name']))

In [26]:
# Determine if a given song/artist combination is in a playlist or library

# Sets make each membership check a hash lookup instead of a scan over the whole list
playlist_pairs = set(artist_name_track_name_playlist)
library_pairs = set(artist_name_track_name_library)

# 1 when the played (artist, track) pair appears in the playlist / library, else 0
song_in_playlist = [1 if pair in playlist_pairs else 0 for pair in artist_name_track_name_all]
song_in_library = [1 if pair in library_pairs else 0 for pair in artist_name_track_name_all]

In [27]:
# Add lists created above to the spotify data frame

spotify_data['song_in_playlist'] = song_in_playlist
spotify_data['song_in_library'] = song_in_library

In [28]:
# Share of all plays whose (artist, track) pair is in a playlist / in the library
total_plays = spotify_data.shape[0]
playlist_share = spotify_data.song_in_playlist.sum() / total_plays
library_share = spotify_data.song_in_library.sum() / total_plays

print('{0:.0%} of the total songs played were also featured in a playlist'.format(playlist_share))
print('{0:.0%} of the total songs played were also featured in my library'.format(library_share))

11% of the total songs played were also featured in a playlist
26% of the total songs played were also featured in my library


In [29]:
# Determine the number of times a song was played if it's in a playlist or library compared to not being in one

# One group per distinct (artist, track) pair. A single groupby replaces re-filtering
# the whole frame once for every unique song.
# Note: groupby leaves out rows whose artist or track is missing.
plays_by_song = spotify_data.groupby(['artist_name', 'track_name'])

# Number of plays of each song
num_plays = plays_by_song.size()

# A song counts as "in" when any of its plays carries the flag
in_any_playlist = plays_by_song['song_in_playlist'].sum() > 0
in_library = plays_by_song['song_in_library'].sum() > 0

# Plain Python lists of play counts; the order of songs within each list is not significant
num_times_played_playlist = num_plays[in_any_playlist].tolist()
num_times_played_nonplaylist = num_plays[~in_any_playlist].tolist()

num_times_played_library = num_plays[in_library].tolist()
num_times_played_nonlibrary = num_plays[~in_library].tolist()

In [31]:
# Box plots of plays-per-song, one box per membership category

x_data = ['Non Library', 'Library', 'Non Playlist', 'Playlist']

y_data = [num_times_played_nonlibrary, num_times_played_library, num_times_played_nonplaylist, num_times_played_playlist]

# Six colours are listed; zip() below stops after the four categories
colors = ['rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)', 'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)']

traces = [
    go.Box(
        y=play_counts,
        name=category,
        boxpoints='all',
        jitter=0.5,
        whiskerwidth=0.2,
        fillcolor=fill,
        marker=dict(size=2),
        line=dict(width=1),
    )
    for category, play_counts, fill in zip(x_data, y_data, colors)
]

y_axis = dict(
    autorange=True,
    showgrid=True,
    zeroline=True,
    dtick=5,
    gridcolor='rgb(255, 255, 255)',
    gridwidth=1,
    zerolinecolor='rgb(255, 255, 255)',
    zerolinewidth=2,
    title='Number of Plays'
)

layout = go.Layout(
    title='Frequency of Song Playing by Category',
    yaxis=y_axis,
    margin=dict(l=40, r=30, b=80, t=100),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
    showlegend=False
)

fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)

# Number of minutes listened per day

In [32]:
# Total minutes listened on each local calendar date (dates with no plays do not appear)

plays_by_local_time = spotify_data.set_index('local_time')
minutes_by_date = plays_by_local_time.groupby(by=plays_by_local_time.index.date)['minutes_played'].sum()

number_of_minutes_per_day = minutes_by_date.to_frame()

In [33]:
number_of_minutes_per_day.head()

Unnamed: 0,minutes_played
2018-08-07,98.406583
2018-08-09,173.769567
2018-08-10,65.25645
2018-08-11,142.560117
2018-08-12,156.5489


In [37]:
from plotly.grid_objs import Grid, Column
import plotly.figure_factory as ff

import time
from datetime import datetime

In [38]:
# Create a plot.ly-friendly table for minutes per day

table = ff.create_table(number_of_minutes_per_day)
py.iplot(table, filename='minutes_per_day_spotify')

In [39]:
# Formatting for the plot.ly chart based on code adapted from an example in their gallery
#
# Builds one x/y column pair per animation frame and uploads them as a plot.ly grid.
# Pair k (named 'x{k}' / 'y{k}') holds the first k dates and their minute totals, so
# the columns grow by one day each; the later frames cell refers to them by name.

minutes_played = list(number_of_minutes_per_day['minutes_played'])

my_columns = []

# NOTE(review): the range stops at len - 1, so the longest pair holds len - 1 rows and
# the last date is never part of any column - confirm whether that is intended. Any
# change here must be mirrored in the frames cell, which uses the same range.
for k in range(len(number_of_minutes_per_day.index) - 1):
    my_columns.append(Column(number_of_minutes_per_day.index[:k + 1], 'x{}'.format(k + 1)))   
    my_columns.append(Column(minutes_played[:k + 1], 'y{}'.format(k + 1)))
grid = Grid(my_columns)
# The current time is appended to the grid name, so each run uploads under a new name
py.grid_ops.upload(grid, 'minutes_played' + str(time.time()), auto_open=False)

'https://plot.ly/~jbean01/134/'

In [40]:
# Animated area chart of minutes played per day, driven by the uploaded grid columns.
# NOTE(review): this assignment rebinds the notebook-level name `data`.

# Initial trace: points at the first column pair ('x1' / 'y1') of the grid
data=[dict(type='scatter',
           xsrc=grid.get_column_reference('x1'),
           ysrc= grid.get_column_reference('y1'),
           name='Spotify',
           mode='lines',
           line=dict(color= 'rgb(0, 153, 255)'),
           fill='tozeroy',
           fillcolor='rgba(153, 204, 255, 0.5)')]

# Settings shared by both axes; autorange is off because explicit ranges are set below
axis=dict(ticklen=4,
          mirror=True,
          zeroline=False,
          showline=True,
          autorange=False,
          showgrid=False)

# x range spans the first to last date; y range runs from 0 to the daily maximum plus 5.
# A single 'Play' button starts the animation.
layout = dict(title='Minutes Played per Day Spotify',
              font=dict(family='Balto'),
              showlegend=False,
              autosize=False,
              width=800,
              height=400,
              xaxis=dict(axis, **{'nticks':12, 'tickangle':-45,
                                  'range': [min(number_of_minutes_per_day.index),
                                            max(number_of_minutes_per_day.index)]}),
              yaxis=dict(axis, **{'title': 'Minutes', 'range':[0,max(number_of_minutes_per_day.minutes_played)+5]}),
              updatemenus=[dict(type='buttons',
                                showactive=False,
                                y=1,
                                x=1.1,
                                xanchor='right',
                                yanchor='top',
                                pad=dict(t=0, r=10),
                                buttons=[dict(label='Play',
                                              method='animate',
                                              args=[None, dict(frame=dict(duration=50, redraw=False), 
                                                               transition=dict(duration=0),
                                                               fromcurrent=True,
                                                               mode='immediate')])])])

# One frame per uploaded column pair; each frame re-points trace 0 at pair k + 1.
# The range must match the loop that created the grid columns.
frames=[{'data':[{'xsrc': grid.get_column_reference('x{}'.format(k + 1)),
                  'ysrc': grid.get_column_reference('y{}'.format(k + 1))}],
         'traces': [0]
        } for k in range(len(number_of_minutes_per_day.index) - 1)]

fig=dict(data=data, layout=layout, frames=frames)
# The current time is appended to the plot name, so each run creates a new plot
py.icreate_animations(fig, 'Minutes_Per_Day_Spotify' + str(time.time()))

# Listening by Day of Week

In [41]:
# Set the index as the column for easier slicing

date_index = spotify_data.set_index('local_time')
date_index.index = date_index.index.date

In [42]:
# Attach the weekday to each daily total, then keep a single row per date

day_of_week_by_date = date_index[['local_day_of_week']]

merged_minutes_per_day = number_of_minutes_per_day.merge(
    day_of_week_by_date,
    how='left',
    left_index=True,
    right_index=True,
    validate='1:m',
)

# The merge yields one row per play; every row for a date carries the same values
is_repeated_date = merged_minutes_per_day.index.duplicated(keep='first')
merged_minutes_per_day = merged_minutes_per_day[~is_repeated_date]

In [43]:
merged_minutes_per_day.head()

Unnamed: 0,minutes_played,local_day_of_week
2018-08-07,98.406583,Tuesday
2018-08-09,173.769567,Thursday
2018-08-10,65.25645,Friday
2018-08-11,142.560117,Saturday
2018-08-12,156.5489,Sunday


In [44]:
# Collect the daily minute totals for each weekday, Monday first

days = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']

# time_per_day[n] is the list of daily totals for days[n]
time_per_day = [
    list(merged_minutes_per_day.loc[merged_minutes_per_day.local_day_of_week == day, 'minutes_played'])
    for day in days
]

In [45]:
# One box per weekday showing the spread of daily minutes; boxmean adds the mean marker
traces = [
    go.Box(y=daily_minutes, name=day, boxmean=True)
    for day, daily_minutes in zip(days, time_per_day)
]

# The traces are plotted directly rather than via `data`, so this cell does not
# rebind the notebook-level `data` name used by the JSON loading cells.
py.iplot(traces)

# Scatter Plot of Artist Plays

In [46]:
def time_of_day_scatter_df(df=None, time_of_day=None):
    
    """
    Summarises listening per artist, optionally restricted to one time of day.
    
    Parameters
    ----------
    df : pandas.DataFrame, optional
        Plays with 'artist_name', 'track_name' and 'minutes_played' columns, plus
        'local_time_of_day' when filtering. Defaults to the notebook-level
        `spotify_data`, looked up when the function is called.
    time_of_day : str, optional
        When truthy, only rows whose 'local_time_of_day' equals this value are used.
    
    Returns
    -------
    pandas.DataFrame
        Indexed by artist name with columns 'number_of_plays' (count of track names),
        'minutes_played' (sum) and 'unique_songs_played' (number of distinct track names).
    """
    
    if df is None:
        df = spotify_data
    
    if time_of_day:
        df = df[df['local_time_of_day'] == time_of_day]
        
    plays_by_artist = df.groupby(by='artist_name')
    
    number_of_plays = plays_by_artist['track_name'].count()
    minutes_played = plays_by_artist['minutes_played'].sum()
    # unique() yields one array of distinct titles per artist; its length is the count
    unique_songs_played = [len(titles) for titles in plays_by_artist['track_name'].unique()]
    
    scatter_df = pd.DataFrame({'number_of_plays':number_of_plays, 
                           'minutes_played':minutes_played,
                           'unique_songs_played':unique_songs_played},
                         index = number_of_plays.index)
    
    return scatter_df

In [47]:
scatter_df = time_of_day_scatter_df()

In [48]:
scatter_df.head()

Unnamed: 0_level_0,number_of_plays,minutes_played,unique_songs_played
artist_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5 Seconds of Summer,7,19.0431,2
A Great Big World,2,7.458883,2
A N X,3,8.567783,3
A R I Z O N A,7,18.080567,3
AC.jR & BradyJames,4,15.21005,2


In [49]:
# Scatter of artists: x = number of plays, y = minutes played, marker size = unique songs.
# Marker colour is simply the artist's row position mapped onto the Viridis scale.
trace = go.Scatter(
    
    x = scatter_df.number_of_plays,
    
    y = [round(i, 1) for i in scatter_df.minutes_played],
    
    # Hover text shown for each artist
    text = ['Artist: ' + str(i) + '<br>Minutes Played: ' + str(round(scatter_df.loc[i,'minutes_played'], 1)) +
            '<br>Unique Songs: ' + str(scatter_df.loc[i, 'unique_songs_played']) for i in scatter_df.index],
   
    marker = dict(size = [round(i, 1) for i in scatter_df.unique_songs_played], cmin=0, cmax = len(scatter_df),
                  colorscale='Viridis', color=[i for i in range(len(scatter_df))]),
    
    mode = 'markers'
)

layout = go.Layout(
    title='Amount of Time Listened and Songs by Artist',
    xaxis=dict(
        title='Number of Songs Played',
        gridcolor='rgb(255, 255, 255)',
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    yaxis=dict(
        title='Minutes Played',
        gridcolor='rgb(255, 255, 255)',
        zerolinewidth=1,
        ticklen=5,
        gridwidth=2,
    ),
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
    
    # Text box placed at data coordinates (130, 20) that explains the marker size
    annotations=[
        dict(
            
            text='Size = Number of Unique Songs Played',
            x=130,
            y=20,
            font={'color':'black'},
            bordercolor='#c7c7c7',
            borderwidth=2,
            borderpad=4,
            bgcolor='#fffaf0',
            opacity=0.8,
            showarrow=False

        )
    ]
)


# The trace list is passed inline rather than assigned to `data`, so this cell
# does not rebind the notebook-level `data` name used by the JSON loading cells.
fig = go.Figure(data=[trace], layout=layout)

py.iplot(fig, filename='artist-scatter')

In [50]:
button_layer_1_height = 1.12
button_layer_2_height = 1.065


def _artist_scatter_trace(artist_stats, name, visible):
    """
    Build the artist scatter trace for one per-artist summary frame.

    Parameters
    ----------
    artist_stats : pandas.DataFrame
        Frame indexed by artist with 'number_of_plays', 'minutes_played' and
        'unique_songs_played' columns (as returned by time_of_day_scatter_df).
    name : str
        Trace name.
    visible : bool
        Whether the trace is shown when the figure first renders.

    Returns
    -------
    plotly.graph_objs.Scatter
        x = number of plays, y = minutes played (1 decimal), marker size = unique songs,
        marker colour = row position on the Viridis scale.
    """
    hover_text = [
        'Artist: ' + str(artist)
        + '<br>Minutes Played: ' + str(round(artist_stats.loc[artist, 'minutes_played'], 1))
        + '<br>Unique Songs: ' + str(artist_stats.loc[artist, 'unique_songs_played'])
        for artist in artist_stats.index
    ]

    return go.Scatter(
        x=artist_stats.number_of_plays,
        y=[round(minutes, 1) for minutes in artist_stats.minutes_played],
        text=hover_text,
        marker=dict(
            size=[round(count, 1) for count in artist_stats.unique_songs_played],
            cmin=0,
            cmax=len(artist_stats),
            colorscale='Viridis',
            color=list(range(len(artist_stats))),
        ),
        mode='markers',
        visible=visible,
        name=name,
    )


# Per-artist summaries for each time-of-day bucket; the 'Evening' view uses the 'night' label
scatter_morning = time_of_day_scatter_df(time_of_day='morning')
scatter_afternoon = time_of_day_scatter_df(time_of_day='afternoon')
scatter_evening = time_of_day_scatter_df(time_of_day='night')

# Only the all-day trace is visible initially; the buttons below switch between traces
trace_all = _artist_scatter_trace(scatter_df, name='All', visible=True)
trace_morning = _artist_scatter_trace(scatter_morning, name='Morning', visible=False)
trace_afternoon = _artist_scatter_trace(scatter_afternoon, name='Afternoon', visible=False)
trace_evening = _artist_scatter_trace(scatter_evening, name='Evening', visible=False)

# Named separately so this cell does not rebind the notebook-level `data` name
time_of_day_traces = [trace_all, trace_morning, trace_afternoon, trace_evening]

# (button label, figure title), in the same order as time_of_day_traces
button_specs = [
    ('All', 'All Day'),
    ('Morning', 'Morning'),
    ('Afternoon', 'Afternoon'),
    ('Evening', 'Evening'),
]

updatemenus = [
    dict(
        # Each button shows exactly one trace (the one at its own position) and sets the title
        buttons=[
            dict(label=label,
                 method='update',
                 args=[{'visible': [position == shown for position in range(len(button_specs))]},
                       {'title': title}])
            for shown, (label, title) in enumerate(button_specs)
        ],
        type='buttons',
        direction='right',
        pad={'r': 10, 't': 10},
        showactive=True,
        x=0.3,
        xanchor='left',
        y=button_layer_1_height,
        yanchor='top',
    )
]

layout = dict(title='Artist Variance by Time of Day', showlegend=False,
              updatemenus=updatemenus)

fig = dict(data=time_of_day_traces, layout=layout)

py.iplot(fig, filename='artist_by_time_of_day')