In [None]:
# user_id = 'notbobbobby'

# GET PLAYLISTS
# playlists = get_user_playlists(user_id, [discover_database=True])

# GET TRACKS
# tracks = get_user_tracks(user_id, playlists)

# GET FEATURES
# features = get_user_unique_tracks_and_features(user_id, tracks)

# COMBINE INTO ONE DATAFRAME
# df = pd.merge(tracks, playlists, how='outer', on=['playlist_id', 'playlist_name'])
# df = pd.merge(df, features, how='outer', on=['track_id', 'track_name'])

# Note: indieair showed missing songs, which points to discrepancies between
#       the playlists' total_tracks and the number of tracks actually returned

import os
import pandas as pd
import altair as alt

ROOT = os.getcwd()
def get_data(db, user, time, kind='library', root=None):
    """Load one newline-delimited JSON export into a DataFrame.

    The file is always named ``<user>_<time>_<kind>.json``. Its folder
    depends on the database:

    * ``'_history'``: ``<root>/_history/<user>/``
    * anything else:  ``<root>/<db>/<user>/<time>/``

    Parameters
    ----------
    db : str
        Database folder name, e.g. ``'_db'`` or ``'_history'``.
    user : str
        User the export belongs to.
    time : str
        Snapshot label used in the file name (and in the folder name for
        every database other than ``'_history'``).
    kind : str, default 'library'
        Export kind; the last component of the file name.
    root : str, optional
        Base directory holding the database folders. Defaults to ``ROOT``,
        the working directory captured when this cell was run.

    Returns
    -------
    pandas.DataFrame
        One row per JSON record in the file.
    """
    base = ROOT if root is None else root
    filename = '{}_{}_{}.json'.format(user, time, kind)
    # History exports sit directly in the user's folder; every other
    # database keeps them one level deeper, in a per-snapshot folder.
    folders = [db, user] if db == '_history' else [db, user, time]
    parts = folders + [filename]
    path = os.path.expanduser(os.path.join(base, *parts))
    return pd.read_json(path, orient='records', lines=True)

# Snapshot label shared by the library and discover exports.
SNAPSHOT_DATE = '2018-10-31'

library = get_data('_db', 'notbobbobby', SNAPSHOT_DATE)
discover = get_data('_db', 'discover', SNAPSHOT_DATE)

# One 2018 listening-history export per user, stacked into a single frame.
history_n = get_data('_history', 'notbobbobby', '2018', kind='history')
history_d = get_data('_history', 'deedanvy', '2018', kind='history')
history_c = get_data('_history', 'c.bochulak', '2018', kind='history')
# ignore_index avoids duplicate row labels: each export starts at 0 again.
history = pd.concat([history_n, history_d, history_c], ignore_index=True)

from datetime import timedelta

# Shift every play back six hours, as one vectorised operation on the column.
# NOTE(review): presumably converts UTC to a UTC-6 local time; a fixed offset
#               ignores daylight saving -- confirm this is intended.
history['played_at'] = history['played_at'] - timedelta(hours=6)

In [None]:
# Q: What does my hourly listening history look like for each month, and any subset of days?

# A: I want to look at the total count at each hour during the day
#    I also want to look at the total count at each (month, day) pair
#    Then, I want to show my hourly listening for only the days I select

# Use the default data transformer with no limit on the number of rows.
alt.data_transformers.enable('default', max_rows=None)

# date_selection: rectangular brush (x and y) attached to the calendar grid.
# user_selection: click selection on the user dots; empty='none' is passed so
#                 an empty selection matches nothing instead of everything.
date_selection = alt.selection_interval(encodings=['x', 'y'])
user_selection = alt.selection_multi(encodings=['y'], empty='none')

# --- chart_m: plays per hour of day, filtered by both selections ------------
hour_of_day = alt.X(
    'played_at:T',
    timeUnit='hours',
    axis=alt.Axis(title='', grid=False),
    scale=alt.Scale(domain=[0, 23]))
total_plays = alt.Y(
    'count():Q',
    axis=alt.Axis(title='total plays', grid=False))

chart_m = (
    alt.Chart(history)
    .mark_bar(stroke='transparent', size=12)
    .encode(
        x=hour_of_day,
        y=total_plays,
        color=alt.value('#1DB954'),
        tooltip=[alt.Tooltip('count():Q', title='tracks')])
    .transform_filter(date_selection)
    .transform_filter(user_selection)
    .properties(title='Listening Metrics', width=600)
)

# --- chart_o: month x day-of-month grid that hosts the date brush -----------
day_of_month = alt.X(
    'played_at:O',
    timeUnit='date',
    axis=alt.Axis(title='calendar', grid=False))
month_of_year = alt.Y(
    'played_at:O',
    timeUnit='month',
    axis=alt.Axis(title='', grid=False))
# Cells inside the brush are coloured by play count, the rest are light gray.
day_color = alt.condition(
    date_selection,
    'count():Q',
    alt.value('lightgray'),
    scale=alt.Scale(range=['#1DB954', '#191414']),
    legend=None)

chart_o = (
    alt.Chart(history)
    .mark_bar(stroke='transparent')
    .encode(
        x=day_of_month,
        y=month_of_year,
        color=day_color,
        tooltip=[alt.Tooltip('count():Q', title='tracks')])
    .transform_filter(user_selection)
    .properties(width=600, height=80, selection=date_selection)
)

# --- chart_p: one dot per user, hosting the user selection ------------------
# Selected users get their own colour, everyone else is light gray.
user_color = alt.condition(
    user_selection,
    'user_display_name',
    alt.value('lightgray'),
    scale=alt.Scale(range=['#1DB954', '#16BAC5', '#883677']),
    legend=None)

chart_p = (
    alt.Chart(history)
    .mark_circle(size=100)
    .encode(
        y=alt.Y('user_display_name:N', axis=alt.Axis(title='', grid=False)),
        color=user_color)
    .properties(selection=user_selection)
)

# Hourly chart and user picker side by side, calendar grid underneath.
(chart_m | chart_p) & chart_o

In [None]:
# Q: What are the top 10 songs I listen to each month?

# A: I need to look at the total count of each song played when grouped by user and month.
#    Then, I need to take the 10 largest track counts for each unique user and month

# Play count and total listening time for every (user, month, track).
# NOTE(review): recent pandas releases spell the month-end alias 'ME' instead
#               of 'M' -- switch if this cell starts emitting a warning.
top = (
    history
    .groupby(['user_display_name', pd.Grouper(key='played_at', freq='M'),
              'track_name', 'album', 'artists'])
    .agg({'track_id': 'count', 'duration_ms': 'sum'})
    .reset_index()
    [['user_display_name', 'played_at', 'track_name', 'album', 'artists',
      'track_id', 'duration_ms']]
    .rename({'track_id': 'track_count'}, axis=1)
    # Convert the summed duration from milliseconds to minutes.
    .assign(duration=lambda plays: plays['duration_ms'] / 60000)
    .drop('duration_ms', axis=1)
)

# Ten most-played tracks for each (user, month) pair.
# A single sort plus groupby().head() replaces one full scan of `top` per
# user x month combination, and also copes with an empty `top` (pd.concat of
# an empty list raises). Ties keep their original order, so the rows chosen
# match nlargest(10) with its default keep='first'.
# Rows are ordered by user, then month, then descending play count.
top10 = (
    top
    .sort_values(['user_display_name', 'played_at', 'track_count'],
                 ascending=[True, True, False])
    .groupby(['user_display_name', 'played_at'], sort=False)
    .head(10)
)

In [None]:
# Preview the first rows of the per-user, per-month top-10 table.
top10.head()

In [None]:
# One panel per month; inside each panel a column of points per user, placed
# vertically by how often the track was played.
month_panels = alt.Column('played_at:O', timeUnit='month')
user_axis = alt.X('user_display_name:N', axis=alt.Axis(title='', grid=False))
plays_axis = alt.Y('track_count:Q', axis=alt.Axis(grid=False))
user_colors = alt.Color(
    'user_display_name:N',
    scale=alt.Scale(range=['#1DB954', '#16BAC5', '#883677']),
    legend=alt.Legend(title='User'))
track_details = [
    alt.Tooltip('track_name', title='track'),
    alt.Tooltip('album'),
    alt.Tooltip('artists'),
    alt.Tooltip('track_count', title='count'),
]

chart = (
    alt.Chart(top10)
    .mark_point(size=25, stroke='transparent')
    .encode(
        column=month_panels,
        x=user_axis,
        y=plays_axis,
        color=user_colors,
        tooltip=track_details)
    .configure_view(stroke='transparent')
    .configure_axis(domainWidth=0.8)
)

chart

In [None]:
# Q: How many songs from my discover do I save each week?

# A: Gather the list of all the tracks, grouped by each Discover Week
#    Then, count the intersection of the library with the track ids of the Discover Week


def find_all(ids, kind):
    """Return column `kind` for the saved tracks whose id appears in `ids`.

    Reads the module-level `library` frame and keeps only the rows whose
    `track_id` is in `ids` and whose `playlist_id` is 'saved_tracks'.

    Parameters
    ----------
    ids : list-like
        Track ids to look up.
    kind : str
        Name of the `library` column to return, e.g. 'track_name'.

    Returns
    -------
    list
        Values of `kind` for the matching rows, in `library` order.
    """
    in_ids = library['track_id'].isin(ids)
    is_saved = library['playlist_id'].isin(['saved_tracks'])
    return library.loc[in_ids & is_saved, kind].tolist()

# One row per Discover week for 'notbobbobby', holding the list of that
# week's track ids.
is_my_discover = discover['discover_id'].isin(['notbobbobby'])
weekly_count = (
    discover[is_my_discover]
    .groupby('created_at')['track_id']
    .apply(list)
    .reset_index()
)

# For every week, look up which of its tracks ended up in the saved tracks
# and record their names, albums and artists.
for column, kind in [('tracks', 'track_name'),
                     ('albums', 'album'),
                     ('artists', 'artists')]:
    weekly_count[column] = weekly_count['track_id'].apply(
        lambda ids, kind=kind: find_all(ids, kind))

# Length of each list built above.
# NOTE(review): all three lists come from the same matching rows, so the
#               three counts are always equal; album_count is not a count of
#               distinct albums -- confirm that is what is wanted.
for column, count_column in [('tracks', 'track_count'),
                             ('albums', 'album_count'),
                             ('artists', 'artists_count')]:
    weekly_count[count_column] = weekly_count[column].apply(len)

In [None]:
# Bar per Discover week: height and colour both encode how many of that
# week's tracks were saved. The y axis is pinned to the range 0-12.
week_axis = alt.X('created_at:T')
saved_axis = alt.Y('track_count:Q', scale=alt.Scale(domain=[0, 12]))
week_details = [
    alt.Tooltip('created_at:T', format='%A, %B %e, %Y'),
    alt.Tooltip('track_count:Q', title='saved'),
]

(
    alt.Chart(weekly_count)
    .mark_bar()
    .encode(
        x=week_axis,
        y=saved_axis,
        color=alt.Color('track_count:Q'),
        tooltip=week_details)
)

In [None]:
# Show the kernel's current working directory. ROOT in the first cell is
# taken from os.getcwd(), so the data folders are resolved relative to it.
import os
os.getcwd()