In [1]:
import pandas as pd
from bokeh.io import output_notebook
# Configure Bokeh to render plots inline in the notebook when `show()` is called.
output_notebook()

In [2]:
# Read the three raw JSON tables (songs, albums, shows) from `data/`.
songs_df, albums_df, shows_df = (
    pd.read_json(path)
    for path in ('data/songs.json', 'data/albums.json', 'data/shows.json'))

## Make "full_df"

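`full_df` contains the album and song information merged together: one row per album track, with the album title and year, the track number, the song name and the song's author.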

In [3]:
# Build `wide_df`: the album columns side by side with one extra column per
# track position (labelled 0, 1, 2, ...) taken from each album's `track_list`.
frames = [albums_df, pd.DataFrame(list(albums_df['track_list']))]
wide_df = pd.concat(frames, axis='columns')

# The integer-labelled columns are the track positions; "melt" them so that
# every (album, track position) pair becomes its own row.
song_cols = [col for col in wide_df.columns if isinstance(col, int)]
tidy_albums = pd.melt(
    wide_df,
    id_vars=['title', 'year'],
    value_vars=song_cols,
    var_name='track_num',
    value_name='song')

# Attach each song's author; the song title column is renamed to `song` so
# it does not clash with the album `title` column.
_df = songs_df.loc[:, ['title', 'author']].rename(columns={'title': 'song'})

# Join on the song name, order by album title then track number, drop rows
# that still have a missing value, and renumber the index.
full_df = (
    tidy_albums
    .merge(_df, on='song')
    .sort_values(by=['title', 'track_num'])
    .dropna()
    .reset_index(drop=True))

## Plotting

### Data Prep

In [4]:
def get_counts(df):
    """Count how many rows of ``df`` fall in each year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``year`` column.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``year`` and ``count``, with one row per distinct year
        in the order produced by ``Series.value_counts``.
    """
    per_year = df['year'].value_counts()
    # Label the index `year` so it becomes the `year` column once reset.
    return per_year.rename_axis('year').reset_index(name='count')

In [18]:
def resize_list(lst, target_len):
    """Return ``lst`` truncated or cyclically repeated to ``target_len`` items.

    Parameters
    ----------
    lst : list or tuple
        Sequence to resize. It is not modified; the result has the same
        type as ``lst`` because it is built by repetition and slicing.
    target_len : int
        Desired number of items; must be non-negative.

    Returns
    -------
    list or tuple
        The first ``target_len`` items of ``lst`` repeated end to end.

    Raises
    ------
    ValueError
        If ``target_len`` is negative, or if ``lst`` is empty while
        ``target_len`` is positive (repeating nothing can never reach the
        requested length; the earlier recursive version recursed until
        ``RecursionError`` in both cases).
    """
    if target_len < 0:
        raise ValueError("target_len must be non-negative")
    if target_len == 0:
        return lst[:0]
    if len(lst) == 0:
        raise ValueError(
            "cannot resize an empty sequence to a positive length")
    # Ceiling division: the fewest whole copies that cover target_len items.
    repeats = -(-target_len // len(lst))
    return (lst * repeats)[:target_len]

In [29]:
def get_data_author(full_df):
    """Count songs per year, split into Bob Dylan songs and all others.

    Parameters
    ----------
    full_df : pandas.DataFrame
        Frame with ``year`` and ``author`` columns, one row per song.

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``count_bob`` and ``count_other``, one row per
        year that has at least one song, sorted by year. A group with no
        songs in a given year has a count of 0.
    """
    bob_counts = get_counts(full_df.query("author == 'Bob Dylan'"))
    other_counts = get_counts(full_df.query("author != 'Bob Dylan'"))

    # Outer join: the default inner join would silently drop every year in
    # which only one of the two groups has songs.
    merged = pd.merge(
        bob_counts, other_counts, on='year', how='outer',
        suffixes=('_bob', '_other'))

    # A year missing from one side means zero songs for that group; cast
    # back to int because the NaN placeholders turned the counts into floats.
    count_cols = ['count_bob', 'count_other']
    merged[count_cols] = merged[count_cols].fillna(0).astype(int)

    return merged.sort_values('year').reset_index(drop=True)

In [30]:
def get_data_album(full_df, albums_df):
    """Build a year-by-album table of song counts.

    Parameters
    ----------
    full_df : pandas.DataFrame
        Frame with a ``title`` column (album title), one row per song.
    albums_df : pandas.DataFrame
        Album table providing at least ``title`` and ``year``.

    Returns
    -------
    pandas.DataFrame
        A ``year`` column followed by one column per album title. Each cell
        holds that album's song count in the album's year and 0 elsewhere.
    """
    songs_per_album = (
        full_df['title']
        .value_counts()
        .rename_axis('title')
        .reset_index(name='count'))

    # No explicit key is given: pandas joins on the columns both frames share.
    with_album_info = songs_per_album.merge(albums_df)

    by_year = with_album_info.pivot(
        index='year', columns='title', values='count')
    # Year/album combinations that do not exist come out of the pivot as NaN.
    return by_year.reset_index().fillna(0)

### Plotting

In [34]:
from bokeh.core.properties import value
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.palettes import Category20_20
from bokeh.models import HoverTool

In [35]:
# Build the per-album table in this cell so it runs on a fresh kernel; the
# notebook previously relied on a `data` variable that no cell defines.
data = get_data_album(full_df, albums_df)

# Every column after the leading `year` column is one album to stack.
categories = list(data.columns)[1:]
# Repeat the palette as needed so there is one colour per album.
colors = resize_list(Category20_20, len(categories))

source = ColumnDataSource(data=data)

p = figure(
    plot_height=450, title="Number of songs recorded by year",
    toolbar_location=None, tools="")

# `name` tags each stacked renderer with its album title.
p.vbar_stack(
    categories, x='year', width=.9, source=source,
    color=colors, name=categories)

p.add_tools(HoverTool(tooltips=[
    ('Year', '@year')]))

p.y_range.start = 0
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
# No legend is created for this chart, so no legend location is set.

show(p)

In [37]:
from bokeh.core.properties import value
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.palettes import Spectral6

In [38]:
# Build the Bob-Dylan-vs-others table in this cell so it runs on a fresh
# kernel; the `count_bob` / `count_other` columns used below come from
# `get_data_author`, and no other cell assigns a matching `data`.
data = get_data_author(full_df)

source = ColumnDataSource(data=data)
categories = ['count_bob', 'count_other']
# One colour per stacked category.
colors = Spectral6[:2]

p = figure(
    plot_height=350, title="Number of songs recorded by year",
    toolbar_location=None, tools="")

# `value(x)` makes each legend entry the literal category name rather than
# a reference to a data column.
p.vbar_stack(
    categories, x='year', width=.8, source=source,
    color=colors, legend=[value(x) for x in categories])

p.y_range.start = 0
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.legend.location = "top_left"

show(p)