In [141]:
#@title "Runtime > Run all" in Google Colab for full interactivity.

import numpy as np
import pandas as pd
import altair as alt
from IPython.display import clear_output, display

!wget -nc https://github.com/VisDesignStudies/assignment-two-storytelling-remixed-rvanasa/raw/master/spotify-dataset.zip
!wget -nc https://github.com/VisDesignStudies/assignment-two-storytelling-remixed-rvanasa/raw/master/internet-history.tsv
!unzip -n spotify-dataset.zip
clear_output()

def eager_supplier(fn):
  """Call ``fn`` right away and return its result.

  Meant to be used as a decorator: the decorated name ends up bound to the
  value ``fn`` produced instead of to the function itself.
  """
  result = fn()
  return result


@eager_supplier
def df_data():
  """Load ``data.csv`` and keep the rows whose ``year`` is 1960 or later.

  Returns the filtered frame with an extra ``datetime`` column parsed from
  ``year`` (format ``%Y``).
  """
  df = pd.read_csv('data.csv')
  # Copy the filtered rows so that adding a column below writes to an
  # independent frame instead of a slice of the raw table (this is what
  # triggers pandas' SettingWithCopyWarning).
  df = df[df.year >= 1960].copy()
  df['datetime'] = pd.to_datetime(df.year, format='%Y')
  return df

@eager_supplier
def df_data_w_genres():
  """Load ``data_w_genres.csv`` unchanged."""
  return pd.read_csv('data_w_genres.csv')
  
@eager_supplier
def df_merged():
  """Join tracks with their artist's genres, one row per (track, genre).

  Each track's ``artists`` string is split on commas and matched against the
  normalized artist names of ``df_data_w_genres``. Only the first matching
  row per track ``id`` is kept, and its ``genres`` string is then split into
  one row per genre. Rows with an empty genre are dropped. The ``artists``
  and ``genres`` columns come back renamed to ``artist`` and ``genre``.
  """
  def normalize_artist(a):
    # Lower-case, strip the list/quote punctuation, trim whitespace.
    text = a.lower()
    for ch in ('\'', '"', '[', ']'):
      text = text.replace(ch, '')
    return text.strip()

  def split_names(raw):
    # Comma-separated string -> list of normalized, trimmed names.
    return [part.strip() for part in normalize_artist(raw).split(',')]

  genre_lookup = df_data_w_genres.copy()
  genre_lookup['artists'] = genre_lookup.artists.apply(normalize_artist)
  genre_lookup = genre_lookup[['artists', 'genres']]

  tracks = df_data.copy()
  tracks['artists'] = tracks.artists.apply(split_names)
  tracks = tracks.explode('artists')

  # The merge yields one row per (track, matched artist); keep the first per id.
  merged = tracks.merge(genre_lookup, on='artists').drop_duplicates('id')
  merged['genres'] = merged.genres.apply(split_names)
  merged = merged.explode('genres').rename(columns={
      'genres': 'genre',
      'artists': 'artist',
  })

  has_genre = merged.genre.str.len() > 0
  return merged[has_genre]

@eager_supplier
def df_internet():
  """Load ``internet-history.tsv`` and add a ``datetime`` column parsed from ``year``."""
  traffic = pd.read_csv('internet-history.tsv', delimiter='\t')
  return traffic.assign(datetime=pd.to_datetime(traffic.year, format='%Y'))

@eager_supplier
def genres():
  """Count how often each genre name occurs in ``df_data_w_genres.genres``.

  Every ``genres`` string is case-folded, stripped of quote and bracket
  characters, and split on commas. Blank pieces are ignored. Returns a
  Series indexed by genre name, sorted by count in descending order.
  """
  names = []
  for raw in df_data_w_genres.genres.str.casefold():
    for ch in ('\'', '"', '[', ']'):
      raw = raw.replace(ch, '')
    for piece in raw.split(','):
      if piece.strip():
        names.append(piece.strip())

  counts = pd.Series(names).value_counts()
  return counts.sort_values(ascending=False)

@eager_supplier
def base_genres():
  """Genres that do not end with the name of another genre.

  A genre is dropped when some *other* genre name is a suffix of it (for
  example ``'pop rock'`` is dropped when ``'rock'`` is present). Matching is
  purely textual, so a name such as ``'trap'`` would also be dropped if
  ``'rap'`` is present. The order of ``genres.index`` is preserved.

  This gives the same result as dropping inside a nested loop over all
  pairs, but without rebinding the Index while it is being iterated and
  without the per-pair ``Index.drop`` cost.
  """
  names = genres.index
  known = set(names)

  def has_shorter_variant(name):
    # True when a proper, non-empty suffix of `name` is itself a genre.
    return any(name[i:] in known for i in range(1, len(name)))

  keep = np.array([not has_shorter_variant(name) for name in names], dtype=bool)
  return names[keep]

# [The Evolution of Popular Modern Music](https://graphics.latimes.com/pop-music-evolution/)

### Jon Schleuss et al.
##### Los Angeles Times
##### Royal Society Open Science
##### May 6, 2015

---

#### Remixed by Ryan Vandersmith
##### ATLS 4519-015
##### University of Colorado Boulder
##### October 12, 2020

---

#### [Spotify Dataset 1921-2020](https://www.kaggle.com/yamaerenay/spotify-dataset-19212020-160k-tracks)

#### [Internet Traffic 1990-2017](https://en.wikipedia.org/wiki/Internet_traffic)

## Introduction

Pop music is often considered a reflection of changing culture in the United States — and between 1960 and 2020, songs featured in the Billboard Hot 100 varied greatly. 

Researchers in England recently analyzed almost all the singles that charted during that period to reveal trends in the evolution of popular music—noting, among other things, that the biggest revolution to occur was the birth of rap in 1991 and that, with the exception of a brief synthesizer-happy period in the 1980s, there’s always been a lot of diversity in American pop.

To construct their evolutionary history of American pop, the scientists looked at quantifiable digital elements in the music that correspond with rhythmic, harmonic, and tonal qualities in the songs, noting where they clustered and how they changed over time.

Below, we highlight some of the easier-to-understand tonal elements they tracked, linking them to particular artists and songs. Several trends in the team’s analysis are also displayed in the graphic below, with each of the approximately 17,000 songs they analyzed represented as a dot. Search by artist and tonal quality. Mouse over chart to see results.

Five years later, an unqualified but enthusiastic college student revisited this research, offering further insights into the history of popular music over the past sixty years. 

In [142]:
#@title Prevalence of musical features over time {run:'auto'}

# Column names that can be charted. The order must line up one-to-one with
# `metric_flags` below, because the two lists are zipped together.
# Note: the column is called 'explicit' here; `prepare` renames it to
# 'explicitness' for display.
all_metric_cols = [
    'acousticness',
    'danceability',
    'energy',
    'explicit',
    'instrumentalness',
    'liveness',
    'speechiness',
]

# One on/off switch per metric. The trailing `#@param` annotations are Colab
# form fields; keep each assignment on a single line.
acousticness = True #@param {type:'boolean'}
danceability = True #@param {type:'boolean'}
energy = True #@param {type:'boolean'}
explicitness = True #@param {type:'boolean'}
instrumentalness = True #@param {type:'boolean'}
liveness = True #@param {type:'boolean'}
speechiness = True #@param {type:'boolean'}

# Switch values in the same order as `all_metric_cols`.
metric_flags = [
    acousticness,
    danceability,
    energy,
    explicitness,
    instrumentalness,
    liveness,
    speechiness,
]
# Only the columns whose switch is on.
metric_cols = [m for m, b in zip(all_metric_cols, metric_flags) if b]

def prepare(df, cols=None):
  """Build the long-format table used by the stacked-area chart.

  Args:
    df: frame with a ``datetime`` column and the metric columns.
    cols: metric column names to include. Defaults to the module-level
      ``metric_cols`` selection.

  Returns:
    A frame with columns ``datetime``, ``variable`` (capitalized metric
    name, with ``explicit`` shown as ``Explicitness``), ``value`` (the
    yearly mean of the metric divided by the sum of its yearly means) and
    ``Year``.
  """
  if cols is None:
    cols = metric_cols
  cols = list(cols)

  # Select the metric columns *before* averaging: taking the mean of every
  # column first fails on text columns with pandas >= 2.0 and is wasted work
  # on older versions.
  yearly = df.groupby('datetime')[cols].mean()

  # Rescale each metric so its yearly means add up to 1. Series.sum skips
  # NaN, whereas the builtin sum() would turn the whole column into NaN.
  yearly = yearly / yearly.sum()

  yearly = yearly.rename(columns={'explicit': 'explicitness'})
  yearly.columns = yearly.columns.str.capitalize()

  long_form = yearly.reset_index().melt('datetime')
  long_form['Year'] = long_form.datetime.dt.year
  return long_form


# Multi-selection on the `variable` field, triggered on mouseover and bound
# to the legend. No encoding channel reacts to it at the moment.
selection = alt.selection(
    type='multi',
    on='mouseover',
    fields=['variable'],
    bind='legend',
)

# Stacked area chart: one band per metric, normalized so the bands fill the
# full height for every year.
alt.Chart(prepare(df_data)).mark_area().encode(
    x=alt.X('datetime', title='Year'),
    y=alt.Y('value', title='Score', stack='normalize'),
    color=alt.Color('variable', title='Metric', scale=alt.Scale(scheme='category20')),
    tooltip=alt.Tooltip('Year'),
).add_selection(selection)

In [143]:
# @title Popular music genres {run:'auto'}

# Named genre groups offered in the form below.
genre_packages = {
    'Most Popular': base_genres[:20],
    'Article': [
        'rap',
        'hip hop',
        'dance',
        'disco',
        'new wave pop',
        'new wave',
        'rock',
        'pop rock',
    ],
}

compare_genres = 'Most Popular' #@param ['Most Popular', 'Article']
compare_metric = 'Acousticness' #@param ['Acousticness', 'Danceability', 'Energy', 'Explicitness', 'Instrumentalness', 'Liveness', 'Speechiness']

# Replace the selected package name with its list of genres.
compare_genres = genre_packages[compare_genres]

# Arbitrary threshold: (genre, year) points whose summed popularity falls
# below this value are not plotted.
MIN_POPULARITY_SUM = 5000

# Column name of the selected metric after 'explicit' -> 'explicitness'.
metric_col = compare_metric.lower()

df = df_merged[df_merged.genre.isin(compare_genres)]
df = df.rename(columns={'explicit': 'explicitness'})
# Aggregate only the two columns the chart reads. Running mean/sum over every
# column also hits the text columns, which raises a TypeError on
# pandas >= 2.0 and is wasted work on older versions.
df = (
    df.groupby(['genre', 'datetime'])
    .agg(**{
        f'{metric_col}_mean': (metric_col, 'mean'),
        'popularity_sum': ('popularity', 'sum'),
    })
    .reset_index()
)
df['genre'] = df.genre.str.title()

df = df[df.popularity_sum >= MIN_POPULARITY_SUM]

# Mousing over a point or a legend entry highlights that genre.
selection = alt.selection(type='multi', on='mouseover', fields=['genre'], bind='legend')

alt.Chart(df).mark_square().encode(
    alt.X('datetime', title='Year'),
    alt.Y(f'{metric_col}_mean', title=compare_metric),
    alt.Color('genre', title='Genre', sort=[s.title() for s in compare_genres]),
    alt.Tooltip('genre'),
    alt.Size('popularity_sum', legend=None),
    opacity=alt.condition(selection, alt.value(.7), alt.value(.2)),
).add_selection(selection).interactive()

## Mellow '60s and '70s fade, reborn in the '90s

Common in the music of singers like Smokey Robinson, Tony Bennett and Patsy Cline, this musical quality has a resurgence in the 1990s with artists like Madonna and Boyz II Men.

**Elements: calm, quiet, mellow**

## Big guitar rocks, rolls, rebounds

Rock and roll rises and falls in two full cycles with peaks in 1966 (the Turtles, Paul Revere & the Raiders) and 1985 (the heyday of stadium rock groups such as Motley Crue, Van Halen and Cheap Trick). It heads upward once more in the 2000s.

**Elements: guitar, loud, energetic**

## Manilow couldn't keep the orchestra alive

Think Electric Light Orchestra, the Allman Brothers Band, Supertramp and other '70s powerhouses. Declines in the '80s but sees a resurgence in the 2000s with Counting Crows and Coldplay.

**Elements: piano, orchestra, harmonic**

## The rise and fall of the drum machine

Rises continuously until 1990 as the use of drum machines spreads. Dance, disco and New Wave artists such as the Pet Shop Boys share these tonal qualities. Their frequency declines after 1990 as the reign of the drum machine ends.

**Elements: drums, aggressive, percussive**

## Hip-hop revolution ignites

This time period ramps up with the rise of rap and hip-hop in the late '80s and into the '90s, with artists like Busta Rhymes, Ludacris and Snoop Dogg.

**Elements: energetic, speech, bright**

---

In [144]:
#@title Genre popularity and features over time

#@markdown **Blue:** Danceability

#@markdown **Green:** Energy

#@markdown **Red:** Explicitness

from IPython.display import display, HTML
import matplotlib.pyplot as plt
from matplotlib import animation
from PIL import Image
import io
import base64

class PlotLayout:
  """Collects matplotlib plots as PNG snapshots for later display.

  Each plot passed to `add` is rendered to an in-memory PNG and its figure
  is closed. The snapshots can then be shown side by side (`show`) or played
  back as a video (`animate`).
  """

  def __init__(self):
    # io.BytesIO buffers, one rendered PNG per added plot, in insertion order.
    self.images = []
    # (width, height) in inches of the most recently added figure.
    self._figsize = None


  def add(self, ax):
    """Render the figure that owns `ax` to PNG, store it and close the figure."""
    fig = ax.get_figure()
    img = io.BytesIO()
    fig.canvas.print_png(img)
    self.images.append(img)

    self._figsize = fig.bbox.width / fig.dpi, fig.bbox.height / fig.dpi

    # Close this specific figure; a bare plt.close() would close whichever
    # figure happens to be current.
    plt.close(fig)


  def show(self):
    """Display all stored snapshots as inline-block images."""
    display(HTML('''
    <style>
      .layout-plot {
      display: inline-block;
      margin: 10px;
      border: 1px solid #DDD;
      }
    </style>
    ''' + ''.join((
        '<div class="layout-plot">'
        f'<img src="data:image/png;base64,{base64.b64encode(bio.getvalue()).decode()}\n">'
        '</div>'
    ) for bio in self.images)))


  def animate(self, **kw):
    """Play the stored snapshots back as an HTML5 video, one frame each.

    Keyword arguments are forwarded to `animation.ArtistAnimation` and
    override the defaults (interval=50, blit=True, repeat_delay=100).

    Raises:
      ValueError: if no plot has been added yet.
    """
    if not self.images:
      raise ValueError('PlotLayout.animate() needs at least one plot; call add() first')

    fig, ax = plt.subplots(figsize=self._figsize)
    try:
      fig.tight_layout()
      ax.axis('off')
      frames = []
      for bio in self.images:
        # Rewind explicitly so the PNG is read from its first byte.
        bio.seek(0)
        frame = ax.imshow(Image.open(bio), animated=True, aspect='auto')
        frames.append([frame])

      options = {**dict(interval=50, blit=True, repeat_delay=100), **kw}
      anim = animation.ArtistAnimation(fig, frames, **options)
      display(HTML(anim.to_html5_video()))
    finally:
      # Release the helper figure even if video encoding fails.
      plt.close(fig)


import matplotlib.cm

# Tracks belonging to the first ten base genres (base_genres follows the
# descending genre counts).
df_start = df_merged[df_merged.genre.isin(base_genres[:10])]

print('Generating visualization...')

# Aggregate once for all years, and only the columns the pies use. Running
# sum/mean over every column also hits text and datetime columns, which raises
# a TypeError on pandas >= 2.0.
genre_year_stats = df_start.groupby(['year', 'genre']).agg(
    popularity_sum=('popularity', 'sum'),
    explicit_mean=('explicit', 'mean'),
    energy_mean=('energy', 'mean'),
    danceability_mean=('danceability', 'mean'),
)

layout = PlotLayout()
# Grouping by the index level yields the years in ascending order.
for year, year_stats in genre_year_stats.groupby(level='year'):
  year_stats = year_stats.droplevel('year').sort_values('popularity_sum', ascending=False)
  year_stats.index = year_stats.index.str.title()

  # A pie needs a positive total; skip a year that has none.
  if year_stats.popularity_sum.sum() <= 0:
    continue

  fig, ax = plt.subplots(dpi=200)
  ax.set(title=year, xlim=[-1, 1], ylim=[-1, 1])
  ax.set_aspect('equal', adjustable='box')
  fig.tight_layout()
  ax.axis('off')

  # One RGB triple per wedge: red = explicitness, green = energy,
  # blue = danceability.
  wedge_colors = year_stats[['explicit_mean', 'energy_mean', 'danceability_mean']].values.tolist()

  ax.pie(
      year_stats.popularity_sum,
      explode=[.05] * len(year_stats),
      labels=year_stats.index,
      autopct='%1.1f%%',
      shadow=True,
      startangle=0,
      colors=wedge_colors,
  )

  layout.add(ax)


clear_output()
# Four frames per second.
layout.animate(interval=1000 // 4)

## From funk to f**k

Over the past 60 years, American pop music made a dramatic transition from mellow, dance-driven tunes in the '60s and '70s to intense, high-energy music in the '80s. From there, rap and hip-hop took center stage with progressively more explicit lyrics and themes.

I hypothesized that this sudden increase in explicit music is an indirect product of the rise of Internet streaming services rather than a change in American culture. Controversial artists and albums are now easily accessible on platforms such as iTunes and Spotify, elevating the popularity of these styles of music. 



In [183]:
#@title Music explicitness vs. global internet traffic

df_traffic = df_internet.copy()

# Rescale raw traffic so its maximum is 1. Series.max() skips missing values,
# whereas the builtin max() gives order-dependent results when NaN is present.
df_traffic['traffic'] = df_traffic.ip_traffic / df_traffic.ip_traffic.max()

# Dark-grey line of the rescaled traffic; the tooltip shows the raw value.
internet_chart = alt.Chart(df_traffic).mark_line().encode(
    alt.X('datetime'),
    alt.Y('traffic'),
    alt.Tooltip('ip_traffic'),
    color=alt.value('#444'),
)

##############################

compare_genres = ['hip hop', 'rap']

df = df_merged.copy()
# Every genre outside the comparison set is relabelled 'Other'.
df.loc[~df.genre.isin(compare_genres), 'genre'] = 'Other'

# Keep every hip hop / rap row plus the first row of each track (by id);
# exact duplicate rows are then removed, so a track whose first row is already
# hip hop / rap is not listed twice.
df = pd.concat([df[df.genre != 'Other'], df.drop_duplicates('id')]).drop_duplicates()

df = df.rename(columns={'explicit': 'explicitness'})
# Aggregate only the two columns the chart reads. Running mean/sum over every
# column also hits the text columns, which raises a TypeError on
# pandas >= 2.0.
df = (
    df.groupby(['genre', 'datetime'])
    .agg(
        explicitness_mean=('explicitness', 'mean'),
        popularity_sum=('popularity', 'sum'),
    )
    .reset_index()
)
df['genre'] = df.genre.str.title()

# Mousing over a point or a legend entry highlights that genre.
selection = alt.selection(type='multi', on='mouseover', fields=['genre'], bind='legend')

explicitness_chart = alt.Chart(df).mark_square(size=100).encode(
    alt.X('datetime', title='Year'),
    alt.Y('explicitness_mean', title='Explicitness'),
    alt.Color('genre', title='Genre', sort=[s.title() for s in compare_genres]),
    alt.Tooltip('genre'),
    alt.Size('popularity_sum', legend=None),
    opacity=alt.condition(selection, alt.value(.7), alt.value(.2)),
).add_selection(selection).interactive()

# Layer the traffic line over the explicitness points.
explicitness_chart + internet_chart

Contrary to my prediction, rap and hip-hop began to feature explicit music decades before the widespread adoption of the Internet, peaking in the mid-1990s. However, the general increase in explicitness after 2010 suggests that the Internet is more recently involved in reinforcing and popularizing this cultural phenomenon.