<a href="https://colab.research.google.com/github/g-r-a-e-m-e/spotify-exploration/blob/main/visualizations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Boilerplate
# Import necessary packages
!pip install arrow
import arrow
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
#import streamlit as st
from google.colab import drive
# Mount Google Drive at /content/drive so the notebook can access files stored there
drive.mount('/content/drive')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting arrow
  Downloading arrow-1.2.3-py3-none-any.whl (66 kB)
[K     |████████████████████████████████| 66 kB 3.2 MB/s 
Installing collected packages: arrow
Successfully installed arrow-1.2.3
Mounted at /content/drive


In [2]:
# Load the streaming-history CSV from the mounted Drive folder into a DataFrame
data_path = '/content/drive/MyDrive/notebooks/spotify-exploration/data/output.csv'
df = pd.read_csv(filepath_or_buffer = data_path)

In [10]:
# Cap the listened fraction at 1: anything above 1 is replaced by 1.
# Series.clip is vectorised and leaves values <= 1 (and NaN) untouched, which
# matches the previous per-row lambda without a Python-level loop.
df['pct_listened'] = df['pct_listened'].clip(upper = 1)

# Label every stream with the last day of the week it falls in.
# week_start = 7 makes weeks start on Sunday; span() returns a (start, end)
# pair and [1] selects the end, formatted as a YYYY-MM-DD string.
df['stream_week'] = df['stream_date'].apply(lambda x: arrow.get(x).span('week', week_start = 7)[1].format('YYYY-MM-DD'))

In [4]:
# Show the column labels of the loaded DataFrame
df.columns

Index(['Unnamed: 0', 'endTime', 'artistName', 'trackName', 'msPlayed',
       'track_id', 'acousticness', 'danceability', 'duration_ms', 'energy',
       'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
       'speechiness', 'tempo', 'time_signature', 'valence', 'time_played',
       'duration', 'pct_listened', 'stream_date_time', 'stream_date',
       'stream_time', 'time_of_day', 'month_number', 'month_name',
       'day_of_week', 'season'],
      dtype='object')

In [5]:
# Stacked histogram of streams per weekday, coloured by time of day, with each
# weekday's bar normalised to percentages (barnorm = 'percent').
pct_weekday_order = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
pct_axis_labels = {'day_of_week': 'Day of Week',
                   'time_of_day': 'Time of Day'}

streaming_hist_pct = px.histogram(
    df,
    x = 'day_of_week',
    color = 'time_of_day',
    barnorm = 'percent',
    category_orders = {'day_of_week': pct_weekday_order},
    labels = pct_axis_labels,
    title = 'Percent of Streams by Day of Week and Time of Day',
    color_discrete_sequence = px.colors.qualitative.G10,
)
streaming_hist_pct

In [6]:
# Stacked histogram of raw stream counts per weekday, coloured by time of day.
count_weekday_order = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
count_axis_labels = {'day_of_week': 'Day of Week',
                     'time_of_day': 'Time of Day'}

streaming_hist = px.histogram(
    df,
    x = 'day_of_week',
    color = 'time_of_day',
    category_orders = {'day_of_week': count_weekday_order},
    labels = count_axis_labels,
    title = 'Number of Streams by Day of Week and Time of Day',
    color_discrete_sequence = px.colors.qualitative.G10,
)
streaming_hist

In [7]:
# Audio-feature columns used by the correlation and time-series charts
audio_features = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'speechiness',
    'valence',
]

# Time-of-day categories, in the order the panels are drawn
time_of_day = [
    'Morning',
    'Afternoon',
    'Evening',
    'Night',
]

In [16]:
# One audio-feature correlation heatmap per time of day on a 2 x 2 grid.
n_cols = 2
fig = make_subplots(rows = 2, cols = n_cols,
                    vertical_spacing = 0.25,
                    horizontal_spacing = 0.1,
                    subplot_titles = time_of_day)

# Subplot titles are assigned left-to-right, top-to-bottom, so the i-th time
# of day goes into the i-th grid cell and sits under its own title.
# (The previous if / if / if / else chain paired the `else` only with the
# 'Evening' test, so Morning and Afternoon were also drawn in row 2, col 2.)
for idx, tod in enumerate(time_of_day):
  grid_row, grid_col = divmod(idx, n_cols)
  # Pairwise correlation of the audio features for streams in this time of day
  corr = df[df['time_of_day'] == tod][audio_features].corr()
  fig.add_trace(go.Heatmap(x = corr.columns,
                           y = corr.columns,
                           z = corr.values,
                           # Fixed scale so colours are comparable across panels
                           zmin = -1,
                           zmax = 1,
                           colorscale = px.colors.sequential.Inferno,
                           text = corr.values,
                           texttemplate = '%{z:.2f}',
                           name = tod),
                # make_subplots uses 1-based row / col indices
                row = grid_row + 1,
                col = grid_col + 1)


fig.update_layout(title = 'Audio Feature Correlation by Time of Day',)

fig.show()

In [11]:
# Mean of every audio feature for each (day, time of day) combination
avg_df = pd.pivot_table(df,
                        values = audio_features,
                        index = ['stream_date', 'time_of_day'],
                        aggfunc = 'mean').reset_index()

# Same aggregation at weekly granularity, keyed on the stream_week label
avg_df_weekly = pd.pivot_table(df,
                               values = audio_features,
                               index = ['stream_week', 'time_of_day'],
                               aggfunc = 'mean').reset_index()

#line_chart_daily = px.line(avg_df, x = 'stream_date', y = audio_features, facet_col = 'time_of_day', facet_col_wrap = 2)

#line_chart_weekly = px.line(avg_df_weekly, x = 'stream_week', y = audio_features, facet_col = 'time_of_day', facet_col_wrap = 2)

In [12]:
#line_chart_daily

#line_chart_weekly

In [19]:
# Weekly line chart: one line per (audio feature, time of day) pair.
# Every time of day is drawn on its own stacked y-axis sharing one date axis;
# each feature keeps one colour and one legend entry across all four panels.
palette = px.colors.qualitative.T10
panels = ['Morning', 'Afternoon', 'Evening', 'Night']

fig = go.Figure()

for feature_idx, feature in enumerate(audio_features):
  for panel_idx, panel in enumerate(panels, start = 1):
    weekly = avg_df_weekly[avg_df_weekly['time_of_day'] == panel]
    line_kwargs = dict(x = weekly['stream_week'],
                       y = weekly[feature],
                       name = feature,
                       # The first axis is named 'y', later ones 'y2', 'y3', ...
                       yaxis = 'y' if panel_idx == 1 else 'y' + str(panel_idx),
                       legendgroup = feature,
                       line = dict(color = palette[feature_idx]),
                       connectgaps = False)
    if panel_idx > 1:
      # Only the first panel's trace contributes a legend entry per feature
      line_kwargs['showlegend'] = False
    fig.add_trace(go.Scatter(**line_kwargs))

# Range-selector buttons for the shared date axis
range_buttons = [
    dict(count = 1, label = "1m", step = "month", stepmode = "backward"),
    dict(count = 3, label = "3m", step = "month", stepmode = "backward"),
    dict(count = 6, label = "6m", step = "month", stepmode = "backward"),
    dict(count = 1, label = "YTD", step = "year", stepmode = "todate"),
    dict(count = 1, label = "1y", step = "year", stepmode = "backward"),
    dict(step = "all"),
]

# Each panel takes one quarter of the figure height, Morning at the bottom
panel_domains = [[0, 0.25], [0.25, 0.5], [0.5, 0.75], [0.75, 1]]
stacked_axes = {}
for panel_idx, (panel, domain) in enumerate(zip(panels, panel_domains), start = 1):
  axis_key = 'yaxis' if panel_idx == 1 else 'yaxis' + str(panel_idx)
  stacked_axes[axis_key] = dict(anchor = "x",
                                range = [0, 1],
                                domain = domain,
                                title = panel)

chart_title = ("Average Weekly Value of Audio Features by Time of Day"
               "<br>"
               "<sup>"
               "Represents the popularity of a given audio feature over time, "
               "partitioned by time of day."
               "</sup>")

fig.update_layout(
    xaxis = dict(rangeselector = dict(buttons = range_buttons),
                 rangeslider = dict(visible = False),
                 type = "date"),
    legend = dict(groupclick = "togglegroup"),
    legend_title = 'Audio Feature',
    title = chart_title,
    height = 600,
    **stacked_axes
)

fig.show()

# Save a standalone HTML copy of the chart to the Drive charts folder
fig.write_html('/content/drive/MyDrive/notebooks/spotify-exploration/charts/avg-weekly-value-audio-features-time-day.html')

In [15]:
# Weekly grouped bar chart: one bar series per (audio feature, time of day).
# Every time of day is drawn on its own stacked y-axis sharing one date axis;
# each feature keeps one colour and one legend entry across all four panels.
# (A stray leading `l` on the first line of this cell used to evaluate an
# undefined name and raise NameError on a fresh kernel; it has been removed.)
bar_palette = px.colors.qualitative.T10
bar_panels = ['Morning', 'Afternoon', 'Evening', 'Night']

fig = go.Figure()

for feature_idx, feature in enumerate(audio_features):
  for panel_idx, panel in enumerate(bar_panels, start = 1):
    weekly = avg_df_weekly[avg_df_weekly['time_of_day'] == panel]
    bar_kwargs = dict(x = weekly['stream_week'],
                      y = weekly[feature],
                      name = feature,
                      # The first axis is named 'y', later ones 'y2', 'y3', ...
                      yaxis = 'y' if panel_idx == 1 else 'y' + str(panel_idx),
                      legendgroup = feature,
                      marker = dict(color = bar_palette[feature_idx]))
    if panel_idx > 1:
      # Only the first panel's trace contributes a legend entry per feature
      bar_kwargs['showlegend'] = False
    fig.add_trace(go.Bar(**bar_kwargs))

# Range-selector buttons for the shared date axis
bar_range_buttons = [
    dict(count = 1, label = "1m", step = "month", stepmode = "backward"),
    dict(count = 3, label = "3m", step = "month", stepmode = "backward"),
    dict(count = 6, label = "6m", step = "month", stepmode = "backward"),
    dict(count = 1, label = "YTD", step = "year", stepmode = "todate"),
    dict(count = 1, label = "1y", step = "year", stepmode = "backward"),
    dict(count = 1, label = 'All', step = "all"),
]

# Panels are stacked bottom-to-top (Morning lowest) with a gap between them
bar_panel_domains = [[0, 0.2], [0.25, 0.45], [0.5, 0.7], [0.75, 0.95]]
bar_axes = {}
for panel_idx, (panel, domain) in enumerate(zip(bar_panels, bar_panel_domains), start = 1):
  axis_key = 'yaxis' if panel_idx == 1 else 'yaxis' + str(panel_idx)
  bar_axes[axis_key] = dict(anchor = "x",
                            range = [0, 1],
                            domain = domain,
                            title = panel)

bar_chart_title = ("Average Weekly Value of Audio Features by Time of Day"
                   "<br>"
                   "<sup>"
                   "Represents the popularity of a given audio feature over time, "
                   "partitioned by time of day."
                   "</sup>")

fig.update_layout(
    xaxis = dict(rangeselector = dict(buttons = bar_range_buttons),
                 rangeslider = dict(visible = False),
                 type = "date"),
    barmode = 'group',
    legend = dict(groupclick = "togglegroup"),
    legend_title = 'Audio Feature',
    title = bar_chart_title,
    height = 600,
    **bar_axes
)

fig.show()