# Import Packages

In [1]:
import pandas as pd
import plotly.express as px

def fix_df(df, name):
    '''Clean up a playlist dataframe right after it is imported.

    The frame is modified in place and is also returned, so the call works
    both as a bare statement and inside an expression or a
    ``DataFrame.pipe`` chain.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It must contain a 'track.album.release_date'
        column; a KeyError is raised otherwise.
    name : str
        Playlist label written to every row of the new 'playlist' column.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in as ``df``.
    '''
    # Tag every row with its playlist so rows stay identifiable after a concat
    df['playlist'] = name
    # Parse the release date into real datetimes; errors='coerce' turns any
    # value that cannot be parsed into NaT instead of raising
    df['track.album.release_date'] = pd.to_datetime(df['track.album.release_date'], errors='coerce')
    return df

# Compare Artists

## Artist 1: Raffi

In [2]:
# The playlist name selects the CSV under data/ and is also the label
# that fix_df stores in the 'playlist' column
playlist_name = 'This Is Raffi'
df1 = pd.read_csv(filepath_or_buffer=f"data/{playlist_name}.csv")
fix_df(df=df1, name=playlist_name)
# Summarise the columns, non-null counts and dtypes of the cleaned frame
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46 entries, 0 to 45
Data columns (total 59 columns):
 #   Column                              Non-Null Count  Dtype         
---  ------                              --------------  -----         
 0   Unnamed: 0                          46 non-null     int64         
 1   added_at                            46 non-null     object        
 2   is_local                            46 non-null     bool          
 3   primary_color                       0 non-null      float64       
 4   added_by.external_urls.spotify      46 non-null     object        
 5   added_by.href                       46 non-null     object        
 6   added_by.id                         0 non-null      float64       
 7   added_by.type                       46 non-null     object        
 8   added_by.uri                        46 non-null     object        
 9   track.album.album_type              46 non-null     object        
 10  track.album.artists         

In [3]:
# Box plot of the audio-feature columns for the first playlist
fig = px.box(
    data_frame=df1,
    y=[
        'danceability',
        'energy',
        'instrumentalness',
        'acousticness',
        'liveness',
        'speechiness',
        'valence',
    ],
    points='all',             # draw every track as its own marker
    hover_name='track.name',  # hovering a marker reveals the song title
)
fig.show()

In [4]:
# Load and clean the second playlist the same way as the first
other_playlist_name = 'This Is Jools TV'
df2 = pd.read_csv(f"data/{other_playlist_name}.csv")
fix_df(df2, other_playlist_name)

# Stack the second playlist underneath the first. ignore_index=True gives the
# combined frame a fresh 0..n-1 index; without it each file keeps its own row
# labels, so the labels repeat and label-based lookups become ambiguous.
# The 'playlist' column added by fix_df still tells the rows apart.
data = pd.concat([df1, df2], ignore_index=True)

In [5]:
# Tempo against track length for the combined playlists
fig = px.scatter(
    data_frame=data,
    x='track.duration_ms',
    y='tempo',
    color='playlist',         # colour-code points by playlist
    hover_name='track.name',  # song title shown on hover
    marginal_x='box',         # extra box plot of the x-axis values
)
fig.show()

In [6]:
# Same audio-feature box plot as before, now on the combined frame and
# colour-coded by playlist
fig = px.box(
    data_frame=data,
    y=[
        'danceability',
        'energy',
        'instrumentalness',
        'acousticness',
        'liveness',
        'speechiness',
        'valence',
    ],
    color='playlist',
    points='all',             # draw every track as its own marker
    hover_name='track.name',  # song title shown on hover
)
fig.show()

In [7]:
# Energy against acousticness, colour-coded by playlist
fig = px.scatter(
    data_frame=data,
    x='energy',
    y='acousticness',
    color='playlist',
    size='track.duration_ms',  # marker size follows the track duration
    hover_name='track.name',   # song title shown on hover
)
fig.show()

In [8]:
# Histogram of track popularity, colour-coded by playlist
fig = px.histogram(
    data_frame=data,
    x='track.popularity',
    color='playlist',
    marginal='rug',  # rug plot of the individual values alongside the histogram
)
fig.show()