In [1]:
import pandas as pd
import numpy as np
from IPython.display import Markdown as md
# Display the NumPy version this notebook was run with (for reproducibility).
np.__version__

'1.19.5'

# Load in the Hot 100 data
Basically, Sean Miller (who made the data.world dataset I used for the prototype) keeps [a super up-to-date list of the Hot 100 on GitHub](https://github.com/HipsterVizNinja/random-data/tree/main/Music/hot-100); last I checked, it ran through much of 2022.

In [2]:
# The only columns the rest of the notebook reads from the chart file.
USEFUL_COLUMNS = ['chart_position', 'chart_debut', 'song', 'performer', 'song_id']

# `usecols` makes the parser skip every other column instead of loading the
# whole file and throwing most of it away. The trailing selection is kept so
# the columns come out in USEFUL_COLUMNS order rather than file order.
hot_100_raw = pd.read_csv(
    './data/Hot 100 through Oct 2022.csv',
    usecols=USEFUL_COLUMNS,
)[USEFUL_COLUMNS]

In [3]:
# Peek at the first rows to sanity-check the loaded columns.
hot_100_raw.head()

Unnamed: 0,chart_position,chart_debut,song,performer,song_id
0,84,1990-05-05,"""B"" Girls",Young And Restless,"""B"" GirlsYoung And Restless"
1,78,1990-05-05,"""B"" Girls",Young And Restless,"""B"" GirlsYoung And Restless"
2,68,1990-05-05,"""B"" Girls",Young And Restless,"""B"" GirlsYoung And Restless"
3,60,1990-05-05,"""B"" Girls",Young And Restless,"""B"" GirlsYoung And Restless"
4,58,1990-05-05,"""B"" Girls",Young And Restless,"""B"" GirlsYoung And Restless"


In [4]:
# One row per distinct song_id; the row count is the number of unique songs.
unique_songs = hot_100_raw.drop_duplicates(subset=['song_id'])
md(f"## Unique songs available in full dataset: {len(unique_songs)}")

## Unique songs available in full dataset: 30314

In [5]:
# Cut-off for the filter below: a song is kept when its best (lowest)
# chart_position is less than or equal to this value.
SONG_RANKING_FILTER_THRESHOLD = 10

In [6]:
# Render the section heading from the constant so the text always matches the
# threshold actually used.
filter_heading = f"# Filter to unique songs that breached the top {SONG_RANKING_FILTER_THRESHOLD}"
md(filter_heading)

# Filter to unique songs that breached the top 10

In [7]:
# Collapse the chart rows to one row per song. Because the rows are sorted by
# ascending chart_position first, `.first()` keeps each song's best (lowest)
# position together with the first values of the other columns.
temp = (
    hot_100_raw
    .sort_values('chart_position')
    .groupby(['song_id'])
    .first()
)

# Keep only songs whose best position is at or inside the threshold, and turn
# song_id back into a regular column.
reached_threshold = temp['chart_position'] <= SONG_RANKING_FILTER_THRESHOLD
only_songs_that_made_it_above_x = temp.loc[reached_threshold].reset_index()

only_songs_that_made_it_above_x

Unnamed: 0,song_id,chart_position,chart_debut,song,performer
0,#9 DreamJohn Lennon,9,1974-12-21,#9 Dream,John Lennon
1,'03 Bonnie & ClydeJay-Z Featuring Beyonce Knowles,4,2002-10-26,'03 Bonnie & Clyde,Jay-Z Featuring Beyonce Knowles
2,'65 Love AffairPaul Davis,6,1982-02-27,'65 Love Affair,Paul Davis
3,('til) I Kissed YouThe Everly Brothers,4,1959-08-15,('til) I Kissed You,The Everly Brothers
4,(Can't Live Without Your) Love And AffectionNe...,1,1990-07-07,(Can't Live Without Your) Love And Affection,Nelson
...,...,...,...,...,...
5061,everything i wantedBillie Eilish,8,2019-11-23,everything i wanted,Billie Eilish
5062,iSpyKYLE Featuring Lil Yachty,4,2017-01-14,iSpy,KYLE Featuring Lil Yachty
5063,interludeJ. Cole,8,2021-05-22,interlude,J. Cole
5064,"my.lifeJ. Cole, 21 Savage & Morray",2,2021-05-29,my.life,"J. Cole, 21 Savage & Morray"


# Pull Spotify
Pull my own metadata from [Spotify](https://developer.spotify.com/documentation/web-api/), using [Spotipy](https://spotipy.readthedocs.io/en/2.19.0/).

### NOTE: unfortunately, Spotify doesn't (or no longer does?) give you the genre for a given song. The best you can get is the genres of the *artist*. Darn...

In [8]:
import spotipy
import sys
from spotipy.oauth2 import SpotifyClientCredentials

# No credentials are passed explicitly, so the auth manager has to find them on
# its own -- presumably from the SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET
# environment variables (TODO: confirm against the Spotipy docs).
client_credentials = SpotifyClientCredentials()
spotify = spotipy.Spotify(auth_manager=client_credentials)

## TEST / DEMOS

### Search for Radiohead

In [9]:
name = 'Radiohead'

# Artist-scoped search; the matches sit under results['artists']['items'] and
# only the first one is displayed.
results = spotify.search(q=f'artist:{name}', type='artist')
items = results['artists']['items']
items[0]

{'external_urls': {'spotify': 'https://open.spotify.com/artist/4Z8W4fKeB5YxbusRsdQVPb'},
 'followers': {'href': None, 'total': 7479537},
 'genres': ['alternative rock',
  'art rock',
  'melancholia',
  'oxford indie',
  'permanent wave',
  'rock'],
 'href': 'https://api.spotify.com/v1/artists/4Z8W4fKeB5YxbusRsdQVPb',
 'id': '4Z8W4fKeB5YxbusRsdQVPb',
 'images': [{'height': 640,
   'url': 'https://i.scdn.co/image/ab6761610000e5eba03696716c9ee605006047fd',
   'width': 640},
  {'height': 320,
   'url': 'https://i.scdn.co/image/ab67616100005174a03696716c9ee605006047fd',
   'width': 320},
  {'height': 160,
   'url': 'https://i.scdn.co/image/ab6761610000f178a03696716c9ee605006047fd',
   'width': 160}],
 'name': 'Radiohead',
 'popularity': 79,
 'type': 'artist',
 'uri': 'spotify:artist:4Z8W4fKeB5YxbusRsdQVPb'}

### Get John Lennon genres

In [10]:
# Same artist search as the demo above, but show only the genre list of the
# first match.
lennon_matches = spotify.search(q='artist:John Lennon', type='artist')['artists']['items']
lennon_matches[0]['genres']

['album rock', 'art rock', 'beatlesque', 'classic rock', 'mellow gold', 'rock']

### Search for Rolling in the Deep, then use the URI to fetch details

In [11]:
track = 'Rolling in the Deep'

# Track-scoped search; the URI of the first hit is what the later lookups key on.
results = spotify.search(q='track:' + track, type='track')
top_track = results['tracks']['items'][0]
fetched_track_uri = top_track['uri']

In [12]:
# Follow the first search hit to its album and fetch the full album record.
first_hit_album = results['tracks']['items'][0]['album']
fetched_album_uri = first_hit_album['uri']
spotify.album(fetched_album_uri)

{'album_type': 'album',
 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4dpARuHxo51G3z768sgnrY'},
   'href': 'https://api.spotify.com/v1/artists/4dpARuHxo51G3z768sgnrY',
   'id': '4dpARuHxo51G3z768sgnrY',
   'name': 'Adele',
   'type': 'artist',
   'uri': 'spotify:artist:4dpARuHxo51G3z768sgnrY'}],
 'available_markets': ['AR',
  'BO',
  'BR',
  'BZ',
  'CL',
  'CO',
  'CR',
  'DO',
  'EC',
  'GT',
  'GY',
  'HN',
  'MX',
  'NI',
  'PA',
  'PE',
  'PY',
  'SR',
  'SV',
  'US',
  'UY',
  'VE'],
 'copyrights': [{'text': '2011 XL Recordings Ltd', 'type': 'C'},
  {'text': '2011 XL Recordings Ltd', 'type': 'P'}],
 'external_ids': {'upc': '191404113851'},
 'external_urls': {'spotify': 'https://open.spotify.com/album/0Lg1uZvI312TPqxNWShFXL'},
 'genres': [],
 'href': 'https://api.spotify.com/v1/albums/0Lg1uZvI312TPqxNWShFXL',
 'id': '0Lg1uZvI312TPqxNWShFXL',
 'images': [{'height': 640,
   'url': 'https://i.scdn.co/image/ab67616d0000b2732118bf9b198b05a95ded6300',
   'w

In [13]:
# Genres are read from the artist record: take the first listed artist of the
# first search hit and look that artist up.
lead_artist = results['tracks']['items'][0]['artists'][0]
fetched_artist_uri = lead_artist['uri']
spotify.artist(fetched_artist_uri)['genres']

['british soul', 'pop', 'pop soul', 'uk pop']

In [14]:
# Two separate lookups keyed on the same track URI. `analysis` is not read
# again anywhere in the visible notebook; `features` is displayed in the next cell.
analysis = spotify.audio_analysis(fetched_track_uri)
features = spotify.audio_features(fetched_track_uri)

In [15]:
# Show the audio features returned for the track (a list holding one dict).
features

[{'danceability': 0.73,
  'energy': 0.769,
  'key': 8,
  'loudness': -5.114,
  'mode': 1,
  'speechiness': 0.0298,
  'acousticness': 0.138,
  'instrumentalness': 0,
  'liveness': 0.0473,
  'valence': 0.507,
  'tempo': 104.948,
  'type': 'audio_features',
  'id': '1c8gk2PeTE04A1pIDH9YMk',
  'uri': 'spotify:track:1c8gk2PeTE04A1pIDH9YMk',
  'track_href': 'https://api.spotify.com/v1/tracks/1c8gk2PeTE04A1pIDH9YMk',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/1c8gk2PeTE04A1pIDH9YMk',
  'duration_ms': 228093,
  'time_signature': 4}]

# Merge in Spotify data (if any)

In [16]:
# No Spotify fields are merged in at this point: the export frame is simply
# another name for the filtered chart data (same object, not a copy).
ready_for_export = only_songs_that_made_it_above_x

In [18]:
# Write the final table to disk; index=False keeps the positional row index out
# of the file.
export_path = './data/1 DONE RIGHT OUTPUT unique songs.csv'
ready_for_export.to_csv(export_path, index=False)