# INFO 3402 - Module Assignment 3
[Jason Kibozi-Yocka](https://github.com/jkibozi)<br>
University of Colorado Boulder<br>

In [1]:
# Libraries for working with APIs
import requests as req
import json

# Libraries for parsing HTML
from bs4 import BeautifulSoup as bs

# Libraries for dataframes
import numpy as np
import pandas as pd
pd.options.display.max_columns = 200

# Libraries for sentiment analysis
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as vsia

# Libraries for data visualization
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sb

In order to use/import vader: `pip install vaderSentiment`

## API #1: [Billboard Top 100](https://github.com/guoguo12/billboard-charts)
`pip install billboard.py`<br>
[billboard.py documentation](https://github.com/guoguo12/billboard-charts)

In [2]:
# libraries for working with billboard api
import billboard as bb

In [3]:
# retrieve billboard hot 100 data
hot100 = bb.ChartData('hot-100')

In [4]:
# let's see what type of object this is
type(hot100)

billboard.ChartData

In [5]:
# pandas can't convert this datatype to a dataframe so...
# let's turn it into a list of lists
billList = []
for i in range(100):
    song = hot100[i]
    entry = [song.title,song.artist,song.weeks]
    billList.append(entry)

In [6]:
# let's check out our list
billList

[['HIGHEST IN THE ROOM', 'Travis Scott', 1],
 ['Truth Hurts', 'Lizzo', 23],
 ['Senorita', 'Shawn Mendes & Camila Cabello', 16],
 ['10,000 Hours', 'Dan + Shay & Justin Bieber', 1],
 ['Someone You Loved', 'Lewis Capaldi', 22],
 ['Circles', 'Post Malone', 6],
 ['No Guidance', 'Chris Brown Featuring Drake', 18],
 ['Ran$om', 'Lil Tecca', 19],
 ['Bad Guy', 'Billie Eilish', 28],
 ['Panini', 'Lil Nas X', 16],
 ['Bandit', 'Juice WRLD & YoungBoy Never Broke Again', 1],
 ['Goodbyes', 'Post Malone Featuring Young Thug', 14],
 ["I Don't Care", 'Ed Sheeran & Justin Bieber', 22],
 ['Old Town Road', 'Lil Nas X Featuring Billy Ray Cyrus', 32],
 ['Beautiful People', 'Ed Sheeran Featuring Khalid', 15],
 ['Playing Games', 'Summer Walker', 6],
 ['Talk', 'Khalid', 35],
 ['Sunflower (Spider-Man: Into The Spider-Verse)',
  'Post Malone & Swae Lee',
  51],
 ['Sucker', 'Jonas Brothers', 32],
 ['Memories', 'Maroon 5', 3],
 ['Money In The Grave', 'Drake Featuring Rick Ross', 17],
 ['Trampoline', 'SHAED', 19],
 ['

In [7]:
# let's break down the artists even further by seperating them with ';'
for i in range(100):
    song = billList[i]
    artists = song[1]
    if 'Featuring' in artists:
        artists = artists.replace(' Featuring ',';')
    if '&' in artists:
        artists = artists.replace(' & ',';')
    if ',' in artists:
        artists = artists.replace(', ',';')
    if '+' in artists:
        artists = artists.replace(' + ',';')
    billList[i][1] = artists

In [8]:
# let's check out our list
billList

[['HIGHEST IN THE ROOM', 'Travis Scott', 1],
 ['Truth Hurts', 'Lizzo', 23],
 ['Senorita', 'Shawn Mendes;Camila Cabello', 16],
 ['10,000 Hours', 'Dan;Shay;Justin Bieber', 1],
 ['Someone You Loved', 'Lewis Capaldi', 22],
 ['Circles', 'Post Malone', 6],
 ['No Guidance', 'Chris Brown;Drake', 18],
 ['Ran$om', 'Lil Tecca', 19],
 ['Bad Guy', 'Billie Eilish', 28],
 ['Panini', 'Lil Nas X', 16],
 ['Bandit', 'Juice WRLD;YoungBoy Never Broke Again', 1],
 ['Goodbyes', 'Post Malone;Young Thug', 14],
 ["I Don't Care", 'Ed Sheeran;Justin Bieber', 22],
 ['Old Town Road', 'Lil Nas X;Billy Ray Cyrus', 32],
 ['Beautiful People', 'Ed Sheeran;Khalid', 15],
 ['Playing Games', 'Summer Walker', 6],
 ['Talk', 'Khalid', 35],
 ['Sunflower (Spider-Man: Into The Spider-Verse)', 'Post Malone;Swae Lee', 51],
 ['Sucker', 'Jonas Brothers', 32],
 ['Memories', 'Maroon 5', 3],
 ['Money In The Grave', 'Drake;Rick Ross', 17],
 ['Trampoline', 'SHAED', 19],
 ['Only Human', 'Jonas Brothers', 17],
 ['Good As Hell', 'Lizzo', 7],

In [9]:
# let's turn out list into a dataframe
billDF = pd.DataFrame(data = billList)
billDF.columns = ['Song Title','Artists','Weeks on Billboard']

billDF.head()

Unnamed: 0,Song Title,Artists,Weeks on Billboard
0,HIGHEST IN THE ROOM,Travis Scott,1
1,Truth Hurts,Lizzo,23
2,Senorita,Shawn Mendes;Camila Cabello,16
3,"10,000 Hours",Dan;Shay;Justin Bieber,1
4,Someone You Loved,Lewis Capaldi,22


## API #2: [Spotify for Developers](https://developer.spotify.com/documentation/web-api/)
`pip install spotipy`<br>
[spotipy documentation](https://spotipy.readthedocs.io/en/latest/)

In [10]:
# Libraries for working with spotify api
import spotipy as sy
from spotipy.oauth2 import SpotifyClientCredentials as sycred

In [11]:
# retrieve API credentials
with open('api_keys.json','r') as file:
    creds = json.load(file)

# create a variable that handles requests
handler = sycred(client_id = creds['spotify']['client_id'],
                 client_secret = creds['spotify']['client_secret'])

sp = sy.Spotify(client_credentials_manager = handler)

In [12]:
# add new columns to our top100 datafframe for the Spotify API
billDF = billDF.reindex(columns = ['Song Title','Artists','Weeks on Billboard',
                                   'Key','Mode','Acousticness','Danceability',
                                   'Energy','Instrumentalness','Liveness',
                                   'Loudness','Speechiness','Valence','Tempo',
                                  ])

billDF.head()

Unnamed: 0,Song Title,Artists,Weeks on Billboard,Key,Mode,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Valence,Tempo
0,HIGHEST IN THE ROOM,Travis Scott,1,,,,,,,,,,,
1,Truth Hurts,Lizzo,23,,,,,,,,,,,
2,Senorita,Shawn Mendes;Camila Cabello,16,,,,,,,,,,,
3,"10,000 Hours",Dan;Shay;Justin Bieber,1,,,,,,,,,,,
4,Someone You Loved,Lewis Capaldi,22,,,,,,,,,,,


In [13]:
# create a function to population the new columns in dataframe with the values
# from our Spotify API
def spSong(song_name):
    trackID = sp.search(song_name,type='track')['tracks']['items'][1]['uri']
    trackFeat = sp.audio_features(trackID)
    iSong = billDF.index[billDF['Song Title'] == song_name].tolist()[0]
    # Key
    billDF.at[iSong,'Key'] = trackFeat[0]['key']
    # Mode
    billDF.at[iSong,'Mode'] = trackFeat[0]['mode']
    # Acousticness
    billDF.at[iSong,'Acousticness'] = trackFeat[0]['acousticness']
    # Danceability
    billDF.at[iSong,'Danceability'] = trackFeat[0]['danceability']
    # Energy
    billDF.at[iSong,'Energy'] = trackFeat[0]['energy']
    # Instrumentalness
    billDF.at[iSong,'Instrumentalness'] = trackFeat[0]['instrumentalness']
    # Liveness
    billDF.at[iSong,'Liveness'] = trackFeat[0]['liveness']
    # Loudness
    billDF.at[iSong,'Loudness'] = trackFeat[0]['loudness']
    # Speechiness
    billDF.at[iSong,'Speechiness'] = trackFeat[0]['speechiness']
    # Valence
    billDF.at[iSong,'Valence'] = trackFeat[0]['valence']
    # Tempo
    billDF.at[iSong,'Tempo'] = trackFeat[0]['tempo']

In [14]:
# populate my dataframe
for i, song in billDF.iterrows():
    spSong(song[0])

In [15]:
# let's check it out
billDF.head()

Unnamed: 0,Song Title,Artists,Weeks on Billboard,Key,Mode,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Valence,Tempo
0,HIGHEST IN THE ROOM,Travis Scott,1,10.0,0.0,0.939,0.818,0.0892,0.614,0.114,-21.319,0.157,0.414,113.931
1,Truth Hurts,Lizzo,23,4.0,0.0,0.193,0.706,0.628,0.0,0.126,-2.94,0.201,0.38,158.036
2,Senorita,Shawn Mendes;Camila Cabello,16,9.0,0.0,0.0392,0.759,0.548,0.0,0.0828,-6.049,0.029,0.749,116.967
3,"10,000 Hours",Dan;Shay;Justin Bieber,1,5.0,0.0,0.203,0.598,0.405,0.0,0.311,-8.155,0.0464,0.244,122.144
4,Someone You Loved,Lewis Capaldi,22,1.0,1.0,0.751,0.501,0.405,0.0,0.105,-5.679,0.0319,0.446,109.891


## API #3: [Genius API](https://docs.genius.com/)
`pip install lyricsgenius`<br>
 
[lyricsgenius documentation](https://github.com/johnwmillr/LyricsGenius)
#### + Vadar Sentiment Analysis

In [16]:
# Libraries for working with genius api
import lyricsgenius as lg

In [17]:
# impliment credentials for accessing API
gpA = lg.Genius(creds['genius']['access_token'])

# create a function for requesting artists
def gpArtist(artist_name,song_amount):
    artist = gpA.search_artist(artist_name, max_songs = song_amount,
                                    sort = "title")
    return artist.songs

# create function for requesting songs
def gpSong(song_name,artist_name):
    song = gpA.search_song(song_name,artist_name)
    return song

In [18]:
# this line of code is here to make vader easier to use
analyzer = vsia()

In [19]:
# add new columns to our top100 datafframe for the Spotify API
billDF = billDF.reindex(columns = ['Song Title','Artists','Weeks on Billboard',
                                   'Key','Mode','Acousticness','Danceability',
                                   'Energy','Instrumentalness','Liveness',
                                   'Loudness','Speechiness','Valence','Tempo',
                                   'vNeg','vNeu','vPos','vCompound'
                                  ])

billDF.head()

Unnamed: 0,Song Title,Artists,Weeks on Billboard,Key,Mode,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Valence,Tempo,vNeg,vNeu,vPos,vCompound
0,HIGHEST IN THE ROOM,Travis Scott,1,10.0,0.0,0.939,0.818,0.0892,0.614,0.114,-21.319,0.157,0.414,113.931,,,,
1,Truth Hurts,Lizzo,23,4.0,0.0,0.193,0.706,0.628,0.0,0.126,-2.94,0.201,0.38,158.036,,,,
2,Senorita,Shawn Mendes;Camila Cabello,16,9.0,0.0,0.0392,0.759,0.548,0.0,0.0828,-6.049,0.029,0.749,116.967,,,,
3,"10,000 Hours",Dan;Shay;Justin Bieber,1,5.0,0.0,0.203,0.598,0.405,0.0,0.311,-8.155,0.0464,0.244,122.144,,,,
4,Someone You Loved,Lewis Capaldi,22,1.0,1.0,0.751,0.501,0.405,0.0,0.105,-5.679,0.0319,0.446,109.891,,,,


In [20]:
billDF.tail()

Unnamed: 0,Song Title,Artists,Weeks on Billboard,Key,Mode,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Valence,Tempo,vNeg,vNeu,vPos,vCompound
95,Dance Monkey,Tones And I,1,9.0,1.0,0.00387,0.731,0.839,2.4e-05,0.211,-4.509,0.0291,0.948,125.892,,,,
96,POP STAR,DaBaby;Kevin Gates,2,9.0,0.0,0.0374,0.729,0.703,0.000189,0.0998,-6.771,0.12,0.437,155.008,,,,
97,Self Control,YoungBoy Never Broke Again,5,8.0,1.0,0.765,0.572,0.209,0.0,0.356,-10.413,0.0313,0.446,80.069,,,,
98,No Me Conoce,Jhay Cortez;J Balvin;Bad Bunny,14,10.0,0.0,0.145,0.806,0.786,0.0,0.0935,-3.831,0.0741,0.58,91.973,,,,
99,Slow Dancing In The Dark,Joji,2,5.0,0.0,0.00616,0.752,0.883,0.0998,0.167,-4.45,0.0426,0.509,122.026,,,,


In [21]:
# use genius api to grab the lyrics for each song in the hot100 dataframe
for i, song in billDF.iterrows():
    song_name = song[0]
    song_lyrics = gpSong(song[0],song[1]).lyrics
    sentiment = analyzer.polarity_scores(song_lyrics)
    iSong = billDF.index[billDF['Song Title'] == song_name].tolist()[0]
    # Negative Vader Score
    billDF.at[iSong,'vNeg'] = sentiment['neg']
    # Neutral Vader Score
    billDF.at[iSong,'vNeu'] = sentiment['neu']
    # Positive Vader Score
    billDF.at[iSong,'vPos'] = sentiment['pos']
    # Compound Vader Score
    billDF.at[iSong,'vCompound'] = sentiment['compound']

Searching for "HIGHEST IN THE ROOM" by Travis Scott...
Done.
Searching for "Truth Hurts" by Lizzo...
Done.
Searching for "Senorita" by Shawn Mendes;Camila Cabello...
Done.
Searching for "10,000 Hours" by Dan;Shay;Justin Bieber...
Done.
Searching for "Someone You Loved" by Lewis Capaldi...
Done.
Searching for "Circles" by Post Malone...
Done.
Searching for "No Guidance" by Chris Brown;Drake...
Done.
Searching for "Ran$om" by Lil Tecca...
Done.
Searching for "Bad Guy" by Billie Eilish...
Done.
Searching for "Panini" by Lil Nas X...
Done.
Searching for "Bandit" by Juice WRLD;YoungBoy Never Broke Again...
Done.
Searching for "Goodbyes" by Post Malone;Young Thug...
Done.
Searching for "I Don't Care" by Ed Sheeran;Justin Bieber...
Done.
Searching for "Old Town Road" by Lil Nas X;Billy Ray Cyrus...
Done.
Searching for "Beautiful People" by Ed Sheeran;Khalid...
Done.
Searching for "Playing Games" by Summer Walker...
Done.
Searching for "Talk" by Khalid...
Done.
Searching for "Sunflower (Spider

In [22]:
# let's check it out
billDF.head()

Unnamed: 0,Song Title,Artists,Weeks on Billboard,Key,Mode,Acousticness,Danceability,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Valence,Tempo,vNeg,vNeu,vPos,vCompound
0,HIGHEST IN THE ROOM,Travis Scott,1,10.0,0.0,0.939,0.818,0.0892,0.614,0.114,-21.319,0.157,0.414,113.931,0.052,0.839,0.109,0.9747
1,Truth Hurts,Lizzo,23,4.0,0.0,0.193,0.706,0.628,0.0,0.126,-2.94,0.201,0.38,158.036,0.075,0.628,0.297,0.9994
2,Senorita,Shawn Mendes;Camila Cabello,16,9.0,0.0,0.0392,0.759,0.548,0.0,0.0828,-6.049,0.029,0.749,116.967,0.024,0.712,0.265,0.9989
3,"10,000 Hours",Dan;Shay;Justin Bieber,1,5.0,0.0,0.203,0.598,0.405,0.0,0.311,-8.155,0.0464,0.244,122.144,0.021,0.772,0.208,0.9978
4,Someone You Loved,Lewis Capaldi,22,1.0,1.0,0.751,0.501,0.405,0.0,0.105,-5.679,0.0319,0.446,109.891,0.076,0.787,0.138,0.9788


In [24]:
# export my dataframe to excel for visualization in Tableau
billDF.to_excel("API_Data.xlsx", sheet_name='API_Data')