In [12]:
# add libraries
import requests
import json
import pandas as pd
import os
import pprint
import time
import re

# spotipy
import spotipy
from spotipy.oauth2 import SpotifyOAuth

# lyrics genius
from lyricsgenius import Genius

# billboard
from bs4 import BeautifulSoup

# Data Acquisition
### Grab data from different API sources (Spotify, Genius, Billboard 100)

In [None]:
# function for grabbing api key
def get_file_contents(filename):
    """ Given a filename,
        return the contents of that file
    """
    try:
        with open(filename, 'r') as f:
            # It's assumed our file contains a single line,
            # with our API key
            return f.read().strip()
    except FileNotFoundError:
        print("'%s' file not found" % filename)

#### Spotify

In [None]:
spotify_client = "../spotify_client"
spotify_client_secret = "../spotify_client_secret"
spotify_redirect = "../spotify_redirect"

os.environ['SPOTIPY_CLIENT_ID'] = get_file_contents(spotify_client)
os.environ['SPOTIPY_CLIENT_SECRET'] = get_file_contents(spotify_client_secret)
os.environ['SPOTIPY_REDIRECT_URI'] = get_file_contents(spotify_redirect)


spotify_artist_id = "06HL4z0CvFAxyc27GXpf02"

Endpoint usage: artist albums -> for each get album tracks -> for each track get the tracks audio features

In [None]:
# # artist albums
# scope = "user-library-read"

# sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

# results = sp.current_user_saved_tracks()
# for idx, item in enumerate(results['items']):
#     track = item['track']
#     print(idx, track['artists'][0]['name'], " – ", track['name'])

In [None]:
scope = "user-library-read"

sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

In [None]:
taylor_url = f'spotify:artist:{spotify_artist_id}'
results = sp.artist_albums(taylor_url, album_type='album', country = 'CA')

albums = results['items']

while results['next']:
    results = sp.next(results)
    albums.extend(results['items'])

# for album in albums:
#     print(album['id'])

# for album in albums:
#     print(album['name'])

In [None]:
# there are some duplicate albums because some songs are explicit creating a new album
# so lets just try to take the unique album names
# after grabbing unique album also take the singles potentially
# lets take songs from these albums
# pick Taylor's Version and Deluxe Albums
# deluxe version usually is a rerelease but with more songs and with some acoustic versions

# I will put the index of the album beside the name
# Midnights (3am Edition)        0 
# Red (Taylor's Version)         4
# Fearless (Taylor's Version)    6
# evermore (deluxe version)      7
# folklore (deluxe version)     13
# Lover                         17
# reputation                    19
# 1989 (deluxe version)         26               
# Taylor Swift                  45
# Speak Now (Deluxe Edition)    36
picked_albums= []

picked_albums.append(albums[2])
picked_albums.append(albums[6])
picked_albums.append(albums[8])
picked_albums.append(albums[9])
picked_albums.append(albums[15])
picked_albums.append(albums[19])
picked_albums.append(albums[21])
picked_albums.append(albums[28])
picked_albums.append(albums[45])
picked_albums.append(albums[38])

In [None]:
for album in picked_albums:
    print(album['name'])

In [None]:
tracks = []
album_names = []
album_release_date = []
for album in picked_albums:
    for i in range(album['total_tracks']):
        album_names.append(album['name'])
        album_release_date.append(album['release_date'])
    results = sp.album_tracks(album['id'])
    tracks.extend(results['items'])

names_tracks = []
for track in tracks:
    names_tracks.append(track['name'])
    print(track['name'])

print(len(tracks))
# there are 197 tracks

In [None]:
# using the track id get track audio features and audio analysis
# getting audio features requires sending a get request for a comma seperated list of spotify ids with max 100 IDS

track_ids = []

for track in tracks:
    track_ids.append(track['id'])

In [None]:
# now we make api call to get the audio features
audio_features = []

results = sp.audio_features(track_ids[0:100])
audio_features.extend(results)

results = sp.audio_features(track_ids[100:])
audio_features.extend(results)

In [None]:
audio_features

In [None]:
# make a data frame to store all the data
first_df = pd.DataFrame.from_dict(audio_features)

In [None]:
first_df.head()

In [None]:
# now add names to each and the album that each track belongs to
first_df['Name'] = names_tracks

# lets also try to add the album names
first_df['Album Name'] = album_names

# add release date of album
first_df['Release Date'] = album_release_date

In [None]:
first_df.head()

In [None]:
# now we can cut out some features
# like urls, type, href

first_df = first_df.drop(['analysis_url', 'track_href', 'uri', 'type'], axis = 1)

In [None]:
first_df.head()

In [None]:
# check for null values

first_df.isnull().any()

In [None]:
# save this csv 
first_df.to_csv('../../data/picked_tracks.csv')

#### Genius

In [None]:
# use the lyricsgenius library by John Miller 
# https://lyricsgenius.readthedocs.io/en/master/

GENIUS_API_TOKEN = "../genius_api"

os.environ['GENIUS_API_TOKEN'] = get_file_contents(GENIUS_API_TOKEN)
genius =  Genius(os.getenv('GENIUS_API_TOKEN'))


In [None]:
# artist = genius.search_artist("Taylor Swift", max_songs=3, sort="title")
# print(artist.songs)

In [None]:
song = genius.search_song("Lavender Haze", artist.name)

In [None]:
print(song.lyrics)

In [None]:
# make a giant corpus of the picked tracks lyrics to analyze using NLP
# or make a corpus out of the album track lyrics

In [None]:
unique_albums = first_df['Album Name'].unique().tolist()

In [None]:
unique_albums

In [None]:
# 1989 title wrong
unique_albums.remove("1989 (Deluxe Edition)")
unique_albums.append("1989 (Deluxe)")

# same with Speak Now

unique_albums.remove("Speak Now (Deluxe Edition)")
unique_albums.append("Speak Now (Deluxe)")

In [None]:
unique_albums

In [None]:
genius.remove_section_headers = True # Remove section headers (e.g. [Chorus]) from lyrics when searching
genius.skip_non_songs = False # Include hits thought to be non-songs (e.g. track lists)
genius.excluded_terms = ["(Remix)", "(Live)", "Memo", "Pop"] # Exclude songs with these words in their title

# for album in unique_albums:
#     album = genius.search_album(album, "Taylor Swift")
#     album.save_lyrics()
#     time.sleep(30)

In [None]:
album = genius.search_album("Taylor Swift")
# album.save_lyrics()
album

# find the English version 
# albumID 
# 12682

In [None]:
album = genius.search_album(album_id=12682)
album.save_lyrics()

In [78]:
# now time to parse the lyrics
# grab the lyrics and line number seperated by \n in the json

with open('../../data/lyrics/Lyrics_TaylorSwift.json', 'r') as f:
  data = json.load(f)

In [79]:
dict = {}
dict['title'] = []
dict['lyrics'] = []
dict['line'] = []
dict['album'] = []
dict['year'] = []

# the first line doesn't contain lyrics, there is also an ad inside the lyrics? 
# get rid of 1st line and the 30 line
# See Taylor Swift LiveGet tickets as low as $1,111You might also like
# the last line also contains an embed 

    
for track in data['tracks']:
    title = track.get('song').get('title')
    lyrics = track.get('song').get('lyrics')
    lines = lyrics.split("\n")
    album = data['name']
    year = data['release_date_components']['year']
    line_number = 0
    
    if '[Liner Notes]' in title:
        continue
    for line in lines:
        lower_line = line.lower()
        
        if 'contributorstranslations' in lower_line:
            continue

        if line == "":
            continue

        if 'get tickets' in lower_line:
            continue

        if '25Embed' in line:
            line = re.sub('25Embed', '', line)

        dict['year'].append(year)
        dict['album'].append(album)
        dict['title'].append(title)
        dict['lyrics'].append(line)
        dict['line'].append(line_number)
        line_number += 1

In [80]:
dict

{'title': ['Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Tim McGraw',
  'Picture to Burn',
  'Picture to Burn',
  'Picture to Burn',
  'Picture to Burn',
  'Picture to Burn',
  'Picture to Burn',
 

In [81]:
taylor_swift_df = pd.DataFrame.from_dict(dict)

taylor_swift_df.sample(10)

Unnamed: 0,title,lyrics,line,album,year
580,Teardrops on My Guitar (Pop Version),"And there he goes, so perfectly",21,Taylor Swift,2006
453,I’m Only Me When I’m With You,And you know everything about me,21,Taylor Swift,2006
358,Mary’s Song (Oh My My My),And our daddies used to joke about the two of us,17,Taylor Swift,2006
212,Tied Together with a Smile,"The water's high, you're jumping into it and l...",6,Taylor Swift,2006
452,I’m Only Me When I’m With You,"Through it all, nobody gets me like you do",20,Taylor Swift,2006
99,Teardrops On My Guitar,Drew walks by me,15,Taylor Swift,2006
326,Should’ve Said No,I can't resist,35,Taylor Swift,2006
473,I’m Only Me When I’m With You,"Well, I'm only me when I'm with you",41,Taylor Swift,2006
477,Invisible,Light up when you smile,1,Taylor Swift,2006
567,Teardrops on My Guitar (Pop Version),Drew talks to me,8,Taylor Swift,2006


In [82]:
with open('../../data/lyrics/Lyrics_RedTaylorsVersion.json', 'r') as f:
  data = json.load(f)

dict = {}
dict['title'] = []
dict['lyrics'] = []
dict['line'] = []
dict['album'] = []
dict['year'] = []

for track in data['tracks']:
    title = track.get('song').get('title')
    lyrics = track.get('song').get('lyrics')
    lines = lyrics.split("\n")
    album = data['name']
    year = data['release_date_components']['year']
    line_number = 0
    
    if '[Liner Notes]' in title:
        continue
    for line in lines:
        lower_line = line.lower()
        
        if 'contributorstranslations' in lower_line:
            continue

        if line == "":
            continue

        if 'get tickets' in lower_line:
            continue

        if '25Embed' in line:
            line = re.sub('25Embed', '', line)

        dict['year'].append(year)
        dict['album'].append(album)
        dict['title'].append(title)
        dict['lyrics'].append(line)
        dict['line'].append(line_number)
        line_number += 1

red_df = pd.DataFrame.from_dict(dict)

In [83]:
with open('../../data/lyrics/Lyrics_1989Deluxe.json', 'r') as f:
  data = json.load(f)

dict = {}
dict['title'] = []
dict['lyrics'] = []
dict['line'] = []
dict['album'] = []
dict['year'] = []

for track in data['tracks']:
    title = track.get('song').get('title')
    lyrics = track.get('song').get('lyrics')
    lines = lyrics.split("\n")
    album = data['name']
    year = data['release_date_components']['year']
    line_number = 0
    
    if '[Liner Notes]' in title:
        continue
    for line in lines:
        lower_line = line.lower()
        
        if 'contributorstranslations' in lower_line:
            continue

        if line == "":
            continue

        if 'get tickets' in lower_line:
            continue

        if '25Embed' in line:
            line = re.sub('25Embed', '', line)

        dict['year'].append(year)
        dict['album'].append(album)
        dict['title'].append(title)
        dict['lyrics'].append(line)
        dict['line'].append(line_number)
        line_number += 1

deluxe_1989_df = pd.DataFrame.from_dict(dict)

In [84]:
with open('../../data/lyrics/Lyrics_evermoredeluxeversion.json', 'r') as f:
  data = json.load(f)

dict = {}
dict['title'] = []
dict['lyrics'] = []
dict['line'] = []
dict['album'] = []
dict['year'] = []

for track in data['tracks']:
    title = track.get('song').get('title')
    lyrics = track.get('song').get('lyrics')
    lines = lyrics.split("\n")
    album = data['name']
    year = data['release_date_components']['year']
    line_number = 0
    
    if '[Liner Notes]' in title:
        continue
    for line in lines:
        lower_line = line.lower()
        
        if 'contributorstranslations' in lower_line:
            continue

        if line == "":
            continue

        if 'get tickets' in lower_line:
            continue

        if '25Embed' in line:
            line = re.sub('25Embed', '', line)

        dict['year'].append(year)
        dict['album'].append(album)
        dict['title'].append(title)
        dict['lyrics'].append(line)
        dict['line'].append(line_number)
        line_number += 1

evermore_df = pd.DataFrame.from_dict(dict)

In [85]:
with open('../../data/lyrics/Lyrics_FearlessTaylorsVersion.json', 'r') as f:
  data = json.load(f)

dict = {}
dict['title'] = []
dict['lyrics'] = []
dict['line'] = []
dict['album'] = []
dict['year'] = []

for track in data['tracks']:
    title = track.get('song').get('title')
    lyrics = track.get('song').get('lyrics')
    lines = lyrics.split("\n")
    album = data['name']
    year = data['release_date_components']['year']
    line_number = 0
    
    if '[Liner Notes]' in title:
        continue
    for line in lines:
        lower_line = line.lower()
        
        if 'contributorstranslations' in lower_line:
            continue

        if line == "":
            continue

        if 'get tickets' in lower_line:
            continue

        if '25Embed' in line:
            line = re.sub('25Embed', '', line)

        dict['year'].append(year)
        dict['album'].append(album)
        dict['title'].append(title)
        dict['lyrics'].append(line)
        dict['line'].append(line_number)
        line_number += 1

fearless_df = pd.DataFrame.from_dict(dict)

In [86]:
with open('../../data/lyrics/Lyrics_folkloredeluxeversion.json', 'r') as f:
  data = json.load(f)

dict = {}
dict['title'] = []
dict['lyrics'] = []
dict['line'] = []
dict['album'] = []
dict['year'] = []

for track in data['tracks']:
    title = track.get('song').get('title')
    lyrics = track.get('song').get('lyrics')
    lines = lyrics.split("\n")
    album = data['name']
    year = data['release_date_components']['year']
    line_number = 0
    
    if '[Liner Notes]' in title:
        continue
    for line in lines:
        lower_line = line.lower()
        
        if 'contributorstranslations' in lower_line:
            continue

        if line == "":
            continue

        if 'get tickets' in lower_line:
            continue

        if '25Embed' in line:
            line = re.sub('25Embed', '', line)

        dict['year'].append(year)
        dict['album'].append(album)
        dict['title'].append(title)
        dict['lyrics'].append(line)
        dict['line'].append(line_number)
        line_number += 1

folklore_df = pd.DataFrame.from_dict(dict)

In [87]:
with open('../../data/lyrics/Lyrics_Lover.json', 'r') as f:
  data = json.load(f)

dict = {}
dict['title'] = []
dict['lyrics'] = []
dict['line'] = []
dict['album'] = []
dict['year'] = []

for track in data['tracks']:
    title = track.get('song').get('title')
    lyrics = track.get('song').get('lyrics')
    lines = lyrics.split("\n")
    album = data['name']
    year = data['release_date_components']['year']
    line_number = 0
    
    if '[Liner Notes]' in title:
        continue
    for line in lines:
        lower_line = line.lower()
        
        if 'contributorstranslations' in lower_line:
            continue

        if line == "":
            continue

        if 'get tickets' in lower_line:
            continue

        if '25Embed' in line:
            line = re.sub('25Embed', '', line)

        dict['year'].append(year)
        dict['album'].append(album)
        dict['title'].append(title)
        dict['lyrics'].append(line)
        dict['line'].append(line_number)
        line_number += 1

lover_df = pd.DataFrame.from_dict(dict)

In [88]:
with open('../../data/lyrics/Lyrics_Midnights3amEdition.json', 'r') as f:
  data = json.load(f)

dict = {}
dict['title'] = []
dict['lyrics'] = []
dict['line'] = []
dict['album'] = []
dict['year'] = []

for track in data['tracks']:
    title = track.get('song').get('title')
    lyrics = track.get('song').get('lyrics')
    lines = lyrics.split("\n")
    album = data['name']
    year = data['release_date_components']['year']
    line_number = 0
    
    if '[Liner Notes]' in title:
        continue
    for line in lines:
        lower_line = line.lower()
        
        if 'contributorstranslations' in lower_line:
            continue

        if line == "":
            continue

        if 'get tickets' in lower_line:
            continue

        if '25Embed' in line:
            line = re.sub('25Embed', '', line)

        dict['year'].append(year)
        dict['album'].append(album)
        dict['title'].append(title)
        dict['lyrics'].append(line)
        dict['line'].append(line_number)
        line_number += 1

midnights_df = pd.DataFrame.from_dict(dict)

In [89]:
with open('../../data/lyrics/Lyrics_reputation.json', 'r') as f:
  data = json.load(f)

dict = {}
dict['title'] = []
dict['lyrics'] = []
dict['line'] = []
dict['album'] = []
dict['year'] = []

for track in data['tracks']:
    title = track.get('song').get('title')
    lyrics = track.get('song').get('lyrics')
    lines = lyrics.split("\n")
    album = data['name']
    year = data['release_date_components']['year']
    line_number = 0
    
    if '[Liner Notes]' in title:
        continue
    for line in lines:
        lower_line = line.lower()
        
        if 'contributorstranslations' in lower_line:
            continue

        if line == "":
            continue

        if 'get tickets' in lower_line:
            continue

        if '25Embed' in line:
            line = re.sub('25Embed', '', line)

        dict['year'].append(year)
        dict['album'].append(album)
        dict['title'].append(title)
        dict['lyrics'].append(line)
        dict['line'].append(line_number)
        line_number += 1

reputation_df = pd.DataFrame.from_dict(dict)

In [90]:
with open('../../data/lyrics/Lyrics_SpeakNowDeluxe.json', 'r') as f:
  data = json.load(f)

dict = {}
dict['title'] = []
dict['lyrics'] = []
dict['line'] = []
dict['album'] = []
dict['year'] = []

for track in data['tracks']:
    title = track.get('song').get('title')
    lyrics = track.get('song').get('lyrics')
    lines = lyrics.split("\n")
    album = data['name']
    year = data['release_date_components']['year']
    line_number = 0
    
    if '[Liner Notes]' in title:
        continue
    for line in lines:
        lower_line = line.lower()
        
        if 'contributorstranslations' in lower_line:
            continue

        if line == "":
            continue

        if 'get tickets' in lower_line:
            continue

        if '25Embed' in line:
            line = re.sub('25Embed', '', line)

        dict['year'].append(year)
        dict['album'].append(album)
        dict['title'].append(title)
        dict['lyrics'].append(line)
        dict['line'].append(line_number)
        line_number += 1

speak_now_df = pd.DataFrame.from_dict(dict)

In [91]:
lyrics_df = pd.concat([deluxe_1989_df, evermore_df, fearless_df, folklore_df, lover_df, midnights_df, red_df, reputation_df, speak_now_df, taylor_swift_df])

In [92]:
lyrics_df.sample(10)

Unnamed: 0,title,lyrics,line,album,year
297,Mean,(Why you gotta be so mean?),51,Speak Now (Deluxe),2010
36,Tim McGraw,And I'm back for the first time since then,36,Taylor Swift,2006
948,New Year’s Day,Whose laugh I could recognize anywhere,38,reputation,2017
208,Tied Together with a Smile,You walk around here thinking you're not pretty,2,Taylor Swift,2006
204,Dear John,Then go back and turn it to rain,7,Speak Now (Deluxe),2010
378,​this is me trying,They told me all of my cages were mental,10,folklore (deluxe version),2020
1050,That’s When (Taylor’s Version) [From the Vault],"And I said, ""Leave those all in our past""",20,Fearless (Taylor’s Version),2021
841,​r​ight where you left me,Everybody moved on,37,evermore (deluxe version),2021
810,​r​ight where you left me,Wages earned and lessons learned,6,evermore (deluxe version),2021
584,London Boy,Like a Tennessee Stella McCartney on the Heath,51,Lover,2019


In [93]:
lyrics_df.info

<bound method DataFrame.info of                                     title  \
0                     Welcome to New York   
1                     Welcome to New York   
2                     Welcome to New York   
3                     Welcome to New York   
4                     Welcome to New York   
..                                    ...   
594  Teardrops on My Guitar (Pop Version)   
595  Teardrops on My Guitar (Pop Version)   
596  Teardrops on My Guitar (Pop Version)   
597  Teardrops on My Guitar (Pop Version)   
598  Teardrops on My Guitar (Pop Version)   

                                               lyrics  line          album  \
0       Walking through a crowd, the village is aglow     0  1989 (Deluxe)   
1         Kaleidoscope of loud heartbeats under coats     1  1989 (Deluxe)   
2                Everybody here wanted something more     2  1989 (Deluxe)   
3        Searching for a sound we hadn't heard before     3  1989 (Deluxe)   
4                                    

In [94]:
# save as csv
lyrics_df.to_csv('../../data/lyrics.csv')

#### Billboard 100

In [14]:
# use webscraper to get Taylor Swift billboard 100 rankings 
URL = "https://www.billboard.com/artist/taylor-swift/chart-history/hsi/"
page = requests.get(URL)


soup = BeautifulSoup(page.content, "html.parser")

In [88]:
title = []

songs = soup.find_all("div", class_="o-chart-results-list__item // lrv-u-flex lrv-u-flex-direction-column lrv-u-flex-grow-1 lrv-u-justify-content-center lrv-u-border-b-1 u-border-b-0@mobile-max lrv-u-border-color-grey-light lrv-u-padding-lr-2 lrv-u-padding-lr-1@mobile-max lrv-u-padding-tb-050@mobile-max")
for song in songs:
    title_element = song.find("h3", class_="c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 lrv-u-font-size-18@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-330 u-max-width-230@tablet-only artist-chart-row-title")
    title.append(title_element.text.strip())

In [89]:
debut_date = []

debut_dates = soup.find_all("div", class_="o-chart-results-list__item // u-width-143@tablet u-width-67@mobile-max lrv-u-flex lrv-u-align-items-center lrv-u-justify-content-center u-justify-content-flex-end@mobile-max u-flex-grow-1@mobile-max lrv-u-background-color-grey-lightest lrv-u-border-b-1 u-border-b-0@mobile-max lrv-u-border-color-grey-light lrv-u-padding-r-075@mobile-max")
for date in debut_dates:
    date_element = date.find('a', class_="c-label__link")
    debut_date.append(date_element.text.strip())

In [90]:
peak_position = []

peak_positions = soup.find_all("div", class_= "o-chart-results-list__item // u-width-72 u-width-55@mobile-max lrv-u-flex lrv-u-flex-direction-column lrv-u-align-items-center lrv-u-justify-content-center u-background-color-white-064@mobile-max lrv-u-border-b-1 u-border-b-0@mobile-max lrv-u-border-color-grey-light")
for position in peak_positions:
    peak_position_element = position.find("span", class_="c-label a-font-primary-bold-m lrv-u-font-size-28@tablet lrv-u-padding-b-025 lrv-u-padding-b-00@mobile-max artist-chart-row-peak-pos")
    peak_position.append(peak_position_element.text.strip())

In [91]:
peak_date = []

peak_dates = soup.find_all('div', class_="o-chart-results-list__item // u-width-143@tablet u-width-82@mobile-max lrv-u-flex lrv-u-align-items-center lrv-u-justify-content-center lrv-u-background-color-grey-lightest lrv-u-border-b-1 u-border-b-0@mobile-max lrv-u-border-color-grey-light")
for date in peak_dates:
    date_element = date.find('a', class_="c-label__link")
    peak_date.append(date_element.text.strip())

In [92]:
weeks_on_chart = []

wks_on_chart = soup.find_all("div", class_="o-chart-results-list__item // u-width-72 u-width-55@mobile-max lrv-u-flex lrv-u-align-items-center lrv-u-justify-content-center u-background-color-white-064@mobile-max lrv-u-border-b-1 u-border-b-0@mobile-max lrv-u-border-color-grey-light")
for entry in wks_on_chart:
    # print(entry)
    week = entry.find("span", class_="c-label a-font-primary-m artist-chart-row-week-on-chart")
    weeks_on_chart.append(week.text.strip())

In [94]:
# now combine the scraped data into a dataframe
billboard_dict = {'title': title,
                  'debut_date': debut_date,
                  'peak_position': peak_position,
                  'peak_date': peak_date,
                  'weeks_on_chart': weeks_on_chart}
billboard_df = pd.DataFrame(data = billboard_dict)

In [95]:
billboard_df.head()

Unnamed: 0,title,debut_date,peak_position,peak_date,weeks_on_chart
0,You Belong With Me,11.22.08,2,08.22.09,50
1,Shake It Off,09.06.14,1,09.06.14,50
2,Love Story,09.27.08,4,01.17.09,49
3,Teardrops On My Guitar,03.24.07,13,03.01.08,48
4,Our Song,10.13.07,16,01.19.08,36


In [None]:
# save the billboard df


#### Analysis

In [1]:
# load the picked tracks from spotify and perform EDA
