In [11]:
import requests
import csv
import time 
import random
from bs4 import BeautifulSoup as bs

In [18]:
# Rows of the source dataset: one dict per song, keyed by the CSV header names.
list_songs = []

# Read the whole CSV into 'list_songs'.
# newline="" lets the csv module handle line endings itself, so quoted fields
# that contain embedded newlines are parsed correctly.
# NOTE(review): the file is still opened with the platform default encoding;
# pass encoding=... explicitly once the dataset's encoding is confirmed.
with open('dataset_8.csv', newline="") as d8:
    list_songs.extend(csv.DictReader(d8))

In [13]:
# Sanity check: display the first row to confirm the CSV was parsed into a dict
# keyed by the column names.
list_songs[0]

{'song title': 'Come Rain Or Come Shine',
 'artist/band name': 'Davis Jr., Sammy',
 'year': '1955',
 'lyrics url': 'https://www.azlyrics.com/lyrics/sammydavisjr/comerainorcomeshine.html'}

In [14]:
# Sanity check: print how many rows were loaded (500 for this dataset).
print(len(list_songs))

500


In [15]:
# Scraper settings
REQUEST_TIMEOUT_SECONDS = 30  # requests has no default timeout; without one a stalled connection blocks forever
MIN_PAUSE_SECONDS = 3         # shortest pause between two requests
MAX_PAUSE_SECONDS = 20        # longest pause between two requests


def _fetch_lyrics(url):
    """Download one lyrics page and return its lyrics as a newline-joined string.

    Args:
        url: Address of the lyrics page.

    Returns:
        The lyrics with blank lines removed, or None when the page could not be
        fetched (network error, timeout, non-200 status) or the expected lyrics
        markup was not found in the page.
    """
    try:
        song_page = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException:
        # Connection problems, timeouts and malformed URLs should cost us one
        # song, not the whole scraping run.
        return None

    if song_page.status_code != 200:
        return None

    # Parse the HTML content
    soup = bs(song_page.content, "html.parser")

    # The lyrics are looked up as the first <div> without a class attribute
    # inside the main column. find() returns None when nothing matches, so each
    # step is checked before it is used.
    main_column = soup.find(class_="col-xs-12 col-lg-8 text-center")
    if main_column is None:
        return None
    lyric_div = main_column.find("div", attrs={'class': None})
    if lyric_div is None:
        return None

    # Strip the HTML, drop empty lines and combine the rest into one string
    lyric_lines = [line for line in lyric_div.text.splitlines() if line]
    return "\n".join(lyric_lines)


# Iterate through all the songs in the dataset
for index, song in enumerate(list_songs):
    # Progress message for the song being processed
    print("Scraping #", index + 1, song['song title'])

    # Songs without a lyrics URL cannot be scraped; move on without a request
    lyrics_url = song["lyrics url"]
    if not lyrics_url:
        print("Skipping " + song['song title'])
        continue

    lyrics = _fetch_lyrics(lyrics_url)
    if lyrics is None:
        print("Skipping " + song['song title'])
    else:
        song["lyrics"] = lyrics

    # Pause after every request attempt, successful or not, so a failing or
    # rate-limiting server is not hit again immediately
    time.sleep(random.randint(MIN_PAUSE_SECONDS, MAX_PAUSE_SECONDS))
    
    

Scraping # 1 Come Rain Or Come Shine
Scraping # 2 Thursday's Child
Scraping # 3 Sunday Blues
Scraping # 4 Love Is The Thing
Scraping # 5 Sweet And Lovely
Scraping # 6 Rescue The Perishing
Scraping # 7 Ill Wind
Scraping # 8 Between The Devil And The Deep Blue Sea
Scraping # 9 Santa Claus Is Comin' To Town
Scraping # 10 By The Beautiful Sea
Scraping # 11 The First Train Headin' South
Scraping # 12 (Now And Then There's) A Fool Such As I
Scraping # 13 Not As A Stranger
Scraping # 14 What Kind Of Man Are You
Scraping # 15 No One To Cry To
Scraping # 16 Oh Johnny, Oh Johnny, Oh!
Scraping # 17 Then You've Never Been Blue
Scraping # 18 September Song
Scraping # 19 My Kinda Love
Scraping # 20 Transfusion Blues
Scraping # 21 Window Up Above
Scraping # 22 Oh! Carol
Scraping # 23 Medley: One Kiss / My Romance / The Vagabond King Waltz
Scraping # 24 Backtrack
Scraping # 25 Last Letter
Scraping # 26 Down On The Corner Of Love
Scraping # 27 I Have But One Heart
Scraping # 28 Coquette
Scraping # 29 C

Scraping # 236 Ten Commandments Of Love
Scraping # 237 Un Garcon Pas Comme Les Autres (Ziggy)
Scraping # 238 Dance Of Love
Scraping # 239 I Am A Town
Scraping # 240 My Destiny
Scraping # 241 El Negro Bembon
Scraping # 242 Halloween On The Barbary Coast
Scraping # 243 The Likes Of Me
Scraping # 244 It's Too Soon To Know
Scraping # 245 Body And Soul
Scraping # 246 Facet Squared
Scraping # 247 Ain't Got Nothing If You Ain't Got Love
Scraping # 248 (Remember) Walking In The Sand
Scraping # 249 Face Up And Sing
Scraping # 250 De Aqui Para Alla
Scraping # 251 Beachparty
Scraping # 252 Quantum Mechanic
Scraping # 253 D.E.E.P.
Scraping # 254 If I Had A Wish
Scraping # 255 How Does It Feel To Feel?
Scraping # 256 Cottage For Sale
Scraping # 257 White Boy
Scraping # 258 When It All Comes Down
Scraping # 259 Breakdown
Scraping # 260 Can't Hold On
Scraping # 261 Crush, Kill, Destroy
Scraping # 262 Hush Baby Hush
Scraping # 263 Tie Me Up
Scraping # 264 Hangin' Your Life On The Wall
Scraping # 265 T

In [16]:
# Sanity check: display the first song again; it should now carry a 'lyrics'
# entry added by the scraping loop.
list_songs[0]

{'song title': 'Come Rain Or Come Shine',
 'artist/band name': 'Davis Jr., Sammy',
 'year': '1955',
 'lyrics url': 'https://www.azlyrics.com/lyrics/sammydavisjr/comerainorcomeshine.html',
 'lyrics': "I'm gonna love you like nobody's loved you\nCome rain or come shine\nHigh as a mountain and deep as a river\nCome rain or come shine\nI guess when you met me\nIt was just one of those things\nBut don't ever bet me\n'Cause I'm gonna be true if you let me\nYou're gonna love me like nobody's loved me\nCome rain or come shine\nHappy together, unhappy together\nAnd won't it be fine\nDays may be cloudy or sunny\nWe're in or we're out of the money\nBut I'm with you always\nI'm with you rain or shine"}

In [17]:
# Save every song, together with any lyrics that were scraped, to a new CSV file.
# Songs that have no 'lyrics' key get an empty cell (DictWriter's default restval).
output_columns = ["song title", "artist/band name", "year", "lyrics url", "lyrics"]

with open("new_dataset8.csv", "w", encoding = "utf8", newline = "") as newFile:
    song_writer = csv.DictWriter(newFile, fieldnames = output_columns)
    song_writer.writeheader()
    song_writer.writerows(list_songs)