In [221]:
import numpy as np
import pandas as pd
import os
import requests
import bs4
import json
import seaborn as sns
import re
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from difflib import SequenceMatcher
import project04 as proj

In [222]:
def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()

In [223]:
url = 'https://genius.com{}'
artist = 'billie eilish'
search_bar = '/artists/'+artist.capitalize().replace(' ', '-')

In [224]:
response = requests.get(url.format(search_bar))
soup = bs4.BeautifulSoup(response.content)

In [225]:
for i in soup.find_all('a', attrs={'class': 'vertical_album_card'}):
    print(i['title'])

Happier Than Ever
Music from “Billie Eilish: The World’s a Little Blurry” (An Apple Original Film)
WHEN WE ALL FALL ASLEEP, WHERE DO WE GO? (Target Exclusive)
Live at Third Man Records
WHEN WE ALL FALL ASLEEP, WHERE DO WE GO?
WHEN WE ALL FALL ASLEEP, WHERE DO WE GO? (Japanese Import)


In [226]:
new_url = soup.find_all('a', attrs={'class': 'full_width_button'})[1]['href']

In [227]:
albums_request = requests.get(url.format(new_url))
albums = bs4.BeautifulSoup(albums_request.content)

In [228]:
album_lst = albums.find('ul', attrs={'class': 'album_list primary_list'}).find_all('li')
album_lst

[<li><a class="album_link" href="/albums/Billie-eilish/Music-from-billie-eilish-the-worlds-a-little-blurry-an-apple-original-film">Music from “Billie Eilish: The World’s a Little Blurry” (An Apple Original Film)</a></li>,
 <li><a class="album_link" href="/albums/Billie-eilish/Happier-than-ever">Happier Than Ever</a></li>,
 <li><a class="album_link" href="/albums/Billie-eilish/Happier-than-ever-spotify-lyric-mode">Happier Than Ever [Spotify Lyric Mode]</a></li>,
 <li><a class="album_link" href="/albums/Billie-eilish/Unreleased-songs">Unreleased Songs</a></li>,
 <li><a class="album_link" href="/albums/Billie-eilish/When-we-all-fall-asleep-where-do-we-go">WHEN WE ALL FALL ASLEEP, WHERE DO WE GO?</a></li>,
 <li><a class="album_link" href="/albums/Billie-eilish/When-we-all-fall-asleep-where-do-we-go-target-exclusive">WHEN WE ALL FALL ASLEEP, WHERE DO WE GO? (Target Exclusive)</a></li>,
 <li><a class="album_link" href="/albums/Billie-eilish/Dont-smile-at-me-japanese-import">dont smile at me 

In [252]:
song_dict = {}
for i in album_lst:
    link = i.find('a')['href']
    new_url = url.format(link)
    response = requests.get(new_url)
    soup = bs4.BeautifulSoup(response.content)
    tracklist = soup.find_all('div', attrs={'class': 'column_layout-column_span column_layout-column_span--primary'})[1]
    for j in tracklist.find_all('div', attrs={'class': 'chart_row-content'}):
        link = j.find('a')['href']
        song_link = requests.get(link)
        song = bs4.BeautifulSoup(song_link.content)
        song_page = song.find('div', attrs={'id': 'application'})
        title = song.find('h1').text.replace('\u200b', '')
        singer = song.find_all('a')[8].text.lower()
        if singer != artist:
            continue
        if song_page == None:
            continue
        else:
            lyr = song_page.find_all('div')[39].text
            lyr = re.sub(r'([a-z])([A-Z])', r'\1 xxxx \2', lyr)
            lyr = re.sub(r'\[[\w :&-]*\]', ' ', lyr)
            lyr = re.sub(r'Embed[\w .]*', '', lyr)
            lyr = lyr.replace('\u205f', ' ')
            lyr = lyr.replace('\u2005', ' ')
            lyr = re.sub(r'(\)|\?)(\'|\w)', r'\1 \2', lyr)
            lyr = re.sub(r'([a-zA-z])([0-9]+)', r'\1', lyr)
            lyr = lyr.strip()
        if title in song_dict:
            continue
        else:
            song_dict[title] = lyr

In [253]:
lyrics = list(song_dict.values())
cv = CountVectorizer()
word_count = cv.fit_transform(lyrics)

In [254]:
tfidf_transformer = TfidfTransformer(smooth_idf=True, use_idf=True)
tfidf_transformer.fit(word_count)
df_idf = pd.DataFrame(tfidf_transformer.idf_, index=cv.get_feature_names(), columns=['idf_weights'])

In [258]:
count_vector = cv.transform(lyrics)
tf_idf_vector = tfidf_transformer.transform(count_vector)

In [259]:
feature_names = cv.get_feature_names()
first_document_vector = tf_idf_vector[5]
df = pd.DataFrame(first_document_vector.T.todense(), index=feature_names, columns=['tfidf'])
df.sort_values(by=['tfidf'], ascending=False)

Unnamed: 0,tfidf
blah,0.386472
xxxx,0.331343
you,0.299787
birthday,0.257648
bullshit,0.257648
...,...
givin,0.000000
gives,0.000000
givenchy,0.000000
given,0.000000


In [265]:
songs = ' xxxx '.join(lyrics)

In [266]:
tokens = proj.tokenize(songs)

In [267]:
unigram = proj.NGramLM(3, tuple(tokens))

In [295]:
samp = unigram.sample(300)

In [300]:
samp = re.sub(' (\') ', '\'', samp)

In [301]:
samp

'\x02 you\x01ve been uninvited\x01Cause I\x01m overheated , can\x01t say I need you for some time xxxx I\x01m no good xxxx And I don\x01t sympathy for you xxxx You\x01d you want a good girl , then goodbye xxxx I started watchin\x01you out ? Am I satisfactory ? Today , I seem to notice I\x01m hopeless xxxx What an expensive fake xxxx My boy xxxx Don\x01t xxxx And your best friend , too xxxx For I\x01m not sorry , I ’ ve been watchin\x01you out ? Am I my stomach ? My hips ? The body I was like " that\x01s not stoned xxxx Don\x01t be deleted , can\x01t get complacent ) ( Blow away xxxx Come out and play xxxx Take my hand , take your aim xxxx I\x01m so sorry , sorry xxxx Call me cocky , watch your car burn xxxx With your arms , mm - mm , quiet xxxx Hmm , down Call my friends and tell them that I look good ? Just waiting for it gets so boring xxxx A lot can happen in the dark xxxx And I\x01ll be your answer ( Be your answer )\x01Cause I just kinda wish you were gay Is there a 12 step just f

In [302]:
print(samp)

 youve been uninvitedCause Im overheated , cant say I need you for some time xxxx Im no good xxxx And I dont sympathy for you xxxx Youd you want a good girl , then goodbye xxxx I started watchinyou out ? Am I satisfactory ? Today , I seem to notice Im hopeless xxxx What an expensive fake xxxx My boy xxxx Dont xxxx And your best friend , too xxxx For Im not sorry , I ’ ve been watchinyou out ? Am I my stomach ? My hips ? The body I was like " thats not stoned xxxx Dont be deleted , cant get complacent ) ( Blow away xxxx Come out and play xxxx Take my hand , take your aim xxxx Im so sorry , sorry xxxx Call me cocky , watch your car burn xxxx With your arms , mm - mm , quiet xxxx Hmm , down Call my friends and tell them that I look good ? Just waiting for it gets so boring xxxx A lot can happen in the dark xxxx And Ill be your answer ( Be your answer )Cause I just kinda wish you were gay Is there a 12 step just for us to all be grateful that were a tough guy xxxx Chest