# Rap Music Analysis

In [1]:
import urllib.request
import urllib.parse
import urllib.error
from urllib.request import Request, urlopen
import requests

from bs4 import BeautifulSoup

import pandas as pd
import json

import string

In [2]:
# Create an empty DataFrame
column_names = ['Title','Artist','Lyrics','Comments']
df = pd.DataFrame(columns = column_names)

In [3]:
def request_artist_info(artist_name, page):
    
    import getpass
    base_url = 'https://api.genius.com'
    token = getpass.getpass('Enter your Genius API token:')
    
    headers = {'Authorization': 'Bearer ' + token}
    search_url = base_url + '/search?per_page=50&page=' + str(page)
    data = {'q': artist_name}
    response = requests.get(search_url, data=data, headers=headers)
    
    return response 

In [4]:
response = request_artist_info('Drake', 1)
json = response.json()

Enter your Genius API token:········


In [5]:
json = response.json()

artist = []
song_name = []

for hit in json['response']['hits']: 
    artist.append(hit['result']['primary_artist']['name'])
    song_name.append(hit['result']['title'])

In [6]:
df['Title']=song_name
df['Artist'] = artist

In [7]:
def request_song_url(artist_name, song_cap):
    
    """ This function populates a list with urls to the songs by a chosen artist. 

    Args: 
        artist_name(str): name of the artist
        song_cap(int): number of urls/songs you want 
        
    Returns: 
        list of strings, each string is a url to a song by the artist
    
    """
    page = 1
    songs = []
    
    while True:
        response = request_artist_info(artist_name, page)
        json = response.json()       
        song_info = []
        
        for hit in json['response']['hits']:
            if artist_name.lower() in hit['result']['primary_artist']['name'].lower():
                song_info.append(hit)
        
        for song in song_info:
            if (len(songs) < song_cap):
                url = song['result']['url']
                songs.append(url)
            
        if (len(songs) == song_cap):
            break
        else:
            page += 1
    
    return songs

In [8]:
urls = request_song_url('drake',20)

Enter your Genius API token:········


In [9]:
urls

['https://genius.com/Drake-gods-plan-lyrics',
 'https://genius.com/Drake-in-my-feelings-lyrics',
 'https://genius.com/Drake-hotline-bling-lyrics',
 'https://genius.com/Drake-one-dance-lyrics',
 'https://genius.com/Drake-hold-on-were-going-home-lyrics',
 'https://genius.com/Drake-know-yourself-lyrics',
 'https://genius.com/Drake-back-to-back-lyrics',
 'https://genius.com/Drake-all-me-lyrics',
 'https://genius.com/Drake-fake-love-lyrics',
 'https://genius.com/Drake-from-time-lyrics',
 'https://genius.com/Drake-0-to-100-the-catch-up-lyrics',
 'https://genius.com/Drake-started-from-the-bottom-lyrics',
 'https://genius.com/Drake-the-motto-lyrics',
 'https://genius.com/Drake-pound-cake-paris-morton-music-2-lyrics',
 'https://genius.com/Drake-and-future-jumpman-lyrics',
 'https://genius.com/Drake-hyfr-lyrics',
 'https://genius.com/Drake-marvins-room-lyrics',
 'https://genius.com/Drake-nice-for-what-lyrics',
 'https://genius.com/Drake-passionfruit-lyrics',
 'https://genius.com/Drake-childs-pla

In [10]:
counter = 0

for url in urls: 
    
    req = Request(url, headers = {"User-Agent" : "Mozilla/5.0"})
    webpage = urlopen(req).read()

    # Create a BeautifulSoup object 
    soup = BeautifulSoup(webpage, 'html.parser')
    html = soup.prettify('utf-8')
    songs = {}
    songs['lyrics'] = [];
    songs['comments'] = [];

    # Extract user comments on the song
    for div in soup.findAll('div',attrs={'class':'rich_text_formatting'}):
        comments = div.text.strip().split("\n")
        
        for comment in comments: 
            if comment != "":
                songs['comments'].append(comment);
                df['Comments'].iloc[counter] = songs['comments']
        
    # Extract the lyrics of the song
    for div in soup.findAll('div', attrs = {'class':'lyrics'}):
        songs['lyrics'].append(div.text.strip().split("\n"));
        df['Lyrics'].iloc[counter] = songs['lyrics'][0]
    
    counter +=1

In [11]:
def remove_punctuation(str1): 
    
    for char in str1.lower():
        
        if char in string.punctuation: 
            
            str1 = str1.replace(char,"")          
    
    return str1

In [12]:
lyric_values = df['Lyrics']

size = len(lyric_values)  # size holds the number of rows
n=-1 # we want to start from cell 0, the first increment would make the -1 a 0

lst_words = []

while True: 
    
    n += 1  
    if n == size: 
        break 
    
    wrds = [] # will hold the list of words in each cell 

    # inspect the next cell
    print ("-" * 40, "cell " + str(n), "-" * 40)
    text = lyric_values[n][1]
    text_str = str(text).lower()
    no_punct_str = remove_punctuation(text_str)
    
    for word in no_punct_str.split(): 
        wrds.append(word)
        
    lst_words.append(len(wrds))

    print ("The text in this cell:\n\n", text, "\n")
    print("The number of words in this cell:\n",len(wrds),'\n\n')

---------------------------------------- cell 0 ----------------------------------------
The text in this cell:

 And they wishin' and wishin' and wishin' and wishin' 

The number of words in this cell:
 9 


---------------------------------------- cell 1 ----------------------------------------
The text in this cell:

 Trap, TrapMoneyBenny 

The number of words in this cell:
 2 


---------------------------------------- cell 2 ----------------------------------------
The text in this cell:

 You used to call me on my 

The number of words in this cell:
 7 


---------------------------------------- cell 3 ----------------------------------------
The text in this cell:

 Baby, I like your style 

The number of words in this cell:
 5 


---------------------------------------- cell 4 ----------------------------------------
The text in this cell:

  

The number of words in this cell:
 0 


---------------------------------------- cell 5 ----------------------------------------
The te

In [13]:
df.insert(4,'total_words', pd.Series(lst_words))

In [14]:
df

Unnamed: 0,Title,Artist,Lyrics,Comments,total_words
0,God’s Plan,Drake,"[[Intro], And they wishin' and wishin' and wis...",[“God’s Plan” is a feel-good track that discus...,9
1,In My Feelings,Drake,"[[Intro: Drake], Trap, TrapMoneyBenny, This sh...","[“In My Feelings” is an upbeat, club-mixed lov...",2
2,Hotline Bling,Drake,"[[Intro], You used to call me on my, You used ...",[“Hotline Bling” is most likely about Drake’s ...,7
3,One Dance,Drake,"[[Intro: Kyla], Baby, I like your style, , [Ve...",[“One Dance” is an R&B song with dancehall and...,5
4,"Hold On, We’re Going Home",Drake,"[[Produced by Nineteen85, Majid Jordan & Noah ...",[Drake’s single isn’t a rap song as we all kno...,0
5,Know Yourself,Drake,"[[Part 1], , [Intro], Hol' it yute, hol' it, h...",[Drake first used the phrase “know yourself” i...,0
6,Back to Back,Drake,"[[Intro], Oh man, Oh man, oh man, Not again, ,...",[“Back to Back” is the second of Drake’s respo...,2
7,All Me,Drake,"[[Produced by Key Wane], , [Intro: Aziz Ansari...","[Drake, 2 Chainz and Big Sean team up for a hi...",0
8,Fake Love,Drake,"[[Chorus], I've been down so long, it look lik...",[“Fake Love” finds Drake criticizing friends a...,11
9,From Time,Drake,"[[Intro: Jhené Aiko], What's up?, Been a minut...",[Song #7 on Nothing Was the Same is a convers...,2
