## Angry Spotify
#### Background
The goal of this project is to observe changes in the emotion of anger throughout the decade in popular music. The data I will be using are the "All Out XXs" playlists from Spotify, which feature popular music for that respective decade. This project was heavily inspired by [this project](https://towardsdatascience.com/angriest-death-grips-data-song-anger-code-through-r-ded3aa2fe844) by Evan Oppenheimer, and I utilize his method of classifying anger.
 
#### Classifying "anger"



In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json

In [5]:
# grab credentials for API authorization as well as all the playlists we will be using for the analysis (All out XXs playlists)
credentials = json.load(open('Authorization.json'))
client_id = credentials['client_id']
client_secret = credentials['client_secret']

playlists = json.load(open('Playlists.json'))
#playlist_uri = playlists[playlist_index]['uri']
#like = playlists[playlist_index]['like']

In [6]:
# Now we want to connect to the Genius API so we can grab lyrics, and then scrape the webpage for the lyrics
import requests
import requests
from bs4 import BeautifulSoup as bs
from pprint import pprint

def extract_lyrics(song_title, artist_name):
    # setup connection to Genius API
    base_url = "https://api.genius.com"
    headers = {'Authorization' : 'Bearer ' + 'ZbJWwJ1ewLAI_aPZqY_MCD6WDtmg6du-2dsLWWUT7kA6ggprN54e2m8Pd-uPHrWC'}
    search_url = base_url + '/search'
    data = {'q' : song_title + ' ' + artist_name}
    response = requests.get(search_url, data=data, headers=headers, timeout=None)
    json = response.json()
    remote_song_info = None

    # grab song info
    for hit in json['response']['hits']:
        if artist_name.lower() in hit['result']['primary_artist']['name'].lower():
            remote_song_info = hit
            break

    # Grab the URL from the song info as it's all we need
    song_url = None
    if remote_song_info:
        song_url = remote_song_info['result']['url']

    # now we scrape the lyrics webpage and store each line in a list
    r = requests.get(song_url)
    soup = bs(r.content, 'html')
    lyrics = [i for j in [[line for line in verse.stripped_strings] for verse in soup.select('[data-scrolltrigger-pin]')] for i in j]

    return lyrics

In [7]:
# Now we want to take these lyrics and split them by whitespace and find the frequency of angry words using the lexicon
# First let's load in the data, it's pretty big!
import pandas as pd
lexicon = pd.read_excel('/home/akagi/Documents/Projects/Angry Spotify/NRC-Emotion-Lexicon/NRC-Emotion-Lexicon-v0.92/NRC-Emotion-Lexicon-v0.92-In105Languages-Nov2017Translations.xlsx')

In [8]:
# now let's set up the lexicon data
# contains word and whether it is classified as angry
english_and_angry = [(x,y) for x,y in zip(lexicon['English (en)'], lexicon['Anger'])]

In [9]:
# list of all angry english words
angry_words = [x for x,y in english_and_angry if y == 1]

In [10]:
# Now we want to calculate the number of angry words
import re
def calc_angry_words(lyrics):
    # the lyrics will be fed as outputted extract_lyrics(), which returns a list containing each verse as an element
    # thus we want to take each verse and break it up into it's individual words and throw this into a list
    angry_count = 0
    for verse in lyrics:
        v_list = re.split('; |, ', verse)
        for word in v_list:
            if word in angry_words:
                angry_count += 1

    # number of angry words in a song
    return angry_count

In [11]:
# Now it's time to extract the songs from spotify's playlists and gather information about the valence
# we will construct an anger index similar to the death grips analysis
# We want high sonic anger index = sqrt((1-valence)*energy) as well as a high lyrical anger index  = sqrt(angry words / duration)
# thus our total angry index = sonic anger index + lyrical anger index
# We will then take the average (or median) of songs by decade for this index

In [12]:
# Construct Spotify API
client_credentials_manager = SpotifyClientCredentials(client_id=client_id,client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [13]:
# Grab tracks from a playlist
def get_playlist_tracks(username,playlist_id):
    results = sp.user_playlist_tracks(username,playlist_id)
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [14]:
# now we begin gathering the data for each playlist
import numpy as np
import math
from itertools import chain

username = 'Spotify'
sonic_anger_indices = {}
decade_id = 50
for playlist in playlists:
    uri = playlist['uri']
    playlist_id = uri.split(':')[2]
    playlist_tracks_data = get_playlist_tracks(username, playlist_id)
    playlist_tracks_id = []
    playlist_tracks_titles = []
    playlist_tracks_first_artists = []

    # gather IDs, name, title for the tracks in the playlist
    for track in playlist_tracks_data:
        playlist_tracks_id.append(track['track']['id'])
        playlist_tracks_first_artists.append(track['track']['artists'][0])

        # Clean song names for proper search
        song_name = track['track']['name']
        song_name = song_name.replace("&", "and")
        song_name = song_name.replace("'", "\'")
        song_name = re.split(r'\s+(?:-\s+|\()', song_name)
        song_name = song_name[0]
        playlist_tracks_titles.append(song_name)

    artists = [elem['name'].replace("&", "and").replace("'", "\''") for elem in playlist_tracks_first_artists]

    # gather audio analysis
    features_list = []
    for id in playlist_tracks_id:
        try:
            features = sp.audio_features(id)
            features_list.append(features)
        except:
            print("Track ID not available")

    features_df = pd.DataFrame(data=list(chain.from_iterable(features_list)), columns=features[0].keys())
    features_df = features_df[['id',
                           'danceability', 'energy', 'key', 'loudness',
                           'mode', 'acousticness', 'instrumentalness',
                           'liveness', 'valence', 'tempo',
                           'duration_ms', 'time_signature']]
    
    valence = features_df[['valence']]
    energy = features_df[['energy']]
    duration = features_df[['duration_ms']]

    # now we calculate the sonic anger index
    s_ang = math.sqrt((1-float(valence.mean()))*float(energy.mean()))

    # now let's calculate the lyrical anger index
    total_angry_words = 0
    for title, artist in zip(playlist_tracks_titles, artists):
        try:
            lyrics = extract_lyrics(title, artist)
            num_angry_words = calc_angry_words(lyrics)
            total_angry_words += num_angry_words
        except:
            print("No results for artist %s with song %s" % (artist, title))
            print("Empty URL, skipping")

    try:
        l_ang = math.sqrt(total_angry_words / float(duration.sum()))
    except:
        print("duration missing, skipping")

    # just aggregate together
    total_ang = s_ang + l_ang

    decade = str(decade_id) + 's'
    sonic_anger_indices[decade] = total_ang
    decade_id += 10

Track ID not available
No results for artist Frank Sinatra with song I've Got You Under My Skin
Empty URL, skipping
No results for artist The Platters with song Smoke Gets In Your Eyes
Empty URL, skipping
No results for artist Ray Charles with song What'd I Say, Pt. 1 and 2
Empty URL, skipping
No results for artist Ella Fitzgerald with song Dream A Little Dream Of Me
Empty URL, skipping
No results for artist Buddy Holly with song That'll Be The Day
Empty URL, skipping
No results for artist Bobby Darin with song Mack the Knife
Empty URL, skipping
No results for artist Fats Domino with song Blueberry Hill
Empty URL, skipping
No results for artist Dean Martin with song That's Amore
Empty URL, skipping
No results for artist The Everly Brothers with song All I Have to Do Is Dream
Empty URL, skipping
No results for artist Chuck Berry with song Johnny B. Goode
Empty URL, skipping
No results for artist Phil Phillips with song Sea Of Love
Empty URL, skipping
No results for artist Ray Charles wi

In [None]:
sonic_anger_indices

{'50s': 0.39431420569370135,
 '60s': 0.4148219016720213,
 '70s': 0.43369555999122505,
 '80s': 0.46928879219526437,
 '90s': 0.5375934388502596,
 '100s': 0.5246415353695427,
 '110s': 0.5703397090563421}