<a href="https://colab.research.google.com/github/cnn22/SingerSongwriter/blob/main/TaylorSwift_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [143]:
import os
import pickle

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [144]:
# !pip install spotipy

# Gathering Data
## 1. Get song attributes

In [180]:
# Load the list of songs to analyse; later cells read its 'Title' and 'Artist'
# columns to look each song up on Spotify and Genius.
songs = pd.read_csv('TaylorSwiftSongs.csv')

In [181]:
# Add one placeholder column (filled with None) per attribute that the cells
# below look up through the Spotify API. Order matters: it is the column order
# of the resulting frame.
attributeColumns = [
    'SpotifyID',
    "Tempo",
    "Loudness",
    "Key",
    "isExplicit",
    "Danceability",
    "Energy",
    "Liveness",
    "Duration",
    "ReleaseDate",
    "Popularity",
]
for columnName in attributeColumns:
    songs[columnName] = None

In [161]:
# Spotify API credentials are read from the environment so that secrets are
# never stored in (or committed with) the notebook. Set SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET before running this cell; a missing variable raises
# KeyError here rather than failing later inside an API call.
# NOTE(review): any credentials that were previously committed in this cell
# should be revoked in the Spotify developer dashboard.
cid = os.environ["SPOTIPY_CLIENT_ID"]
secret = os.environ["SPOTIPY_CLIENT_SECRET"]

#initiating Spotify client
client_credentials_manager = SpotifyClientCredentials(client_id = cid, client_secret = secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

In [162]:
def getSongID(trackName, trackArtist, trackResults):
    """Return the Spotify ID of the first search result matching a song.

    A result matches when its name equals ``trackName`` and the name of its
    first listed artist equals ``trackArtist``; both comparisons ignore case.

    Parameters
    ----------
    trackName : str
        Song title as stored in the data frame.
    trackArtist : str
        Artist name as stored in the data frame.
    trackResults : dict
        Search response; results are read from ``['tracks']['items']``.

    Returns
    -------
    str or None
        The ``'id'`` of the first matching result, or None if nothing matches.
    """
    # Normalise the wanted values once instead of once per result.
    wantedName = trackName.lower()
    wantedArtist = trackArtist.lower()

    for track in trackResults['tracks']['items']:
        artists = track['artists']
        # A result without any artist can never match; skipping it also avoids
        # an IndexError on artists[0].
        if not artists:
            continue
        # Logical `and` short-circuits, so the artist is only inspected when
        # the title already matches.
        if track['name'].lower() == wantedName and artists[0]['name'].lower() == wantedArtist:
            return track['id']
    return None

In [150]:
def getExplicit(songID):
    """Look up a track by its Spotify ID and return its 'explicit' field."""
    trackInfo = sp.track(songID)
    return trackInfo['explicit']

In [151]:
def getTrackAttributes(songID):
    """Return the first audio-features entry Spotify reports for ``songID``.

    The attribute-filling loop in this notebook reads the 'tempo', 'loudness',
    'key', 'danceability', 'energy', 'liveness' and 'duration_ms' entries of
    the returned mapping.
    """
    featureList = sp.audio_features(tracks=songID)
    return featureList[0]

In [152]:
def getReleaseDate(songID):
    """Return the release date of the album the given track belongs to.

    The album's release date is used as the song's release date (this assumes
    the two coincide).
    """
    albumInfo = sp.track(songID)["album"]
    return albumInfo['release_date']

In [153]:
def getPopularity(songID):
    """Look up a track by its Spotify ID and return its 'popularity' field."""
    trackInfo = sp.track(songID)
    return trackInfo['popularity']

In [154]:
# Pitch-class number -> key name. Enharmonic pairs are written "sharp, flat".
# Reference: https://en.wikipedia.org/wiki/Pitch_class
_PITCH_CLASS_KEYS = {
    0: 'C',
    1: 'C#, Db',
    2: 'D',
    3: 'D#, Eb',
    4: 'E',
    5: 'F',
    6: 'F#, Gb',
    7: 'G',
    8: 'G#, Ab',
    9: 'A',
    10: 'A#, Bb',  # was 'A# Bb'; now formatted like the other enharmonic pairs
    11: 'B',
}

def getSongKey(pitchClass):
    """Translate a pitch-class number (0-11) into the name of its key.

    Parameters
    ----------
    pitchClass : int
        Pitch class, where 0 is C and 11 is B.

    Returns
    -------
    str or None
        The key name, or None for any value outside 0-11.
    """
    try:
        return _PITCH_CLASS_KEYS.get(pitchClass)
    except TypeError:
        # Unhashable input (e.g. a list) cannot be a pitch class.
        return None

In [182]:
# Search Spotify for every song title and store the ID that getSongID picks
# from the search results (None when it finds no match).
for index, row in songs.iterrows():
    trackResults = sp.search(q=row['Title'], type='track', market='US', limit=10, offset=0)
    songID = getSongID(row['Title'], row['Artist'], trackResults)
    # Assign through a single label-based indexer. The chained form
    # `songs.SpotifyID.iloc[index] = ...` raises SettingWithCopyWarning, does
    # not modify `songs` under pandas Copy-on-Write, and treats the index
    # label from iterrows() as a position.
    songs.at[index, 'SpotifyID'] = songID

In [183]:
# Spotify IDs entered by hand, keyed by the exact title stored in `songs`.
# NOTE(review): presumably these are the titles the automatic search above
# could not match - confirm.
manualSpotifyIDs = {
    'tis the damn season': '4GBkffrtA51p17JH35irGA',
    "Soon You'll Get Better (feat. The Chicks)": '4AYtqFyFbX0Xkc2wtcygTr',
    "It's Nice To Have A Friend": '1SmiQ65iSAbPto6gPFlBYm',
    "Don't Blame Me": '1R0a2iXumgCiFb7HEZ7gUE',
    "New Year's Day": '7F5oktn5YOsR9eR5YsFtqb',
}
for title, spotifyID in manualSpotifyIDs.items():
    songs.loc[songs['Title'] == title, 'SpotifyID'] = spotifyID

In [184]:
# Iterate through the songs in the songs data frame (which we pulled from
# billboard.com) and fill in each song's attributes using Spotify's API:
# isExplicit, tempo, loudness, key, danceability, energy, liveness, duration,
# release date and popularity.
for index, row in songs.iterrows():
    songID = row['SpotifyID']
    if pd.isna(songID):
        # No Spotify ID was found for this title; leave its attributes empty
        # instead of sending an invalid ID to the API.
        print(f"Skipping {row['Title']!r}: no SpotifyID")
        continue

    # One track request supplies explicit flag, release date and popularity,
    # rather than one request per field.
    trackInfo = sp.track(songID)
    attributes = getTrackAttributes(songID)

    # Fill in values with a single label-based indexer per cell. The chained
    # form `songs.Tempo.iloc[index] = ...` raises SettingWithCopyWarning and
    # does not modify `songs` under pandas Copy-on-Write.
    songs.at[index, 'Tempo'] = attributes['tempo']
    songs.at[index, 'Loudness'] = attributes['loudness']
    songs.at[index, 'Key'] = getSongKey(attributes['key'])
    songs.at[index, 'isExplicit'] = trackInfo['explicit']
    songs.at[index, 'Danceability'] = attributes['danceability']
    songs.at[index, 'Energy'] = attributes['energy']
    songs.at[index, 'Liveness'] = attributes['liveness']
    songs.at[index, 'Duration'] = attributes['duration_ms']/1000 #converting from milliseconds to seconds
    # The album's release date is used as the song's release date.
    songs.at[index, 'ReleaseDate'] = trackInfo['album']['release_date']
    songs.at[index, 'Popularity'] = trackInfo['popularity']

In [185]:
# Display the frame to spot-check the attributes filled in by the cell above.
songs

Unnamed: 0,Title,Artist,Album,SpotifyID,Tempo,Loudness,Key,isExplicit,Danceability,Energy,Liveness,Duration,ReleaseDate,Popularity
0,Lavender Haze,Taylor Swift,Midnights,5jQI2r1RdgtuT8S3iG8zFC,96.985,-10.489,A# Bb,True,0.733,0.436,0.157,202.396,2022-10-21,84
1,Maroon,Taylor Swift,Midnights,3eX0NZfLtGzoLUxPNvRfqm,108.075,-8.294,G,True,0.637,0.398,0.101,218.271,2022-10-21,80
2,Anti-Hero,Taylor Swift,Midnights,0V3wPSX9ygBnCm8psDIegu,97.008,-6.571,E,False,0.637,0.643,0.142,200.69,2022-10-21,92
3,Snow On The Beach (feat. Lana Del Rey),Taylor Swift,Midnights,1wtOxkiel43cVs0Yux5Q4h,109.957,-13.481,A,True,0.663,0.319,0.117,256.124,2022-10-21,78
4,"You're On Your Own, Kid",Taylor Swift,Midnights,4D7BCuvgdJlYvlX5WlN54t,120.041,-10.289,D,False,0.696,0.396,0.125,194.207,2022-10-21,83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,Wildest Dreams,Taylor Swift,1989,59HjlYCeBsxdI0fcm3zglw,140.056,-7.414,"G#, Ab",False,0.554,0.666,0.106,220.44,2014-10-27,74
86,How You Get The Girl,Taylor Swift,1989,6wfugRLamFsTRbPcCpNnP7,119.997,-6.112,F,False,0.765,0.656,0.0918,247.533,2014-10-27,69
87,This Love,Taylor Swift,1989,1kTPQnabROVkW9bUXdCGrB,143.95,-8.795,E,False,0.481,0.435,0.0928,250.093,2014-10-27,62
88,I Know Places,Taylor Swift,1989,2zfgVd034GlUvk7LqBHl6u,159.965,-4.991,C,False,0.602,0.755,0.178,195.707,2014-10-27,68


## 2. Grab Lyrics

In [187]:
import pandas as pd
import pickle
from lyricsgenius import Genius

In [186]:
# !pip install lyricsgenius



### 2.1 Reformat Strings

In [189]:
def findAllIndex(word, sentence):
    """Return the start index of every non-overlapping occurrence of ``word``.

    Parameters
    ----------
    word : str
        Substring to look for.
    sentence : str
        Text to search, scanned left to right.

    Returns
    -------
    list of int
        Start positions in ascending order; empty when ``word`` does not occur
        or is the empty string.
    """
    result = []
    # An empty search string would never advance the scan position (it
    # "matches" at every index), so treat it as having no occurrences.
    if not word:
        return result

    index = sentence.find(word)
    while index != -1:
        result.append(index)
        # Resume after the current match so that matches never overlap.
        index = sentence.find(word, index + len(word))
    return result

def addSlashToApostrophe(string):
    """Insert a single backslash after every apostrophe in ``string``.

    Example (actual characters, not Python escapes):
        Before: Elyse's Girl's Damn's Try's
        After:  Elyse'\\s Girl'\\s Damn'\\s Try'\\s
    (each ``\\\\`` in this docstring's source denotes one backslash character).

    Parameters
    ----------
    string : str
        Text to process.

    Returns
    -------
    str
        A copy of ``string`` with each apostrophe followed by one backslash;
        unchanged if it contains no apostrophe.
    """
    # str.replace handles every occurrence in one linear pass, replacing the
    # earlier per-character list-membership test (quadratic in string length).
    return string.replace("'", "'\\")

def getLyrics(artistName, songName):
    """Fetch a song's lyrics through the Genius client ``genius``.

    The song is first searched with the names as given. If nothing is found
    and either name contains an apostrophe, the search is retried once with a
    backslash inserted after each apostrophe (see addSlashToApostrophe).

    Parameters
    ----------
    artistName : str
        Name of the artist.
    songName : str
        Title of the song.

    Returns
    -------
    str or None
        The lyrics of the song found, or None if neither search found one.
    """
    result = genius.search_song(songName, artistName)
    if result is not None:
        return result.lyrics

    # addSlashToApostrophe leaves strings without an apostrophe unchanged.
    escapedArtist = addSlashToApostrophe(artistName)
    escapedSong = addSlashToApostrophe(songName)
    if escapedArtist == artistName and escapedSong == songName:
        # Nothing to escape: a retry would repeat the identical request.
        return None

    result = genius.search_song(escapedSong, escapedArtist)
    if result is not None:  # the retry found the song
        return result.lyrics
    return None  # could not retrieve lyrics

In [190]:
# Work on a copy: the cell below adds the 'Lyrics' column to `test`, leaving
# `songs` as it is.
test = songs.copy()

In [192]:
# Create the Lyrics column only if it does not exist yet. Resetting it on
# every run would discard lyrics already fetched and make the "already
# pulled" check below pointless; this way the cell can be re-run after an
# error (e.g. a network failure) and resume where it stopped.
if 'Lyrics' not in test.columns:
    test['Lyrics'] = None

#iterate through songs data frame to grab lyrics
for index, row in test.iterrows():
    #to prevent grabbing lyrics for the same songs that we've already pulled
    #(this also skips rows already marked "NOT FOUND")
    if pd.notna(row['Lyrics']):
        continue
    result = getLyrics(row['Artist'], row['Title'])
    if result is None: #genius couldn't find the song...
        result = "NOT FOUND"
    # Single label-based indexer; the chained `test.Lyrics.iloc[index] = ...`
    # raises SettingWithCopyWarning and is a no-op under pandas Copy-on-Write.
    test.at[index, 'Lyrics'] = result

Searching for "Lavender Haze" by Taylor Swift...


HTTPError: ignored

In [201]:
# Genius API credentials are read from the environment so that secrets are
# never stored in (or committed with) the notebook. Set GENIUS_ACCESS_TOKEN
# (and optionally GENIUS_CLIENT_ID) before running this cell.
# NOTE(review): any credentials that were previously committed in this cell
# should be revoked on the Genius API clients page.
# NOTE: getLyrics uses the `genius` object created here, so this cell has to
# be run before the lyric-fetching cell.
clientid = os.environ.get("GENIUS_CLIENT_ID")  # not needed by the client below
token = os.environ["GENIUS_ACCESS_TOKEN"]

#instantiating genius client
genius = Genius(token)
genius.remove_section_headers = True
genius.skip_non_songs = True

# Sentiment Analysis

In [193]:
# Preview the first rows of the song attributes gathered above.
songs.head()

Unnamed: 0,Title,Artist,Album,SpotifyID,Tempo,Loudness,Key,isExplicit,Danceability,Energy,Liveness,Duration,ReleaseDate,Popularity
0,Lavender Haze,Taylor Swift,Midnights,5jQI2r1RdgtuT8S3iG8zFC,96.985,-10.489,A# Bb,True,0.733,0.436,0.157,202.396,2022-10-21,84
1,Maroon,Taylor Swift,Midnights,3eX0NZfLtGzoLUxPNvRfqm,108.075,-8.294,G,True,0.637,0.398,0.101,218.271,2022-10-21,80
2,Anti-Hero,Taylor Swift,Midnights,0V3wPSX9ygBnCm8psDIegu,97.008,-6.571,E,False,0.637,0.643,0.142,200.69,2022-10-21,92
3,Snow On The Beach (feat. Lana Del Rey),Taylor Swift,Midnights,1wtOxkiel43cVs0Yux5Q4h,109.957,-13.481,A,True,0.663,0.319,0.117,256.124,2022-10-21,78
4,"You're On Your Own, Kid",Taylor Swift,Midnights,4D7BCuvgdJlYvlX5WlN54t,120.041,-10.289,D,False,0.696,0.396,0.125,194.207,2022-10-21,83


In [205]:
# songs.to_csv('TaylorSwiftSongData.csv', index = False)