# Album Analyzer


This notebook takes an artist name and an album name, searches Genius for matching albums, and asks the user to confirm which album they want to analyze.


The notebook then downloads all the lyrics from the selected album, performs sentiment analysis on them, graphs the results, and creates a word cloud. I am planning on adding a few more features, but this is what it does right now.


In [None]:
!pip install lyricsgenius
!pip install vaderSentiment
!pip install nltk
import nltk
import json
nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from lyricsgenius import Genius

In [None]:
import os

# Ask the user which artist and album to look up.
artist = input("Enter an artist: ")
album = input("Enter an album: ")

# Set up the Genius API client. Prefer a token from the GENIUS_ACCESS_TOKEN
# environment variable so the key does not have to be written into the
# notebook; fall back to the placeholder string otherwise.
genius = Genius(os.environ.get('GENIUS_ACCESS_TOKEN', 'PUT YOUR GENIUS API KEY HERE'))

# Search for the album. The response is kept in its own variable so that
# `album` still holds the text the user typed when later cells use it.
search_results = genius.search_albums(f"{artist} {album}")

# Collect the name and id of every hit in the first section of the response.
hits = search_results['sections'][0]['hits']
if not hits:
    print("No albums found for that artist/album; try a different search.")

album_df = pd.DataFrame({
    'album_name': [hit['result']['name'] for hit in hits],
    'album_id': [hit['result']['id'] for hit in hits],
})

# Number the results from 1 so the user can pick one in the next cell.
album_df['choice'] = album_df.index + 1

album_df

In [None]:
def get_album(choice):
    """Return the name and id of the album listed under ``choice``.

    The row is looked up in the module-level ``album_df`` DataFrame by
    matching its ``choice`` column.

    Args:
        choice: Value to match against the ``choice`` column of ``album_df``.

    Returns:
        A ``(album_name, album_id)`` tuple taken from the first matching row.

    Raises:
        IndexError: If no row of ``album_df`` has the given choice value.
    """
    # Filter once and reuse the result for both columns.
    matches = album_df.loc[album_df['choice'] == choice]
    if matches.empty:
        raise IndexError(
            f"No album with choice number {choice!r}; "
            "pick a value from the 'choice' column."
        )
    return matches['album_name'].iloc[0], matches['album_id'].iloc[0]

# Ask the user which of the listed albums to use.
raw_choice = input("Enter a choice: ")
try:
    choice = int(raw_choice)
except ValueError:
    # Same exception type as before, but with a message that says what to enter.
    raise ValueError(
        f"Expected a number from the 'choice' column, got {raw_choice!r}"
    ) from None

# Look up the selected album's name and id.
album_name, album_id = get_album(choice)

# Display the album name and id.
print(album_name)
print(album_id)

In [None]:
# Fetch the selected album from Genius. The album id chosen above is passed
# along with the selected album's name and the artist; using `album_name`
# avoids depending on whatever `album` currently holds from earlier cells.
album = genius.search_album(album_name, album_id=album_id, artist=artist)
if album is None:
    raise RuntimeError(
        f"Genius returned no album for {album_name!r} (id {album_id})."
    )

# Write the album, including its lyrics, to lyrics.json, replacing any
# file left over from an earlier run.
album.save_lyrics('lyrics.json', overwrite=True)

In [None]:
# Read the saved album JSON. The encoding is given explicitly so the result
# does not depend on the platform's default encoding.
with open('lyrics.json', encoding='utf-8') as f:
    data = json.load(f)

# Pretty-print the whole document, then list its top-level keys.
print(json.dumps(data, indent=4, sort_keys=True))
print(data.keys())

In [None]:
# Read the JSON file, print it in a readable format, and print its keys.
import json

with open('lyrics.json', encoding='utf-8') as f:
    data = json.load(f)

print(json.dumps(data, indent=4, sort_keys=True))
print(data.keys())

tracks = data['tracks']

# Number of tracks in the JSON file.
print(len(tracks))

if tracks:
    # Keys of a track entry and of its nested 'song' entry.
    print(tracks[0].keys())
    print(tracks[0]['song'].keys())

# Print the lyrics of every song.
for track in tracks:
    print(track['song']['lyrics'])

# Build the song/lyrics table in one step. DataFrame.append was removed in
# pandas 2.0, and appending row by row copies the frame on every iteration.
df = pd.DataFrame(
    [
        {'song': track['song']['title'], 'lyrics': track['song']['lyrics']}
        for track in tracks
    ],
    columns=['song', 'lyrics'],
)
df



In [None]:
# Each lyric string starts with a header line (the song title, per the
# original note); drop everything up to and including the first newline.
# regex=True is required: on pandas >= 2.0 the pattern is otherwise treated
# as a literal string and nothing is removed.
df['lyrics'] = df['lyrics'].str.replace(r'^.*\n', '', regex=True)

# Tools for stripping punctuation, removing stopwords, and lemmatizing.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()
tokenizer = RegexpTokenizer(r'\w+')


def clean_lyrics(text):
    """Return ``text`` lowercased, without punctuation or English stopwords, lemmatized."""
    # Lowercase BEFORE the stopword test: the stopword list is lowercase, so
    # capitalized words such as "The" or "I" would otherwise slip through.
    lowered = (token.lower() for token in tokenizer.tokenize(text))
    return ' '.join(
        lemmatizer.lemmatize(word) for word in lowered if word not in stop_words
    )


df['lyrics'] = df['lyrics'].apply(clean_lyrics)
df


In [None]:
# Score every song's lyrics with VADER and store the compound score.
analyzer = SentimentIntensityAnalyzer()


def compound_score(text):
    """Return the VADER 'compound' polarity score for ``text``."""
    return analyzer.polarity_scores(text)['compound']


def sentiment_label(score):
    """Map a score to 'positive' (> 0), 'negative' (< 0), or 'neutral'."""
    if score > 0:
        return 'positive'
    if score < 0:
        return 'negative'
    return 'neutral'


df['sentiment_score'] = df['lyrics'].apply(compound_score)

# Turn each numeric score into a sentiment label.
df['sentiment'] = df['sentiment_score'].apply(sentiment_label)

# Split each song's lyrics into a list of word tokens.
df['tokenized_lyrics'] = df['lyrics'].apply(word_tokenize)
df


In [None]:
# Bar chart with one bar per song showing its sentiment score.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title(f'Sentiment Analysis of {album_name}')
sns.barplot(data=df, x='song', y='sentiment_score', ax=ax)
# Rotate the song titles so they do not overlap along the x axis.
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()




# Count how many songs fall into each sentiment label, then print the
# album's average sentiment score below the chart.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=df, x='sentiment', ax=ax)
ax.set_title(f'Sentiment Analysis of {album_name}')
ax.set_xlabel('Sentiment')
ax.set_ylabel('Count')
plt.show()

average_score = df['sentiment_score'].mean()
print(f'Average Sentiment Score: {average_score!s}')

        

In [None]:
# Make a word cloud from the lyrics of every song on the album.
# Missing values are dropped so that joining the column cannot fail.
all_lyrics = ' '.join(df['lyrics'].dropna())

if not all_lyrics.strip():
    # WordCloud.generate raises on empty text (e.g. no lyrics were found, or
    # every word was filtered out), so skip the plot in that case.
    print('No lyrics available to build a word cloud.')
else:
    wordcloud = WordCloud(
        width=800,
        height=400,
        background_color='white',
        max_words=100,
    ).generate(all_lyrics)

    plt.figure(figsize=(10, 5))
    plt.title('Word Cloud of ' + str(album_name))
    # Bilinear interpolation smooths the rendered image when it is rescaled.
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
