# Album Analyzer


This notebook takes an artist name and an album name, searches Genius for matching albums, and asks the user to confirm which album they want to analyze.


The notebook then downloads all of the album's lyrics, runs sentiment analysis on them, graphs the results, and creates a word cloud. I am planning to add a few more features, but for now this is what it does.


In [None]:
# !pip install lyricsgenius
# !pip install vaderSentiment
# !pip install nltk
import nltk
import json
import requests
nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.tokenize import RegexpTokenizer
from lyricsgenius import Genius
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import numpy as np 
import requests
from io import BytesIO 
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import os
from dotenv import load_dotenv

In [None]:
# Load the Genius API key from the .env file (or the existing environment).
# The client must receive the key's *value*; passing the literal string
# 'GENIUS_API_KEY' would send the variable name as the access token.
load_dotenv()
genius_api_key = os.getenv('GENIUS_API_KEY')
if not genius_api_key:
    raise RuntimeError("GENIUS_API_KEY is not set; add it to your .env file or environment.")
genius = Genius(genius_api_key)

# Ask the user to input an artist and an album.
artist = input("Enter an artist: ")
album = input("Enter an album: ")

# Parameters for the API calls.
genius.remove_section_headers = True
genius.excluded_terms = ["(Remix)", "(Live)"]
genius.skip_non_songs = True

# Search for candidate albums. The response is stored under its own name so
# `album` keeps the text the user typed instead of being overwritten by the
# response dict.
album_search = genius.search_albums(str(artist) + " " + str(album))

# One row per search hit, numbered from 1 so the user can pick a `choice`.
hits = album_search['sections'][0]['hits']
album_df = pd.DataFrame({
    'album_name': [hit['result']['name'] for hit in hits],
    'album_id': [hit['result']['id'] for hit in hits],
})
album_df['choice'] = album_df.index + 1

album_df

In [None]:
def get_album(choice, albums=None):
    """Return the album listed under a given choice number.

    Parameters
    ----------
    choice : int
        Value to match against the ``choice`` column.
    albums : pandas.DataFrame, optional
        Table with ``choice``, ``album_name`` and ``album_id`` columns.
        Defaults to the notebook-level ``album_df``.

    Returns
    -------
    tuple
        ``(album_name, album_id)`` taken from the first matching row.

    Raises
    ------
    IndexError
        If no row has the requested choice number.
    """
    if albums is None:
        albums = album_df
    selected = albums['choice'] == choice
    if not selected.any():
        # Same exception type the bare `.iloc[0]` lookup raises on an empty
        # selection, but with a message that says what went wrong.
        raise IndexError(
            "No album with choice " + str(choice)
            + "; valid choices are " + str(albums['choice'].tolist())
        )
    album_name = albums.loc[selected, 'album_name'].iloc[0]
    album_id = albums.loc[selected, 'album_id'].iloc[0]
    return album_name, album_id

# Prompt for the number shown in the `choice` column of the album table.
choice = int(input("Enter a choice: "))

# Resolve that number to the matching album name and album id.
album_name, album_id = get_album(choice)

# Show the selection, one value per line.
print(album_name, album_id, sep="\n")

In [None]:
# Fetch the chosen album by its id. The resolved album title is used for the
# lookup name rather than the `album` variable, which is rebound in this
# notebook and may not hold the album title the user typed.
album = genius.search_album(str(artist) + " " + str(album_name), album_id=album_id)
if album is None:
    # NOTE(review): guards against the lookup returning nothing, so the failure
    # is reported here rather than as an AttributeError on the next line.
    raise RuntimeError("Genius returned no album for id " + str(album_id))

# Write the album (including every track's lyrics) to lyrics.json.
album.save_lyrics('lyrics.json', overwrite=True)


In [None]:
# Load the saved album back from disk and pretty-print it to inspect its structure.
with open('lyrics.json') as lyrics_file:
    data = json.load(lyrics_file)

pretty_json = json.dumps(data, indent=4, sort_keys=True)
print(pretty_json)



In [None]:
# Display the album cover (data['cover_art_url']) inline in the notebook and
# download a local copy of it.
from IPython.display import HTML, display
# Aliased so that PIL's `Image`, imported at the top of the notebook, is not
# shadowed by IPython's class of the same name.
from IPython.display import Image as IPyImage

cover_art = IPyImage(url=data['cover_art_url'])
# A bare expression is only rendered when it is the last line of a cell, so
# the image is displayed explicitly.
display(cover_art)


## Download the album image locally

headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
url = data['cover_art_url']
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of saving an error page as the image.
response.raise_for_status()

# Save the response body as a file.
# NOTE(review): the file is always named .png, whatever format the URL serves.
with open('cover_art.png', 'wb') as f:
    f.write(response.content)


In [None]:
# Read the saved album JSON, then print it in a readable form along with its
# top-level keys.
with open('lyrics.json') as lyrics_file:
    data = json.load(lyrics_file)

print(json.dumps(data, indent=4, sort_keys=True))
print(data.keys())

In [None]:
# Explore the structure of the saved album JSON.
import json
with open('lyrics.json') as lyrics_file:
    data = json.load(lyrics_file)

# Whole document in a readable format, then its top-level keys.
print(json.dumps(data, indent=4, sort_keys=True))
print(data.keys())

# Number of entries under 'tracks'.
tracks = data['tracks']
print(len(tracks))

# Keys of the first track, and of its nested 'song' object.
first_track = tracks[0]
print(first_track.keys())
print(first_track['song'].keys())

# Lyrics of every song, in track order.
for track in tracks:
    print(track['song']['lyrics'])
    

# Add each song title and its lyrics to a dataframe. The rows are collected
# first and the frame is built once: DataFrame.append was removed in
# pandas 2.0, and growing a frame row by row copies it on every iteration.
song_records = [
    {'song': track['song']['title'], 'lyrics': track['song']['lyrics']}
    for track in data['tracks']
]
# `columns` keeps the expected layout even if the album has no tracks.
df = pd.DataFrame(song_records, columns=['song', 'lyrics'])
df



In [None]:
# Each lyric string starts with a title line: drop everything up to and
# including the first newline. regex=True is required because pandas >= 2.0
# treats the pattern as a literal string by default, which would leave the
# lyrics unchanged.
df['lyrics'] = df['lyrics'].str.replace(r'^.*\n', '', regex=True)

# Remove punctuation, convert to lowercase, drop stopwords (plus a few
# lyric-page artefacts such as 'chorus') and lemmatize what is left.
stop_words = set(stopwords.words('english'))
newStopWords = ['chorus','embed', 'like2embed']
stop_words.update(newStopWords)
lemmatizer = WordNetLemmatizer()
tokenizer = RegexpTokenizer(r'\w+')  # keeps runs of word characters, discarding punctuation

# Lowercase *before* the stopword test: the stopword set is lowercase, so a
# capitalised stopword would otherwise slip through the filter.
df['lyrics'] = df['lyrics'].apply(
    lambda text: ' '.join(
        lemmatizer.lemmatize(token)
        for token in tokenizer.tokenize(text.lower())
        if token not in stop_words
    )
)
df


In [None]:
analyzer = SentimentIntensityAnalyzer()


def compound_score(text):
    """Return the 'compound' value of the analyzer's polarity scores for `text`."""
    return analyzer.polarity_scores(text)['compound']


def sentiment_label(score):
    """Map a score to 'positive' (> 0), 'negative' (< 0) or 'neutral' (otherwise)."""
    if score > 0:
        return 'positive'
    if score < 0:
        return 'negative'
    return 'neutral'


# Per-song sentiment score, and the label derived from it.
df['sentiment_score'] = df['lyrics'].apply(compound_score)
df['sentiment'] = df['sentiment_score'].apply(sentiment_label)

# Each song's lyrics split into a list of word tokens.
df['tokenized_lyrics'] = df['lyrics'].apply(word_tokenize)
df



In [None]:
sns.set_style('darkgrid')
chart_title = 'Sentiment Analysis of ' + str(album_name)

# Bar chart: one bar per song showing its sentiment score.
plt.figure(figsize=(10, 5))
sns.barplot(data=df, x='song', y='sentiment_score')
plt.title(chart_title)
plt.xticks(rotation=90)
plt.show()

# Count plot: how many songs fall under each sentiment label.
plt.figure(figsize=(10, 5))
sns.countplot(data=df, x='sentiment')
plt.title(chart_title)
plt.xlabel('Sentiment')
plt.ylabel('Count')
plt.show()

# Mean of the per-song scores for the album as a whole.
average_score = df['sentiment_score'].mean()
print('Average Sentiment Score: ' + str(average_score))

        

In [None]:
# Word cloud built from the lyrics of every song on the album.
album_text = ' '.join(df['lyrics'])
cloud_builder = WordCloud(width=800, height=400, background_color='white', max_words=100)
wordcloud = cloud_builder.generate(album_text)

plt.figure(figsize=(10, 5))
plt.title('Word Cloud of ' + str(album_name))
plt.imshow(wordcloud)
plt.axis('off')  # hide the axes; only the cloud itself is of interest
plt.show()


In [None]:
# Generate a word cloud of the entire album (all songs' lyrics joined together).
cloud_options = {
    'width': 800,
    'height': 400,
    'background_color': 'white',
    'max_words': 100,
}
wordcloud = WordCloud(**cloud_options).generate(' '.join(df['lyrics']))

plt.figure(figsize=(10, 5))
plt.imshow(wordcloud)
plt.title('Word Cloud of ' + str(album_name))
plt.axis('off')
plt.show()


In [None]:
# PIL's Image is used below to decode the downloaded cover art.
from PIL import Image

# Combine the lyrics of every song into one space-separated string.
words = ' '.join(lyric for lyric in df['lyrics'])

def read_img_from_url(url, headers=headers):
    """Download an image and return it as an RGB pixel array.

    Parameters
    ----------
    url : str
        Address of the image to download.
    headers : dict, optional
        HTTP headers sent with the request. The default is whatever the
        notebook-level ``headers`` held when this function was defined.

    Returns
    -------
    numpy.ndarray
        Pixel data with shape ``(height, width, 3)``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # Imported locally under an alias: another cell in this notebook binds the
    # global name `Image` to IPython's display class, which has no `open`.
    from PIL import Image as PILImage

    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()  # do not try to decode an error page as an image
    # Normalise to three channels so the array shape is predictable for
    # grayscale, palette or RGBA sources as well.
    img = PILImage.open(BytesIO(response.content)).convert('RGB')
    return np.array(img)

def read_txt(lyrics, *size):
    """Render text as a word cloud and return the rendered image as an array.

    Parameters
    ----------
    lyrics : str
        Text to build the word cloud from.
    *size : int
        Target size in NumPy shape order: ``size[0]`` is the height and
        ``size[1]`` the width. Further values (e.g. a channel count) are
        ignored, so an image array's ``.shape`` can be unpacked directly.

    Returns
    -------
    numpy.ndarray
        The rendered word cloud, as returned by ``WordCloud.to_array()``.

    Raises
    ------
    ValueError
        If fewer than two size values are given.
    """
    if len(size) < 2:
        raise ValueError("read_txt needs at least a height and a width")
    # An array shape is (rows, columns) = (height, width). Passing size[0] as
    # the width would transpose the canvas for any non-square image.
    height, width = size[0], size[1]
    wc = WordCloud(
        max_words=2000,
        contour_width=3,
        width=width,
        height=height,
        contour_color='steelblue',
    ).generate(lyrics)  # use the text that was passed in, not notebook globals
    return wc.to_array()


# Browser-like User-Agent header for HTTP requests.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

# Cover art as a pixel array.
img_url = data['cover_art_url']
img_matrix = read_img_from_url(img_url)

# Word cloud rendered with the cover art's shape values as its size.
txt_url = words
txt_matrix = read_txt(txt_url, *img_matrix.shape)

# Show both array shapes side by side.
print(img_matrix.shape, txt_matrix.shape)

In [None]:
# Zero out the cover-art values wherever the word-cloud array equals 100.
# NOTE(review): the meaning of the value 100 is not evident here — confirm.
cloud_mask = txt_matrix == 100
img_matrix[cloud_mask] = 0
print(img_matrix.shape)

In [None]:
# Draw the album cover, then the word cloud on top of it.
plt.figure(figsize=(10, 10), dpi=300)
for layer, layer_options in ((img_matrix, {}), (txt_matrix, {'alpha': 0.55})):
    # The second layer is semi-transparent so the cover stays visible beneath it.
    plt.imshow(layer, **layer_options)
plt.axis('off')
plt.show()





In [None]:
# Find the most infrequent words in the album.
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
import string

stop_words = set(stopwords.words('english'))
stop_words.add('chorus')
lemmatizer = WordNetLemmatizer()
tokenizer = RegexpTokenizer(r'\w+')

# Tokenize the whole album's lyrics, skip stopwords, lemmatize the rest.
words = []
for token in tokenizer.tokenize(' '.join(df['lyrics'])):
    if token in stop_words:
        continue
    words.append(lemmatizer.lemmatize(token.lower()))
word_counts = Counter(words)

# most_common() is ordered from most to least frequent, so a reversed slice
# from the end yields the rarest words first.
rarest_twenty = word_counts.most_common()[:-21:-1]
rarest_ten = rarest_twenty[:10]

# Graph the ten most infrequent words in the album.
plt.figure(figsize=(10, 5))
plt.title('Most Infrequent Words in ' + str(album_name))
sns.barplot(
    x=[term for term, _ in rarest_ten],
    y=[count for _, count in rarest_ten],
)
plt.xticks(rotation=90)
plt.show()

# List the twenty most infrequent words in the album.
print('Most Infrequent Words in ' + str(album_name))
for term, count in rarest_twenty:
    print(term + ': ' + str(count))



In [None]:
# Mean sentiment score across all songs on the album.
mean_sentiment = df['sentiment_score'].mean()

# One-row dataframe holding the artist name, album name and mean score.
sentiment_columns = ['artist', 'album', 'sentiment_score']
album_sentiment = pd.DataFrame(
    {'artist': [artist], 'album': [album_name], 'sentiment_score': [mean_sentiment]},
    columns=sentiment_columns,
)

# Append the row to the running CSV log. No header row is written, so the file
# contains data rows only. Note that re-running this cell appends the row again.
album_sentiment.to_csv('album_sentiment.csv', mode='a', header=False, index=False)

# Read the whole log back. The file has no header row, so the column names are
# supplied explicitly; with the default header inference the first record
# would be consumed as the column names.
album_sentiment = pd.read_csv('album_sentiment.csv', header=None, names=sentiment_columns)
album_sentiment


In [93]:
# Load albumlist.csv into a dataframe and display it.
albumlist_path = 'albumlist.csv'
albumlist = pd.read_csv(albumlist_path)
albumlist


Unnamed: 0,Number,Year,Album,Artist,Genre,Subgenre
0,1,1967,Sgt. Pepper's Lonely Hearts Club Band,The Beatles,Rock,"Rock & Roll, Psychedelic Rock"
1,2,1966,Pet Sounds,The Beach Boys,Rock,"Pop Rock, Psychedelic Rock"
2,3,1966,Revolver,The Beatles,Rock,"Psychedelic Rock, Pop Rock"
3,4,1965,Highway 61 Revisited,Bob Dylan,Rock,"Folk Rock, Blues Rock"
4,5,1965,Rubber Soul,The Beatles,"Rock, Pop",Pop Rock
...,...,...,...,...,...,...
495,496,1969,Boz Scaggs,Boz Scaggs,Rock,Pop Rock
496,497,2001,White Blood Cells,The White Stripes,Rock,"Indie Rock, Alternative Rock, Blues Rock, Gara..."
497,498,1989,The Stone Roses,The Stone Roses,Rock,Indie Rock
498,499,1971,Live in Cook County Jail,B.B. King,Blues,Electric Blues
