In [1]:
import pandas as pd
import nltk

In [2]:
from nltk.tokenize import word_tokenize # word tokenize will turn each word in a text string into a token

In [3]:
nltk.download('punkt') # punkt is a language aware model that can handle punctuation in text

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mccal\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
# i don't think there is any punctuation in the data that we are using to train this model but best to be safe

In [5]:
from nltk.corpus import stopwords
nltk.download('stopwords') # there are definitely stop words in the data we will use to build the model (words that do not have sentiment)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\mccal\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer # this weighs the importance of a word based on freqeuency (feature extraction tool)

In [7]:
from sklearn.model_selection import train_test_split # we all know what this is

In [8]:
from sklearn.linear_model import LogisticRegression # this is our model of choice

In [9]:
from sklearn.metrics import accuracy_score, classification_report # we will use this to see how accurate the model is

In [10]:
emotions=pd.read_csv('emotions.csv') # reading in labeled text data to make the model

In [11]:
emotions.info() # basic data information

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 416809 entries, 0 to 416808
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   text    416809 non-null  object
 1   label   416809 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 6.4+ MB


In [12]:
emotions.head() # here's what the data looks like

Unnamed: 0,text,label
0,i just feel really helpless and heavy hearted,4
1,ive enjoyed being able to slouch about relax a...,0
2,i gave up my internship with the dmrg and am f...,4
3,i dont know i feel so lost,0
4,i am a kindergarten teacher and i am thoroughl...,4


In [13]:
# sadness (0), joy (1), love (2), anger (3), fear (4), and surprise (5) emotions and their label

In [14]:
emotions.label.value_counts() # we have some really unbalanced data here, this is what we adress first if we want to improve the model

label
1    141067
0    121187
3     57317
4     47712
2     34554
5     14972
Name: count, dtype: int64

In [15]:
def tokenize(text):
    return nltk.word_tokenize(text.lower()) # i am writing a function that tokenizes text that we can apply to the text column

In [16]:
emotions['tokens']=emotions['text'].apply(tokenize) # applying the tokenizer function to create a tokens column

In [17]:
stop_words = set(stopwords.words('english')) # this is going to be a set containing all of the english stop words

In [18]:
emotions['filtered_tokens'] = emotions['tokens'].apply(lambda x: [word for word in x if word not in stop_words])
# we are creating a column of tokens that do not contain any words we do not want to feed the model

In [19]:
emotions.head() # here's what our data looks like now with the new features

Unnamed: 0,text,label,tokens,filtered_tokens
0,i just feel really helpless and heavy hearted,4,"[i, just, feel, really, helpless, and, heavy, ...","[feel, really, helpless, heavy, hearted]"
1,ive enjoyed being able to slouch about relax a...,0,"[ive, enjoyed, being, able, to, slouch, about,...","[ive, enjoyed, able, slouch, relax, unwind, fr..."
2,i gave up my internship with the dmrg and am f...,4,"[i, gave, up, my, internship, with, the, dmrg,...","[gave, internship, dmrg, feeling, distraught]"
3,i dont know i feel so lost,0,"[i, dont, know, i, feel, so, lost]","[dont, know, feel, lost]"
4,i am a kindergarten teacher and i am thoroughl...,4,"[i, am, a, kindergarten, teacher, and, i, am, ...","[kindergarten, teacher, thoroughly, weary, job..."


In [20]:
emotions['text_combined'] = emotions['filtered_tokens'].apply(lambda x: ' '.join(x))

# what's happening here is we are creating a column combining the filtered tokens back together into a text string

In [21]:
vectorizer = TfidfVectorizer()
# i mentioned earlier that this is going to make the text readable for the logistic regression  model
# this is going to create vectors from our text that we can use for training
# the vectors are made with term frequency, relative to amount of documents, which in turn tells us something about their importance

In [22]:
X = vectorizer.fit_transform(emotions['text_combined']) # vectorize the text

In [23]:
y=emotions['label']

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# splitting the data into training data and testing data
# 42 was chosen arbitrarily
# random state accepts any number in range [-2,147,483,648 , 2,147,483,647]

In [25]:
model = LogisticRegression(penalty='l2', max_iter=200)  # Default is 'l2'
# i chose 200 for max_iter because it was large enough to reach convergence
# if you do not set the max_iter to 200, the default, 100 is not enough and you will get an error

In [26]:
model.fit(X_train, y_train) # fit the model

In [27]:
y_pred = model.predict(X_test) # basic prediction function call

In [28]:
accuracy = accuracy_score(y_test, y_pred)

In [29]:
accuracy # did pretty well

0.8939924665914926

In [30]:
# we aren't after the models prediction for the labels
# we are after the probability scores for each label because we want nuanced emotion vectors
# lets do a demo

In [31]:
# here is a string that i am going to write that will contain some things i want to say about my emotional state
hank_feeling = 'i had a stressfull day but i was able to get a lot done which made me proud'

In [32]:
# in order to pull an emotion vector from this string we need to apply the same principles we used to train the model
# we need to tokenize the text
# we need to filter the tokenized text for stopwords
# we need to re join that filtered token into plain text
# we need to extract features using tfidf vectorization
# we need to tell the model to predict probabilites NOT the label
# we're gonna investigate what the probabilities look like

In [33]:
hank_feeling_tokens = nltk.word_tokenize( hank_feeling.lower()) #tokenizing the sentence i wrote
hank_feeling_tokens # feels right

['i',
 'had',
 'a',
 'stressfull',
 'day',
 'but',
 'i',
 'was',
 'able',
 'to',
 'get',
 'a',
 'lot',
 'done',
 'which',
 'made',
 'me',
 'proud']

In [34]:
filtered_hank_feeling_tokens = [word for word in hank_feeling_tokens if word not in stop_words]
# remove stop words from the tokenized sentence i wrote
filtered_hank_feeling_tokens # feels right

['stressfull', 'day', 'able', 'get', 'lot', 'done', 'made', 'proud']

In [35]:
hank_feeling_combined = ' '.join(filtered_hank_feeling_tokens) # re combine the text 
hank_feeling_combined # feels right

'stressfull day able get lot done made proud'

In [36]:
tfidf_hank_feeling = vectorizer.transform([hank_feeling_combined]) # get the tfidf vector from my combined text
tfidf_hank_feeling # feels confusing but we need to do this to make the text readable for the computer

<1x75127 sparse matrix of type '<class 'numpy.float64'>'
	with 7 stored elements in Compressed Sparse Row format>

In [37]:
emotion_probabilities = model.predict_proba(tfidf_hank_feeling) # pulling the probabilities for each emotion
emotion_probabilities # what we have is an array where each element is the liklihood of an emotion

array([[0.01533646, 0.95192734, 0.01017043, 0.00952892, 0.00550737,
        0.00752947]])

In [38]:
#  recall this key; sadness (0), joy (1), love (2), anger (3), fear (4), and surprise (5) emotions and their label

In [39]:
# this still isn't entirely what we want, we want a list of probabilities that we can index

In [40]:
prob_list = emotion_probabilities.tolist()
prob_list # this is close, we really need that list in the list though

[[0.015336456086055822,
  0.9519273437595572,
  0.010170432898996905,
  0.009528920934266534,
  0.005507372710613149,
  0.007529473610510535]]

In [41]:
hank_emotion_vector = prob_list[0]
hank_emotion_vector # perfect now we have a list of emotion scores pulled from a text string that i wrote

[0.015336456086055822,
 0.9519273437595572,
 0.010170432898996905,
 0.009528920934266534,
 0.005507372710613149,
 0.007529473610510535]

In [42]:
# now let's write a function that does that whole process for us

In [43]:
def emotion_score(user_input):
    
    input_tokens = nltk.word_tokenize(user_input.lower())
    
    filtered_input_tokens = [word for word in input_tokens if word not in stop_words]
    
    input_combined = ' '.join(filtered_input_tokens)
    
    input_vector = vectorizer.transform([input_combined])
    
    probabilities = model.predict_proba(input_vector)
    
    return probabilities.tolist()[0]

In [44]:
emotion_score('i am feeling happy today') # feels right

[0.004727503627023497,
 0.9900315263103031,
 0.0032173409452982433,
 0.0010353425353012962,
 0.0007143066889650694,
 0.00027397989310886674]

In [45]:
music=pd.read_csv('songs_with_lyrics_Cleaned.csv') # i wanna run this function on an entire data frame of song lyrics

In [46]:
music.head() # here's what the data frame with music data and song lyrics

Unnamed: 0.1,Unnamed: 0,artist,song,link,text
0,0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"look at her face, it's a wonderful face and it..."
1,1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"take it easy with me, please touch me gently l..."
2,2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,i'll never know why i had to go why i had to p...
3,3,ABBA,Bang,/a/abba/bang_20598415.html,making somebody happy is a question of give an...
4,4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,making somebody happy is a question of give an...


In [47]:
stmf=music.text[83]

In [48]:
stmf_emotion_score=emotion_score(stmf)

In [49]:
stmf_emotion_score[5]

0.1440686713399015

In [50]:
emotions

Unnamed: 0,text,label,tokens,filtered_tokens,text_combined
0,i just feel really helpless and heavy hearted,4,"[i, just, feel, really, helpless, and, heavy, ...","[feel, really, helpless, heavy, hearted]",feel really helpless heavy hearted
1,ive enjoyed being able to slouch about relax a...,0,"[ive, enjoyed, being, able, to, slouch, about,...","[ive, enjoyed, able, slouch, relax, unwind, fr...",ive enjoyed able slouch relax unwind frankly n...
2,i gave up my internship with the dmrg and am f...,4,"[i, gave, up, my, internship, with, the, dmrg,...","[gave, internship, dmrg, feeling, distraught]",gave internship dmrg feeling distraught
3,i dont know i feel so lost,0,"[i, dont, know, i, feel, so, lost]","[dont, know, feel, lost]",dont know feel lost
4,i am a kindergarten teacher and i am thoroughl...,4,"[i, am, a, kindergarten, teacher, and, i, am, ...","[kindergarten, teacher, thoroughly, weary, job...",kindergarten teacher thoroughly weary job take...
...,...,...,...,...,...
416804,i feel like telling these horny devils to find...,2,"[i, feel, like, telling, these, horny, devils,...","[feel, like, telling, horny, devils, find, sit...",feel like telling horny devils find site suite...
416805,i began to realize that when i was feeling agi...,3,"[i, began, to, realize, that, when, i, was, fe...","[began, realize, feeling, agitated, restless, ...",began realize feeling agitated restless would ...
416806,i feel very curious be why previous early dawn...,5,"[i, feel, very, curious, be, why, previous, ea...","[feel, curious, previous, early, dawn, time, s...",feel curious previous early dawn time seek tro...
416807,i feel that becuase of the tyranical nature of...,3,"[i, feel, that, becuase, of, the, tyranical, n...","[feel, becuase, tyranical, nature, government,...",feel becuase tyranical nature government el sa...


In [51]:
len(emotions.text[0].split())

8

In [52]:
# demo in case anyone asks

In [53]:
stmf # here are the lyrics to the song slipping through my fingers, by abba

"schoolbag in hand, she leaves home in the early morning waving goodbye with an absent-minded smile i watch her go with a surge of that well known sadness and i have to sit down for a while the feeling that i'm losing her forever and without really entering her world i'm glad whenever i can share her laughter that funny little girl slipping through my fingers all the time i try to capture every minute the feeling in it slipping through my fingers all the time do i really see what's in her mind each time i think i'm close to knowing she keeps on growing slipping through my fingers all the time sleep in our eyes, her and me at the breakfast table barely awake i let precious time go by then when she's gone, there's that odd melancholy feeling and a sense of guilt i can't deny what happened to the wonderful adventures the places i had planned for us to go well, some of that we did, but most we didn't and why, i just don't know slipping through my fingers all the time i try to capture every

In [54]:
# demo for how the function handles song lyrics
emotion_score(stmf)

[0.26486169092397066,
 0.3791021911283691,
 0.0643399182728898,
 0.0771725837628346,
 0.07045494457203419,
 0.1440686713399015]

In [55]:
# demo for how the song handles user input
emotion_score(' i had a pretty rough day, can you recommend me a sad song?')

[0.734118761988165,
 0.132520592501066,
 0.029530023819752265,
 0.040496651943053505,
 0.034728021670850914,
 0.028605948077112395]

In [56]:
# write a function that vectorizes the song lyric data with an emotion score dictionary in order to create a table for the lyric scores

In [57]:
def emotion_dictionary(user_input):
    input_tokens = nltk.word_tokenize(user_input.lower())
    filtered_input_tokens = [word for word in input_tokens if word not in stop_words]
    input_combined = ' '.join(filtered_input_tokens)
    input_vector = vectorizer.transform([input_combined])
    probabilities = model.predict_proba(input_vector)
    emotion_vector = probabilities.tolist()[0]
    return {'sadness': emotion_vector[0],
    'joy': emotion_vector[1],
    'love': emotion_vector[2],
    'anger': emotion_vector[3],
    'fear': emotion_vector[4],
    'surprise': emotion_vector[5] }

In [58]:
emotion_dictionary(stmf)

{'sadness': 0.26486169092397066,
 'joy': 0.3791021911283691,
 'love': 0.0643399182728898,
 'anger': 0.0771725837628346,
 'fear': 0.07045494457203419,
 'surprise': 0.1440686713399015}

In [59]:
# time to make the score table

In [60]:
from tqdm import tqdm # import tqdm for progress bar

In [61]:
tqdm.pandas() # call this to get the progress bar

In [62]:
scores=music['text'].progress_apply(emotion_dictionary).apply(pd.Series) # get a series containing the emotion scores for every song

100%|███████████████████████████████████████████████████████████████████████████| 44795/44795 [05:40<00:00, 131.74it/s]


In [63]:
scores # this is what it looks like

Unnamed: 0,sadness,joy,love,anger,fear,surprise
0,0.037772,0.865948,0.020204,0.032962,0.023208,0.019906
1,0.066016,0.637793,0.148777,0.056284,0.058281,0.032849
2,0.557590,0.304692,0.064213,0.023887,0.027135,0.022482
3,0.136736,0.362437,0.262408,0.145773,0.065213,0.027435
4,0.967274,0.005258,0.016713,0.006844,0.002461,0.001449
...,...,...,...,...,...,...
44790,0.152381,0.504729,0.055719,0.141408,0.109148,0.036615
44791,0.426336,0.203087,0.030651,0.199676,0.091436,0.048814
44792,0.164597,0.422916,0.090274,0.126768,0.155709,0.039737
44793,0.297446,0.294825,0.074738,0.199254,0.100945,0.032792


In [64]:
scores.mean() # basic stats on the emotions

sadness     0.268034
joy         0.282942
love        0.135176
anger       0.151015
fear        0.116307
surprise    0.046526
dtype: float64

In [65]:
scores.median() # same as above

sadness     0.228373
joy         0.257645
love        0.103459
anger       0.132783
fear        0.101210
surprise    0.039736
dtype: float64

In [66]:
music

Unnamed: 0.1,Unnamed: 0,artist,song,link,text
0,0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"look at her face, it's a wonderful face and it..."
1,1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"take it easy with me, please touch me gently l..."
2,2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,i'll never know why i had to go why i had to p...
3,3,ABBA,Bang,/a/abba/bang_20598415.html,making somebody happy is a question of give an...
4,4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,making somebody happy is a question of give an...
...,...,...,...,...,...
44790,44790,Zebrahead,Mental Health,/z/zebrahead/mental+health_20747013.html,let's go the lights are on but there is no one...
44791,44791,Zebrahead,The Setup,/z/zebrahead/the+setup_10198494.html,lie to me tell me that everything will be all ...
44792,44792,Ziggy Marley,Freedom Road,/z/ziggy+marley/freedom+road_20531174.html,"that's why i'm marching, yes, i'm marching, ma..."
44793,44793,Ziggy Marley,G7,/z/ziggy+marley/g7_20531173.html,seven richest countries in the world them have...


In [67]:
scores

Unnamed: 0,sadness,joy,love,anger,fear,surprise
0,0.037772,0.865948,0.020204,0.032962,0.023208,0.019906
1,0.066016,0.637793,0.148777,0.056284,0.058281,0.032849
2,0.557590,0.304692,0.064213,0.023887,0.027135,0.022482
3,0.136736,0.362437,0.262408,0.145773,0.065213,0.027435
4,0.967274,0.005258,0.016713,0.006844,0.002461,0.001449
...,...,...,...,...,...,...
44790,0.152381,0.504729,0.055719,0.141408,0.109148,0.036615
44791,0.426336,0.203087,0.030651,0.199676,0.091436,0.048814
44792,0.164597,0.422916,0.090274,0.126768,0.155709,0.039737
44793,0.297446,0.294825,0.074738,0.199254,0.100945,0.032792


In [68]:
scored_music=pd.concat([music,scores],axis=1)

In [69]:
scored_music.drop(columns=['Unnamed: 0'],inplace=True)

In [70]:
scored_music.columns

Index(['artist', 'song', 'link', 'text', 'sadness', 'joy', 'love', 'anger',
       'fear', 'surprise'],
      dtype='object')

In [71]:
scored_music

Unnamed: 0,artist,song,link,text,sadness,joy,love,anger,fear,surprise
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"look at her face, it's a wonderful face and it...",0.037772,0.865948,0.020204,0.032962,0.023208,0.019906
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"take it easy with me, please touch me gently l...",0.066016,0.637793,0.148777,0.056284,0.058281,0.032849
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,i'll never know why i had to go why i had to p...,0.557590,0.304692,0.064213,0.023887,0.027135,0.022482
3,ABBA,Bang,/a/abba/bang_20598415.html,making somebody happy is a question of give an...,0.136736,0.362437,0.262408,0.145773,0.065213,0.027435
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,making somebody happy is a question of give an...,0.967274,0.005258,0.016713,0.006844,0.002461,0.001449
...,...,...,...,...,...,...,...,...,...,...
44790,Zebrahead,Mental Health,/z/zebrahead/mental+health_20747013.html,let's go the lights are on but there is no one...,0.152381,0.504729,0.055719,0.141408,0.109148,0.036615
44791,Zebrahead,The Setup,/z/zebrahead/the+setup_10198494.html,lie to me tell me that everything will be all ...,0.426336,0.203087,0.030651,0.199676,0.091436,0.048814
44792,Ziggy Marley,Freedom Road,/z/ziggy+marley/freedom+road_20531174.html,"that's why i'm marching, yes, i'm marching, ma...",0.164597,0.422916,0.090274,0.126768,0.155709,0.039737
44793,Ziggy Marley,G7,/z/ziggy+marley/g7_20531173.html,seven richest countries in the world them have...,0.297446,0.294825,0.074738,0.199254,0.100945,0.032792


In [72]:
scored_music.to_csv('scored_music.xlsx',index=False) 

In [73]:
vector=music['text'].progress_apply(emotion_score)

100%|███████████████████████████████████████████████████████████████████████████| 44795/44795 [05:39<00:00, 131.82it/s]


In [74]:
vector

0        [0.037771861392196945, 0.865948419112691, 0.02...
1        [0.0660160140254407, 0.6377930766113133, 0.148...
2        [0.5575900549063024, 0.3046922826265263, 0.064...
3        [0.13673553505589905, 0.3624368879213182, 0.26...
4        [0.9672743610382378, 0.0052580633129720306, 0....
                               ...                        
44790    [0.15238124763496208, 0.5047291126308601, 0.05...
44791    [0.4263364301563252, 0.2030869043838914, 0.030...
44792    [0.1645965162351848, 0.4229155166796501, 0.090...
44793    [0.2974464252724677, 0.2948254435155049, 0.074...
44794    [0.30711703875536295, 0.26534945267114374, 0.0...
Name: text, Length: 44795, dtype: object

In [75]:
vector

0        [0.037771861392196945, 0.865948419112691, 0.02...
1        [0.0660160140254407, 0.6377930766113133, 0.148...
2        [0.5575900549063024, 0.3046922826265263, 0.064...
3        [0.13673553505589905, 0.3624368879213182, 0.26...
4        [0.9672743610382378, 0.0052580633129720306, 0....
                               ...                        
44790    [0.15238124763496208, 0.5047291126308601, 0.05...
44791    [0.4263364301563252, 0.2030869043838914, 0.030...
44792    [0.1645965162351848, 0.4229155166796501, 0.090...
44793    [0.2974464252724677, 0.2948254435155049, 0.074...
44794    [0.30711703875536295, 0.26534945267114374, 0.0...
Name: text, Length: 44795, dtype: object

In [76]:
pd.Series(vector)

0        [0.037771861392196945, 0.865948419112691, 0.02...
1        [0.0660160140254407, 0.6377930766113133, 0.148...
2        [0.5575900549063024, 0.3046922826265263, 0.064...
3        [0.13673553505589905, 0.3624368879213182, 0.26...
4        [0.9672743610382378, 0.0052580633129720306, 0....
                               ...                        
44790    [0.15238124763496208, 0.5047291126308601, 0.05...
44791    [0.4263364301563252, 0.2030869043838914, 0.030...
44792    [0.1645965162351848, 0.4229155166796501, 0.090...
44793    [0.2974464252724677, 0.2948254435155049, 0.074...
44794    [0.30711703875536295, 0.26534945267114374, 0.0...
Name: text, Length: 44795, dtype: object

In [77]:
scored_music

Unnamed: 0,artist,song,link,text,sadness,joy,love,anger,fear,surprise
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"look at her face, it's a wonderful face and it...",0.037772,0.865948,0.020204,0.032962,0.023208,0.019906
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"take it easy with me, please touch me gently l...",0.066016,0.637793,0.148777,0.056284,0.058281,0.032849
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,i'll never know why i had to go why i had to p...,0.557590,0.304692,0.064213,0.023887,0.027135,0.022482
3,ABBA,Bang,/a/abba/bang_20598415.html,making somebody happy is a question of give an...,0.136736,0.362437,0.262408,0.145773,0.065213,0.027435
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,making somebody happy is a question of give an...,0.967274,0.005258,0.016713,0.006844,0.002461,0.001449
...,...,...,...,...,...,...,...,...,...,...
44790,Zebrahead,Mental Health,/z/zebrahead/mental+health_20747013.html,let's go the lights are on but there is no one...,0.152381,0.504729,0.055719,0.141408,0.109148,0.036615
44791,Zebrahead,The Setup,/z/zebrahead/the+setup_10198494.html,lie to me tell me that everything will be all ...,0.426336,0.203087,0.030651,0.199676,0.091436,0.048814
44792,Ziggy Marley,Freedom Road,/z/ziggy+marley/freedom+road_20531174.html,"that's why i'm marching, yes, i'm marching, ma...",0.164597,0.422916,0.090274,0.126768,0.155709,0.039737
44793,Ziggy Marley,G7,/z/ziggy+marley/g7_20531173.html,seven richest countries in the world them have...,0.297446,0.294825,0.074738,0.199254,0.100945,0.032792


In [78]:
#scored_music_w_vector=pd.concat([scored_music,pd.Series(vector)],axis=1)

In [79]:
#scored_music_w_vector

NameError: name 'scored_music_w_vector' is not defined

In [80]:
scored_music['vector']=vector

In [81]:
scored_music

Unnamed: 0,artist,song,link,text,sadness,joy,love,anger,fear,surprise,vector
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"look at her face, it's a wonderful face and it...",0.037772,0.865948,0.020204,0.032962,0.023208,0.019906,"[0.037771861392196945, 0.865948419112691, 0.02..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"take it easy with me, please touch me gently l...",0.066016,0.637793,0.148777,0.056284,0.058281,0.032849,"[0.0660160140254407, 0.6377930766113133, 0.148..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,i'll never know why i had to go why i had to p...,0.557590,0.304692,0.064213,0.023887,0.027135,0.022482,"[0.5575900549063024, 0.3046922826265263, 0.064..."
3,ABBA,Bang,/a/abba/bang_20598415.html,making somebody happy is a question of give an...,0.136736,0.362437,0.262408,0.145773,0.065213,0.027435,"[0.13673553505589905, 0.3624368879213182, 0.26..."
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,making somebody happy is a question of give an...,0.967274,0.005258,0.016713,0.006844,0.002461,0.001449,"[0.9672743610382378, 0.0052580633129720306, 0...."
...,...,...,...,...,...,...,...,...,...,...,...
44790,Zebrahead,Mental Health,/z/zebrahead/mental+health_20747013.html,let's go the lights are on but there is no one...,0.152381,0.504729,0.055719,0.141408,0.109148,0.036615,"[0.15238124763496208, 0.5047291126308601, 0.05..."
44791,Zebrahead,The Setup,/z/zebrahead/the+setup_10198494.html,lie to me tell me that everything will be all ...,0.426336,0.203087,0.030651,0.199676,0.091436,0.048814,"[0.4263364301563252, 0.2030869043838914, 0.030..."
44792,Ziggy Marley,Freedom Road,/z/ziggy+marley/freedom+road_20531174.html,"that's why i'm marching, yes, i'm marching, ma...",0.164597,0.422916,0.090274,0.126768,0.155709,0.039737,"[0.1645965162351848, 0.4229155166796501, 0.090..."
44793,Ziggy Marley,G7,/z/ziggy+marley/g7_20531173.html,seven richest countries in the world them have...,0.297446,0.294825,0.074738,0.199254,0.100945,0.032792,"[0.2974464252724677, 0.2948254435155049, 0.074..."


In [None]:
#scored_music.to_csv('scored_music_w_vector.csv',index=False)

In [82]:
scored_music

Unnamed: 0,artist,song,link,text,sadness,joy,love,anger,fear,surprise,vector
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"look at her face, it's a wonderful face and it...",0.037772,0.865948,0.020204,0.032962,0.023208,0.019906,"[0.037771861392196945, 0.865948419112691, 0.02..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"take it easy with me, please touch me gently l...",0.066016,0.637793,0.148777,0.056284,0.058281,0.032849,"[0.0660160140254407, 0.6377930766113133, 0.148..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,i'll never know why i had to go why i had to p...,0.557590,0.304692,0.064213,0.023887,0.027135,0.022482,"[0.5575900549063024, 0.3046922826265263, 0.064..."
3,ABBA,Bang,/a/abba/bang_20598415.html,making somebody happy is a question of give an...,0.136736,0.362437,0.262408,0.145773,0.065213,0.027435,"[0.13673553505589905, 0.3624368879213182, 0.26..."
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,making somebody happy is a question of give an...,0.967274,0.005258,0.016713,0.006844,0.002461,0.001449,"[0.9672743610382378, 0.0052580633129720306, 0...."
...,...,...,...,...,...,...,...,...,...,...,...
44790,Zebrahead,Mental Health,/z/zebrahead/mental+health_20747013.html,let's go the lights are on but there is no one...,0.152381,0.504729,0.055719,0.141408,0.109148,0.036615,"[0.15238124763496208, 0.5047291126308601, 0.05..."
44791,Zebrahead,The Setup,/z/zebrahead/the+setup_10198494.html,lie to me tell me that everything will be all ...,0.426336,0.203087,0.030651,0.199676,0.091436,0.048814,"[0.4263364301563252, 0.2030869043838914, 0.030..."
44792,Ziggy Marley,Freedom Road,/z/ziggy+marley/freedom+road_20531174.html,"that's why i'm marching, yes, i'm marching, ma...",0.164597,0.422916,0.090274,0.126768,0.155709,0.039737,"[0.1645965162351848, 0.4229155166796501, 0.090..."
44793,Ziggy Marley,G7,/z/ziggy+marley/g7_20531173.html,seven richest countries in the world them have...,0.297446,0.294825,0.074738,0.199254,0.100945,0.032792,"[0.2974464252724677, 0.2948254435155049, 0.074..."


In [83]:
# import the similarity algorithm for vectors
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [84]:
vector1 = np.array([1, 0, 1])
vector2 = np.array([1, 1, 1])

# Reshape the vectors to be 2D (required by cosine_similarity)
vector1 = vector1.reshape(1, -1)
vector2 = vector2.reshape(1, -1)

# Calculate cosine similarity
similarity = cosine_similarity(vector1, vector2)

print("Cosine Similarity:", similarity[0][0])

Cosine Similarity: 0.816496580927726


In [85]:
similarity[0][0]

0.816496580927726

In [86]:
user_input='yesterday i watched the lakers game and i lost money on it which was rough but i had fun watching the game anyways which was good'

In [87]:
user_vec = emotion_score(user_input)

In [88]:
user_vec

[0.5396963788057415,
 0.19285173072893422,
 0.04366610469701906,
 0.09854388358690694,
 0.07520161452573766,
 0.050040287655660556]

In [89]:
stmf_vector = scored_music.vector[83]

In [90]:
stmf_vector

[0.26486169092397066,
 0.3791021911283691,
 0.0643399182728898,
 0.0771725837628346,
 0.07045494457203419,
 0.1440686713399015]

In [91]:
# try to see the cosine similarity between my user vector and the song vector

stmf_array = np.array(stmf_vector)

user_array = np.array(user_vec)

shaped_stmf = stmf_array.reshape(1,-1)

shaped_user = user_array.reshape(1,-1)

sim = cosine_similarity(shaped_stmf,shaped_user)


In [92]:
sim[0][0]

0.8104393137259611

In [93]:
type(scored_music.vector[0])

list

In [94]:
def song_similarity(user_input,song_vector):
    
    song_array = np.array(song_vector)
    u_array = np.array(user_input)
    shaped_s_array = np.array(song_array).reshape(1,-1)
    shaped_u_array = np.array(u_array).reshape(1,-1)
    similar = cosine_similarity(shaped_s_array,shaped_u_array)
    return similar[0][0]




    



In [95]:
song_similarity(stmf_vector,user_vec)

0.8104393137259611

In [96]:
stmf

"schoolbag in hand, she leaves home in the early morning waving goodbye with an absent-minded smile i watch her go with a surge of that well known sadness and i have to sit down for a while the feeling that i'm losing her forever and without really entering her world i'm glad whenever i can share her laughter that funny little girl slipping through my fingers all the time i try to capture every minute the feeling in it slipping through my fingers all the time do i really see what's in her mind each time i think i'm close to knowing she keeps on growing slipping through my fingers all the time sleep in our eyes, her and me at the breakfast table barely awake i let precious time go by then when she's gone, there's that odd melancholy feeling and a sense of guilt i can't deny what happened to the wonderful adventures the places i had planned for us to go well, some of that we did, but most we didn't and why, i just don't know slipping through my fingers all the time i try to capture every

In [97]:
emotion_score(stmf)

[0.26486169092397066,
 0.3791021911283691,
 0.0643399182728898,
 0.0771725837628346,
 0.07045494457203419,
 0.1440686713399015]

In [98]:
scored_music

Unnamed: 0,artist,song,link,text,sadness,joy,love,anger,fear,surprise,vector
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"look at her face, it's a wonderful face and it...",0.037772,0.865948,0.020204,0.032962,0.023208,0.019906,"[0.037771861392196945, 0.865948419112691, 0.02..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"take it easy with me, please touch me gently l...",0.066016,0.637793,0.148777,0.056284,0.058281,0.032849,"[0.0660160140254407, 0.6377930766113133, 0.148..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,i'll never know why i had to go why i had to p...,0.557590,0.304692,0.064213,0.023887,0.027135,0.022482,"[0.5575900549063024, 0.3046922826265263, 0.064..."
3,ABBA,Bang,/a/abba/bang_20598415.html,making somebody happy is a question of give an...,0.136736,0.362437,0.262408,0.145773,0.065213,0.027435,"[0.13673553505589905, 0.3624368879213182, 0.26..."
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,making somebody happy is a question of give an...,0.967274,0.005258,0.016713,0.006844,0.002461,0.001449,"[0.9672743610382378, 0.0052580633129720306, 0...."
...,...,...,...,...,...,...,...,...,...,...,...
44790,Zebrahead,Mental Health,/z/zebrahead/mental+health_20747013.html,let's go the lights are on but there is no one...,0.152381,0.504729,0.055719,0.141408,0.109148,0.036615,"[0.15238124763496208, 0.5047291126308601, 0.05..."
44791,Zebrahead,The Setup,/z/zebrahead/the+setup_10198494.html,lie to me tell me that everything will be all ...,0.426336,0.203087,0.030651,0.199676,0.091436,0.048814,"[0.4263364301563252, 0.2030869043838914, 0.030..."
44792,Ziggy Marley,Freedom Road,/z/ziggy+marley/freedom+road_20531174.html,"that's why i'm marching, yes, i'm marching, ma...",0.164597,0.422916,0.090274,0.126768,0.155709,0.039737,"[0.1645965162351848, 0.4229155166796501, 0.090..."
44793,Ziggy Marley,G7,/z/ziggy+marley/g7_20531173.html,seven richest countries in the world them have...,0.297446,0.294825,0.074738,0.199254,0.100945,0.032792,"[0.2974464252724677, 0.2948254435155049, 0.074..."


In [99]:
user_vec

[0.5396963788057415,
 0.19285173072893422,
 0.04366610469701906,
 0.09854388358690694,
 0.07520161452573766,
 0.050040287655660556]

In [113]:

comparison = []
for i in range(len(scored_music)):
    
    user_vs_song=[]
    cs = song_similarity(user_vec,scored_music['vector'][i])
    song_name = scored_music['song'][i]
    artist_name = scored_music['artist'][i]
    
    user_vs_song.append(cs)
    user_vs_song.append(song_name)
    user_vs_song.append(artist_name)
    comparison.append(user_vs_song)

#comparison
    

In [101]:
comp_df=pd.DataFrame(comparison,columns=['similarity','track','artist'])

In [102]:
comp_df

Unnamed: 0,similarity,track,artist
0,0.379167,Ahe's My Kind Of Girl,ABBA
1,0.450934,"Andante, Andante",ABBA
2,0.974319,As Good As New,ABBA
3,0.601626,Bang,ABBA
4,0.919065,Bang-A-Boomerang,ABBA
...,...,...,...
44790,0.622597,Mental Health,Zebrahead
44791,0.968883,The Setup,Zebrahead
44792,0.671603,Freedom Road,Ziggy Marley
44793,0.877945,G7,Ziggy Marley


In [103]:
comp_df.sort_values(by='similarity',ascending=False)

Unnamed: 0,similarity,track,artist
31264,0.999245,Drums,Johnny Cash
38521,0.999129,Broken Thing,Point Of Grace
4167,0.998880,Breakaway,Donna Summer
27522,0.998843,Entangled,Genesis
2943,0.998817,My Woman,Chuck Berry
...,...,...,...
30424,0.074030,"Love Me, Lovely",Jackson Browne
23277,0.074018,One Sweet Tender Touch,Chris Rea
20747,0.074017,Tender Is The Night,Andy Williams
15078,0.074007,Hot In Here,Rascal Flatts


In [104]:
scored_music.loc[31264]

artist                                            Johnny Cash
song                                                    Drums
link                       /j/johnny+cash/drums_20188181.html
text        from the indian reservation to the governmenta...
sadness                                               0.53476
joy                                                   0.18721
love                                                 0.057187
anger                                                0.108818
fear                                                 0.075951
surprise                                             0.036073
vector      [0.5347600909353069, 0.187210201418098, 0.0571...
Name: 31264, dtype: object

In [106]:
# demo 

In [107]:
user_input

'yesterday i watched the lakers game and i lost money on it which was rough but i had fun watching the game anyways which was good'

In [111]:
stmf

"schoolbag in hand, she leaves home in the early morning waving goodbye with an absent-minded smile i watch her go with a surge of that well known sadness and i have to sit down for a while the feeling that i'm losing her forever and without really entering her world i'm glad whenever i can share her laughter that funny little girl slipping through my fingers all the time i try to capture every minute the feeling in it slipping through my fingers all the time do i really see what's in her mind each time i think i'm close to knowing she keeps on growing slipping through my fingers all the time sleep in our eyes, her and me at the breakfast table barely awake i let precious time go by then when she's gone, there's that odd melancholy feeling and a sense of guilt i can't deny what happened to the wonderful adventures the places i had planned for us to go well, some of that we did, but most we didn't and why, i just don't know slipping through my fingers all the time i try to capture every

In [109]:
stmf_vector

[0.26486169092397066,
 0.3791021911283691,
 0.0643399182728898,
 0.0771725837628346,
 0.07045494457203419,
 0.1440686713399015]

In [112]:
song_similarity(emotion_score(user_input),stmf_vector)

0.8104393137259611

In [115]:
emotion_score('i feel bad i feel bad')

[0.9975793155569983,
 0.001418035305696905,
 7.066067500300207e-05,
 0.0005128585485893484,
 0.0003862727789887728,
 3.285713472341105e-05]