In [None]:
import re 
from collections import Counter
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", color_codes=True)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
def read_data(file, encoding="utf-8"):
    """Parse a labelled text file into a list of ``[label, text]`` pairs.

    Each usable line is expected to look like ``[<label tokens>] <text>``.
    The label is everything between the first character of the line and the
    first ``]``, with runs of whitespace collapsed to single spaces; the text
    is whatever follows that ``]``, stripped.

    Lines that contain no ``]`` (blank lines included) are skipped. Previously
    ``str.find`` returned -1 for them and the slices produced a bogus record.

    Parameters
    ----------
    file : str or path-like
        Path of the file to read.
    encoding : str, optional
        Text encoding used to decode the file. Defaults to UTF-8 so the result
        does not depend on the platform's locale.

    Returns
    -------
    list[list[str]]
        One ``[label, text]`` entry per parsed line, in file order.
    """
    data = []
    with open(file, 'r', encoding=encoding) as f:
        for line in f:
            line = line.strip()
            close = line.find("]")
            if close == -1:
                # No label delimiter: nothing meaningful to parse on this line.
                continue
            # line[0] is assumed to be the opening bracket and is dropped.
            label = ' '.join(line[1:close].split())
            text = line[close + 1:].strip()
            data.append([label, text])
    return data

# Load the labelled corpus and report how many [label, text] records were read.
# NOTE(review): absolute path (looks like a Colab mount); adjust when running elsewhere.
file = '/content/dataset.txt'
data = read_data(file)
print("Number of instances: {}".format(len(data)))

Number of instances: 7480


In [None]:
def ngram(token, n):
    """Return every run of ``n`` consecutive tokens, each joined by one space.

    ``token`` is a sequence of strings. The result lists the n-grams in the
    order they occur; it is empty when the sequence is shorter than ``n``.
    """
    window_count = len(token) - n + 1
    return [' '.join(token[start:start + n]) for start in range(window_count)]

def create_feature(text, nrange=(1, 1)):
    """Build a bag-of-n-grams ``Counter`` for one piece of text.

    The text is lower-cased first. Two token streams are then derived from it:

    * word tokens: everything except ``a-z``, ``0-9`` and ``#`` is treated as
      a separator; n-grams of every size from ``nrange[0]`` to ``nrange[1]``
      (inclusive) are counted;
    * symbol tokens: ``a-z`` and ``0-9`` are treated as separators and the
      remaining runs of characters are counted as unigrams only.

    Returns a ``Counter`` mapping each n-gram string to its frequency.
    """
    lowered = text.lower()

    words = re.sub('[^a-z0-9#]', ' ', lowered).split()
    symbols = re.sub('[a-z0-9]', ' ', lowered).split()

    counts = Counter()
    for size in range(nrange[0], nrange[1] + 1):
        counts.update(ngram(words, size))
    counts.update(ngram(symbols, 1))
    return counts

In [None]:
def convert_label(item, name):
    """Translate a whitespace-separated indicator string into label names.

    ``item`` is split on whitespace and every piece is parsed as a float.
    For each position whose value equals 1, the entry of ``name`` at the same
    position is selected. The selected names are returned joined by single
    spaces (an empty string when nothing is selected).
    """
    flags = [float(piece) for piece in item.split()]
    chosen = [name[pos] + " " for pos, flag in enumerate(flags) if flag == 1]
    return "".join(chosen).strip()

# Emotion names, positionally aligned with the indicator vector stored in each raw label.
emotions = ["joy", 'fear', "anger", "sadness", "anxious", "shame", "guilt"]

# Parallel lists: X_all holds one n-gram Counter per instance, y_all the matching emotion name.
X_all = []
y_all = []
for label, text in data:
    y_all.append(convert_label(label, emotions))
    # Count unigrams up to 4-grams of the word tokens (plus symbol unigrams).
    X_all.append(create_feature(text, nrange=(1, 4)))

In [None]:
# Hold out 20% of the instances for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size = 0.2, random_state = 123)

def train_test(clf, X_train, X_test, y_train, y_test):
    """Fit ``clf`` on the training split and score it on both splits.

    The estimator is fitted in place. Returns ``(train_accuracy,
    test_accuracy)``, each computed with ``accuracy_score`` on the fitted
    classifier's own predictions for that split.
    """
    clf.fit(X_train, y_train)
    splits = ((X_train, y_train), (X_test, y_test))
    train_acc, test_acc = [accuracy_score(y, clf.predict(X)) for X, y in splits]
    return train_acc, test_acc

# Turn the per-instance n-gram Counters into a sparse feature matrix.
# The vocabulary is learned from the training split only and reused for the test split.
# NOTE(review): X_train / X_test are overwritten here, so this cell cannot be re-run
# without first re-running the train/test split above.
from sklearn.feature_extraction import DictVectorizer
vectorizer = DictVectorizer(sparse = True)
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)


In [None]:
# Candidate classifiers, all trained on the same n-gram count features.
svc = SVC()
lsvc = LinearSVC(random_state=123)
rforest = RandomForestClassifier(random_state=123)
# Seeded like the other estimators: an unseeded decision tree permutes features
# randomly at each split, so its row in the table would change between runs.
dtree = DecisionTreeClassifier(random_state=123)

clifs = [svc, lsvc, rforest, dtree]

# Fit every classifier and print one Markdown-style table row per model.
# NOTE: after the loop, `clf` stays bound to the last entry of `clifs`.
print("| {:25} | {} | {} |".format("Classifier", "Training Accuracy", "Test Accuracy"))
print("| {} | {} | {} |".format("-"*25, "-"*17, "-"*13))
for clf in clifs:
    clf_name = clf.__class__.__name__
    train_acc, test_acc = train_test(clf, X_train, X_test, y_train, y_test)
    print("| {:25} | {:17.7f} | {:13.7f} |".format(clf_name, train_acc, test_acc))

| Classifier                | Training Accuracy | Test Accuracy |
| ------------------------- | ----------------- | ------------- |
| SVC                       |         0.9069184 |     0.4485294 |




| LinearSVC                 |         0.9984960 |     0.5608289 |
| RandomForestClassifier    |         0.9984960 |     0.5467914 |
| DecisionTreeClassifier    |         0.9984960 |     0.4518717 |


In [None]:
# Count how many instances carry each raw label string.
# (The sorted emotion list that used to be built here was never read: its name
# was immediately reused as the loop variable below, so it has been dropped.)
label_freq = Counter(label for label, _ in data)

# Print the labels and their counts, most frequent first; ties keep first-seen order.
for raw_label, count in label_freq.most_common():
    print("{:10}({})  {}".format(convert_label(raw_label, emotions), raw_label, count))

joy       (1. 0. 0. 0. 0. 0. 0.)  1084
anger     (0. 0. 1. 0. 0. 0. 0.)  1080
sadness   (0. 0. 0. 1. 0. 0. 0.)  1079
fear      (0. 1. 0. 0. 0. 0. 0.)  1078
anxious   (0. 0. 0. 0. 1. 0. 0.)  1057
guilt     (0. 0. 0. 0. 0. 0. 1.)  1057
shame     (0. 0. 0. 0. 0. 1. 0.)  1045


In [None]:
# Maps each emotion name produced by the classifier to its numeric code (kept as a string).
predicted_emotion = {"joy":"1", "fear":"2", "anger":"3", "sadness":"4", "anxious":"5", "shame":"6", "guilt":"7"}
t1 = "under the greenwood tree i sleep"
texts = [t1]
for text in texts:
    # Same n-gram range as used when building the training features.
    features = create_feature(text, nrange=(1, 4))
    # One sample -> a one-element batch for the vectorizer.
    features = vectorizer.transform([features])
    # Predict with an explicitly chosen model. The previous code used `clf`, the
    # variable left over from the comparison loop, i.e. whichever classifier was
    # trained last. LinearSVC had the best recorded test accuracy in that comparison.
    prediction = lsvc.predict(features)[0]
    print(text, predicted_emotion[prediction])

under the greenwood tree i sleep 7


In [None]:
# Load the music metadata table used for track recommendations.
# NOTE(review): absolute path (looks like a Colab mount); adjust when running elsewhere.
df = pd.read_csv('/content/music_dataset.csv')

In [None]:
df.head()

Unnamed: 0,links,track,artist,seeds,valence_tags,arousal_tags,dominance_tags,mbid,spotify_id,genre
0,https://www.last.fm/music/dope/_/die%2bmf%2bdie,Die MF Die,Dope,['angry'],3.771176,5.348235,5.441765,b9eb3484-5e0e-4690-ab5a-ca91937032a5,5bU4KX47KqtDKKaLM4QCzh,metal
1,https://www.last.fm/music/deftones/_/7%2bwords,7 Words,Deftones,['angry'],3.807121,5.473939,4.729091,1a826083-5585-445f-a708-415dc90aa050,6DoXuH326aAYEN8CnlLmhP,nu metal
2,https://www.last.fm/music/fiona%2bapple/_/limp,Limp,Fiona Apple,['angry'],3.737211,5.610204,4.626735,4435982c-b83e-4daa-af2b-9f3430036bb7,104YdibC7VQy78xAVmgRYr,singer-songwriter
3,https://www.last.fm/music/black%2bflag/_/depre...,Depression,Black Flag,['angry'],3.259444,5.203056,4.422778,585398ed-1275-4579-9451-e8dd7db9d59c,1qxkzHlZBXFv5HfyYqJ8cy,punk
4,https://www.last.fm/music/pendulum/_/comprachicos,Comprachicos,Pendulum,['angry'],5.754167,5.565333,5.8365,a3c325ce-fac4-42b9-85da-b3c9e0f243af,2ZIJUwprFZrAaZCRKYfAno,industrial


In [None]:
df['seeds'].unique()

array(["['angry']", "['anxious']", "['guilt']", "['sadness']", "['fear']",
       "['joyous']", "['shame']"], dtype=object)

In [None]:
df["seeds"].value_counts()

['joyous']     6480
['sadness']    2975
['angry']      1248
['anxious']     419
['fear']        366
['shame']       194
['guilt']        39
Name: seeds, dtype: int64

In [None]:
# Encode the stringified seed lists as integer emotion codes (1 = joyous ... 7 = guilt).
# Values missing from the mapping become NaN.
seed_codes = {"['joyous']":1,"['fear']":2,"['angry']":3,"['sadness']":4,"['anxious']":5,"['shame']":6,"['guilt']":7}
# Guard so the cell is idempotent: once the column is numeric it has already been
# encoded, and mapping it again would replace every code with NaN.
if not pd.api.types.is_numeric_dtype(df["seeds"]):
    df["seeds"] = df["seeds"].map(seed_codes)

In [None]:
df['seeds'].unique()

array([3, 5, 7, 4, 2, 1, 6])

In [None]:
df

Unnamed: 0,links,track,artist,seeds,valence_tags,arousal_tags,dominance_tags,mbid,spotify_id,genre
0,https://www.last.fm/music/dope/_/die%2bmf%2bdie,Die MF Die,Dope,3,3.771176,5.348235,5.441765,b9eb3484-5e0e-4690-ab5a-ca91937032a5,5bU4KX47KqtDKKaLM4QCzh,metal
1,https://www.last.fm/music/deftones/_/7%2bwords,7 Words,Deftones,3,3.807121,5.473939,4.729091,1a826083-5585-445f-a708-415dc90aa050,6DoXuH326aAYEN8CnlLmhP,nu metal
2,https://www.last.fm/music/fiona%2bapple/_/limp,Limp,Fiona Apple,3,3.737211,5.610204,4.626735,4435982c-b83e-4daa-af2b-9f3430036bb7,104YdibC7VQy78xAVmgRYr,singer-songwriter
3,https://www.last.fm/music/black%2bflag/_/depre...,Depression,Black Flag,3,3.259444,5.203056,4.422778,585398ed-1275-4579-9451-e8dd7db9d59c,1qxkzHlZBXFv5HfyYqJ8cy,punk
4,https://www.last.fm/music/pendulum/_/comprachicos,Comprachicos,Pendulum,3,5.754167,5.565333,5.836500,a3c325ce-fac4-42b9-85da-b3c9e0f243af,2ZIJUwprFZrAaZCRKYfAno,industrial
...,...,...,...,...,...,...,...,...,...,...
11716,https://www.last.fm/music/damien%2bjurado/_/to...,Tonight I Will Retire,Damien Jurado,6,3.237203,1.708644,3.450424,d5543bf4-7dde-43b1-b295-3b31cbbd7ece,1sTTdNPs7yux7PaecQbZvu,indie
11717,https://www.last.fm/music/the%2bhandsome%2bfam...,24-Hour Store,The Handsome Family,6,4.453333,1.873333,4.666667,8599b167-460e-4350-a294-7cde37fb8eb2,33Gr3C7uDIkqYjTP28t0oV,folk
11718,https://www.last.fm/music/henry%2bmancini/_/da...,Days of Wine and Roses,Henry Mancini,6,4.394737,1.848684,4.605263,5b46c717-13d9-4793-a98e-8e175424c3e2,2sWRC12vYVTHjdTD2WOE4v,jazz
11719,https://www.last.fm/music/of%2bmontreal/_/girl...,girl from new york,of Montreal,6,3.946667,2.020000,4.183333,,,indie


In [None]:
# One filtered view of `df` per emotion code, unpacked in code order 1..7.
(joy_df, fear_df, angry_df, sad_df,
 anxious_df, shame_df, guilty_df) = (
    df.loc[df['seeds'] == code] for code in range(1, 8)
)


In [None]:
joy_df.head()

Unnamed: 0,links,track,artist,seeds,valence_tags,arousal_tags,dominance_tags,mbid,spotify_id,genre
3504,https://www.last.fm/music/gustavo%2bcerati/_/f...,Fuerza Natural,Gustavo Cerati,1,6.86,2.22,6.445,566049ec-2f93-4dac-9e6c-6c8e48ff072f,4nISKMNTbWKrZpGFI44pIz,rock
3505,https://www.last.fm/music/megafaun/_/volunteers,Volunteers,Megafaun,1,7.25,2.49,7.09,d86e4821-d7a3-43cb-a422-2001995bf910,,
3506,https://www.last.fm/music/abbey%2blincoln/_/bl...,Blue Monk,Abbey Lincoln,1,4.79716,3.596173,5.902963,b7dfbf27-6178-4919-8d45-135792e734ae,4x1jTdzb9Fem58fWFTxZGV,jazz
3507,https://www.last.fm/music/delta%2bgoodrem/_/lo...,Longer,Delta Goodrem,1,7.075556,4.253333,6.348889,96f57384-990e-4d0f-949b-03ae8a6a60ce,1B3Xyk6JX1jDYb0n2f6KEX,pop
3508,https://www.last.fm/music/de-phazz/_/better%2b...,Better World,De-Phazz,1,6.9875,3.505,6.22,32309e52-5ffa-41c5-9cd9-396227e74792,4l04FxXi5I7ylSCCDeaouF,lounge


In [None]:
fear_df.head()

Unnamed: 0,links,track,artist,seeds,valence_tags,arousal_tags,dominance_tags,mbid,spotify_id,genre
2203,https://www.last.fm/music/ludo/_/the%2bhorror%...,The Horror Of Our Love,Ludo,2,5.354057,4.68717,4.515094,,1j7q33fQyWZEDsG4Q1nIDr,indie
2204,https://www.last.fm/music/nico/_/you%2bforgot%...,You Forgot To Answer,Nico,2,4.966667,4.106667,4.573333,1dd5fa8c-a91b-4bcf-b905-89544ff6535e,29LXy8UOHxCH5LXvWUmsKP,experimental
2205,https://www.last.fm/music/chelsea%2bwolfe/_/yo...,You Are My Sunshine,Chelsea Wolfe,2,0.857612,1.555373,0.885522,9425f09a-670d-4717-b500-121523317ba5,,folk
2206,https://www.last.fm/music/nautilus%2bpompilius...,К Элоизе,Nautilus Pompilius,2,1.693333,1.363333,1.536667,b1995771-4a46-46f1-9b66-00f904742417,4o5dwmXOna08FfUdcFK6cS,russian rock
2207,https://www.last.fm/music/jill%2btracy/_/pulli...,Pulling Your Insides Out,Jill Tracy,2,3.86,2.96675,3.63625,93ef16cd-180d-4a92-9073-395f0583d515,5hVwH7zo0elyB4At5KV9PP,dark cabaret


In [None]:
angry_df.head()

Unnamed: 0,links,track,artist,seeds,valence_tags,arousal_tags,dominance_tags,mbid,spotify_id,genre
0,https://www.last.fm/music/dope/_/die%2bmf%2bdie,Die MF Die,Dope,3,3.771176,5.348235,5.441765,b9eb3484-5e0e-4690-ab5a-ca91937032a5,5bU4KX47KqtDKKaLM4QCzh,metal
1,https://www.last.fm/music/deftones/_/7%2bwords,7 Words,Deftones,3,3.807121,5.473939,4.729091,1a826083-5585-445f-a708-415dc90aa050,6DoXuH326aAYEN8CnlLmhP,nu metal
2,https://www.last.fm/music/fiona%2bapple/_/limp,Limp,Fiona Apple,3,3.737211,5.610204,4.626735,4435982c-b83e-4daa-af2b-9f3430036bb7,104YdibC7VQy78xAVmgRYr,singer-songwriter
3,https://www.last.fm/music/black%2bflag/_/depre...,Depression,Black Flag,3,3.259444,5.203056,4.422778,585398ed-1275-4579-9451-e8dd7db9d59c,1qxkzHlZBXFv5HfyYqJ8cy,punk
4,https://www.last.fm/music/pendulum/_/comprachicos,Comprachicos,Pendulum,3,5.754167,5.565333,5.8365,a3c325ce-fac4-42b9-85da-b3c9e0f243af,2ZIJUwprFZrAaZCRKYfAno,industrial


In [None]:
sad_df.head()

Unnamed: 0,links,track,artist,seeds,valence_tags,arousal_tags,dominance_tags,mbid,spotify_id,genre
1288,https://www.last.fm/music/pavement/_/grounded,Grounded,Pavement,4,3.783407,2.735165,3.863077,8bcdb205-d5e2-4f8d-b8f7-6073fadbc3b3,70DNht3RyZ2SSied9TdJV8,indie rock
1289,https://www.last.fm/music/aphex%2btwin/_/father,Father,Aphex Twin,4,3.15,3.32,3.15,8925fb7f-249c-4240-ac77-cb42193c7d74,0mjBKvFwLHw0h1ACwkB8lP,piano
1290,https://www.last.fm/music/david%2bbowie/_/the%...,The Secret Life of Arabia,David Bowie,4,4.004,4.016,3.72,bd04833b-e811-491f-bfb1-c0b3ee6cc437,4F1V29ygbAKbKCQDgt4z9f,rock
1291,https://www.last.fm/music/korn/_/kill%2byou,Kill You,Korn,4,2.795,4.26,3.24,072713cc-b7d9-4678-b6c5-3eb14895f563,3b0meCmg4rf2jSpMSxdc1I,alternative
1292,https://www.last.fm/music/banks/_/change,Change,Banks,4,4.744118,3.916471,4.549412,ab705d4d-9d37-4073-9433-f0d8c98069c6,4VHUOjHviVjv2V37QoXTNb,electronic


In [None]:
anxious_df.head()

Unnamed: 0,links,track,artist,seeds,valence_tags,arousal_tags,dominance_tags,mbid,spotify_id,genre
306,https://www.last.fm/music/traffic/_/shoot%2bou...,Shoot Out At The Fantasy Factory,Traffic,5,5.744,5.33,5.588,15e49e5d-4e0b-43cb-b52a-49269ad5de3a,6waIDGD8KburJSTvbbtmr7,classic rock
8443,https://www.last.fm/music/shawn%2bmendes/_/ner...,Nervous,Shawn Mendes,5,5.78,5.435,4.97,,5wxurz8J6YlyQRNnGN8zXS,pop
8444,https://www.last.fm/music/skindred/_/trouble,Trouble,Skindred,5,3.56,5.51,4.02,a4188f48-cb54-4b55-96bc-6893f69ea9df,1WjoyUWrtiT3w9P1T4eQIM,metal
8445,https://www.last.fm/music/young%2bmarble%2bgia...,Credit In The Straight World,Young Marble Giants,5,5.226829,4.576341,4.902439,7262225b-7b55-43b3-901d-3d1cd392b6db,3VqtRYRyPFubJlt46axPhr,post-punk
8446,https://www.last.fm/music/bright%2beyes/_/satu...,Saturday as Usual,Bright Eyes,5,3.085,2.9525,3.715,db95e152-1142-4c15-9a10-231f840dffce,3wn73bOuYrCuemehRfLosN,indie rock


In [None]:
shame_df.head()

Unnamed: 0,links,track,artist,seeds,valence_tags,arousal_tags,dominance_tags,mbid,spotify_id,genre
11527,https://www.last.fm/music/blind%2bpilot/_/poor...,Poor Boy,Blind Pilot,6,5.96766,2.735319,6.122021,27dc4951-fcf3-468d-abfb-1bf23ba70c8c,79iq5rKuKqoszPDcg9Bahn,folk
11528,https://www.last.fm/music/ra%2bra%2briot/_/win...,Winter '05,Ra Ra Riot,6,4.965091,3.256545,4.794909,56a5c2de-3be3-4ca5-85ad-de9bd302060f,2dDVzdnFiH0KfSMMuocfyW,indie
11529,https://www.last.fm/music/lykke%2bli/_/never%2...,Never Gonna Love Again,Lykke Li,6,3.4975,4.1675,4.109643,2bb5ebd2-9256-41da-823e-1aae814b9779,260P406V4ys84uDOFGfcMA,indie pop
11530,https://www.last.fm/music/sonic%2byouth/_/jc,JC,Sonic Youth,6,6.39,4.72875,5.495,d379d277-bfc2-4b43-a59d-eb632e2cceaa,1xQ5HGo6FWq2dvpGUlN91m,rock
11531,https://www.last.fm/music/she%2b%2526%2bhim/_/...,Brand New Shoes,She & Him,6,5.069487,3.671795,4.446923,ef617164-631f-450d-9b5f-bf8eb6ee3027,5bRFQj1NhNJciw1rzBPNTo,indie


In [None]:
guilty_df.head()

Unnamed: 0,links,track,artist,seeds,valence_tags,arousal_tags,dominance_tags,mbid,spotify_id,genre
1249,https://www.last.fm/music/bright%2beyes/_/padr...,Padraic My Prince,Bright Eyes,7,2.95287,3.963994,3.448284,af69a0aa-06ef-4bde-ac93-a295ee847266,2mI4a89s5XvZDfmEVDdv8i,indie
1250,https://www.last.fm/music/the%2bataris/_/your%...,Your Boyfriend Sucks,The Ataris,7,3.366667,2.95,3.253333,3e9da890-0391-4654-820b-0ec9c3da2330,00qg7B6HIDhIqtmJbXz25c,punk
1251,https://www.last.fm/music/train/_/you%2balread...,You Already Know,Train,7,5.926471,4.857647,5.520588,96132d55-74a0-455a-ac3b-4e509c344c60,3ornutjAiLHXmngzQN68PI,rock
1252,https://www.last.fm/music/bright%2beyes/_/let%...,Let's Not Shit Ourselves (To Love And Be Loved),Bright Eyes,7,4.914211,4.574211,5.045263,,4ef4J3MEiAYAkhRM8Kj7eI,indie
1253,https://www.last.fm/music/sparta/_/the%2bmost%...,The Most Vicious Crime,Sparta,7,3.585,3.165,3.31,8a0dd208-c0f2-488b-811c-a7ab34f95f11,6mZpR3756gnPNWGjvthGCZ,alternative rock


In [None]:
# Numeric emotion code (as a string) for the most recent `prediction`.
# Relies on the prediction cell having been run earlier in this kernel session.
emotion_word=predicted_emotion[prediction]

In [None]:
emotion_word

'7'

In [None]:
def get_results(emotion_word, tracks=None):
    """Pick one random track whose seed emotion matches ``emotion_word``.

    Parameters
    ----------
    emotion_word : str
        Emotion code as a string: '1' joy, '2' fear, '3' anger, '4' sadness,
        '5' anxious, '6' shame, '7' guilt. Any other value falls back to
        code 7; this fallback is kept for backward compatibility.
    tracks : pandas.DataFrame, optional
        Table with an integer-coded ``seeds`` column to sample from. Defaults
        to the notebook-level ``df``, filtered at call time.

    Returns
    -------
    pandas.DataFrame
        A single-row frame drawn with ``DataFrame.sample(1)``.

    Raises
    ------
    ValueError
        Raised by ``sample`` when no row carries the requested code.
    """
    if tracks is None:
        tracks = df

    # Position in this tuple + 1 is the integer code stored in the `seeds` column.
    known_codes = ('1', '2', '3', '4', '5', '6')
    if emotion_word in known_codes:
        code = known_codes.index(emotion_word) + 1
    else:
        code = 7

    return tracks.loc[tracks['seeds'] == code].sample(1)

In [None]:
print(get_results(emotion_word))

                                               links        track artist  \
1271  https://www.last.fm/music/nits/_/two%2bskaters  Two Skaters   Nits   

      seeds  valence_tags  arousal_tags  dominance_tags  \
1271      7          2.86         2.045           2.555   

                                      mbid              spotify_id genre  
1271  791d884c-ce45-45cd-9b4d-914f648ebae4  1N7i5FGCibqWxTBwFeeGLC   pop  
