In [34]:
import os

import numpy as np
import pandas as pd
from numpy.linalg import norm

In [35]:
# Directory that holds the MMSR .tsv files.
# Set the MMSR_DATA_DIR environment variable to point somewhere else without
# editing the notebook; the previously hard-coded path is kept as the fallback.
file_path_base = os.environ.get(
    "MMSR_DATA_DIR",
    r"/home/dani/Dropbox/JKU/Semester-7/Multimedia_Search_and_Retrieval/Excercises/MMSR-Group-C/1/",
)
# os.path.join copes with a base directory given with or without a trailing slash.
file_path_info = os.path.join(file_path_base, "id_information_mmsr.tsv")
file_path_word2vec = os.path.join(file_path_base, "id_lyrics_word2vec_mmsr.tsv")
file_path_tfidf = os.path.join(file_path_base, "id_lyrics_tf-idf_mmsr.tsv")
file_path_bert = os.path.join(file_path_base, "id_lyrics_bert_mmsr.tsv")

# Track metadata: the cells below read its "id", "artist" and "song" columns.
df_info = pd.read_table(file_path_info)
# Lyrics feature tables: the retrieval code expects an "id" column and treats
# every other column as a component of the feature vector.
df_word2vec = pd.read_table(file_path_word2vec)
df_tfidf = pd.read_table(file_path_tfidf)
df_bert = pd.read_table(file_path_bert)

df_info

Unnamed: 0,id,artist,song,album_name
0,01Yfj2T3YTwJ1Yfy,We As Human,Take The Bullets Away (feat. Lacey Sturm),We As Human
1,01gyRHLquwXDlhkO,The Notorious B.I.G.,Somebody's Gotta Die,Life After Death (Remastered Edition)
2,01rMxQv6vhyE1oQX,Against the Current,Chasing Ghosts,In Our Bones
3,02RGE9FNH65RtMS7,Barthezz,Infected,Trance - The Early Years (1997-2002)
4,02ZnlCGZEbkfCDxo,Laura Pausini,Tra Te E Il Mare,The Best of Laura Pausini - E Ritorno Da Te
...,...,...,...,...
10090,zyzILCQvVeUFIINi,Crowded House,When You Come,Temple Of Low Men
10091,zzgS4ZqyswamEWNj,Britney Spears,My Only Wish (This Year),Platinum Christmas
10092,zzoFYDMlqU1X2zz1,Thundercat,DUI,Drunk
10093,zzpkRCGA5ud8q4mv,Otis Redding,Rock Me Baby,Otis Blue


In [36]:
# Random baseline

def random_baseline(info: pd.DataFrame, title: str, artist: str, n: int, random_state=None) -> pd.DataFrame:
    """Return up to ``n`` randomly drawn tracks as a retrieval baseline.

    The query track (the rows of ``info`` whose ``artist`` and ``song`` equal
    the given values) is removed before sampling, so the baseline never
    "retrieves" the query itself. If no row matches, nothing is removed.

    Args:
        info: Track metadata with at least ``id``, ``artist`` and ``song`` columns.
        title: Song title of the query track.
        artist: Artist of the query track.
        n: Number of tracks to return; capped at the number of candidates.
        random_state: Optional seed / generator forwarded to ``DataFrame.sample``
            to make the draw reproducible. ``None`` keeps the draw unseeded.

    Returns:
        A new frame with the columns ``id``, ``artist`` and ``song``; ``info``
        itself is not modified.
    """
    is_query = (info["artist"] == artist) & (info["song"] == title)
    candidates = info.loc[~is_query, ["id", "artist", "song"]]
    # Sampling without replacement cannot return more rows than exist.
    n_draw = min(n, len(candidates))
    return candidates.sample(n=n_draw, random_state=random_state)

# Random baseline: 10 tracks for the query "Jingle Bells" by Frank Sinatra.
# No seed is passed, so the rows shown change on every run.
random_baseline(df_info, "Jingle Bells", "Frank Sinatra", 10)

Unnamed: 0,id,artist,song
278,1TdDpmXA1QWe0F1T,Green Day,St. Jimmy
9560,wf4gB1ShcR6STumY,Slipknot,Wait and Bleed
5457,Xbnc3y7iGCDkJ2Be,David Bisbal,Premonición
2585,FipfdKTiPkIugYMy,Europe,Rock the Night
8811,sAA5dJb3sNXFrMcm,Dirty Projectors,Cool Your Heart
748,4Rq3yuzopV6EOMrp,Piotr Rogucki,Wizja dźwięku
4827,TZ2IPg8YwxDCcyua,Evanescence,The Only One
1061,6NNmYh1zeixjYgqx,Stevie B,Spring Love
6450,dvfjEpKVTKFa4ykG,Katatonia,Sold Heart
7729,lYZDqRYVV573BRz4,mewithoutYou,"Julia (or, 'Holy to the LORD' on the Bells of ..."


In [37]:
# Random baseline: 10 tracks for the query "Shape of You" by Ed Sheeran.
# No seed is passed, so the rows shown change on every run.
random_baseline(df_info, "Shape of You", "Ed Sheeran", 10)

Unnamed: 0,id,artist,song
9730,xYV6Ac6m4otPj7sD,Manilla Road,The Veils of Negative Existence
6684,fI0rk5LL5l6NyCeh,Mazzy Star,California
3598,M4WUwd7i57cfQvZW,Manowar,Kingdom Come
4469,RQJ8biiC0Ed7MKyY,Shakira,Sale el Sol
4750,T4XEwusk9mHBs2mi,Alter Bridge,In Loving Memory
2826,HAyKV0B6Ty1rA6e4,Lisa Germano,Candy
1937,BeqFueRPiZbqOpHD,Metric,Calculation Theme
2372,ELY2iJ98YMasAAW1,Charon,Colder
3596,M4IrvSFYcTDYOZpu,The Pussycat Dolls,Beep
3396,Kpbzp06WpfKQWu5v,Inna,My Dreams


In [38]:
# Random baseline: 10 tracks for the query "Natural" by Imagine Dragons.
# No seed is passed, so the rows shown change on every run.
random_baseline(df_info, "Natural", "Imagine Dragons", 10)

Unnamed: 0,id,artist,song
9783,xvXrMH2YnlH77Y5i,Depeche Mode,Behind the Wheel
7938,mrmtGytnUDYGOUOH,The Black Crowes,Bad Luck Blue Eyes Goodbye
10090,zyzILCQvVeUFIINi,Crowded House,When You Come
5392,X9UmKzxyEDZ0Oscc,Enya,One by One
7711,lUgSuAl7kvy7MSon,New Kids on the Block,You Got It (The Right Stuff)
3265,K1HscSosu2LAfq59,Mr.Kitty,Neglect
1484,8qsKHsA7akNyFo0N,Living Colour,I Want to Know
5193,Vx7KpqO40dAp7SY5,Kraftwerk,Radioactivity - 2009 Remastered Version
7858,mNQNaz6dTkZUtnA3,Bob Dylan,Three Angels
746,4QtPdgfD37ulMNZa,Sylvan Esso,Hey Mami


In [39]:
def cos_sim(query: np.ndarray, target: np.ndarray) -> float:
    """Cosine similarity between two feature vectors.

    Args:
        query: Feature vector of the query track.
        target: Feature vector of the candidate track.

    Returns:
        ``dot(query, target) / (||query|| * ||target||)``. When either vector
        has zero norm the cosine is undefined; 0.0 is returned instead of
        ``nan`` so that such tracks rank as "not similar" rather than
        producing a division warning.
    """
    denom = norm(query) * norm(target)
    if denom == 0:
        return 0.0
    return np.dot(query, target) / denom

def euc_sim(query: np.ndarray, target: np.ndarray) -> float:
    """Similarity derived from the Euclidean distance between two vectors.

    Args:
        query: Feature vector of the query track (any array-like of numbers).
        target: Feature vector of the candidate track (any array-like of numbers).

    Returns:
        ``1 / (1 + ||query - target||)``: 1.0 for identical vectors, approaching
        0 as the distance grows.
    """
    # Coerce first so plain Python lists can be subtracted element-wise too.
    diff = np.asarray(query, dtype=float) - np.asarray(target, dtype=float)
    return 1 / (1 + norm(diff))
  
def text_based_retrieval(info: pd.DataFrame, feature: pd.DataFrame, title: str, artist: str, n: int, sim_func=None) -> pd.DataFrame:
    """Return the ``n`` tracks whose lyrics features are most similar to a query track.

    Args:
        info: Track metadata with at least ``id``, ``artist`` and ``song`` columns.
        feature: Feature table with an ``id`` column; every other column is a
            numeric component of the track's feature vector.
        title: Song title of the query track.
        artist: Artist of the query track.
        n: Maximum number of tracks to return.
        sim_func: Callable ``(query_vector, target_vector) -> similarity``
            (higher means more similar). Both arguments are 1-D float arrays.
            ``None`` (the default) selects ``cos_sim``.

    Returns:
        A new frame with the columns ``id``, ``artist`` and ``song``, ordered by
        decreasing similarity and keeping the row labels of ``info``. The query
        track is never part of the result. Tracks without a usable similarity
        (no feature row, or ``nan``) sort last. ``info`` is not modified.

    Raises:
        IndexError: If the query track is not in ``info`` or has no row in
            ``feature``.
    """
    if sim_func is None:
        # Resolved at call time so a re-defined cos_sim is picked up.
        sim_func = cos_sim

    # Look up the id of the query track in the metadata.
    is_query = (info["artist"] == artist) & (info["song"] == title)
    if not is_query.any():
        raise IndexError(f"track {title!r} by {artist!r} not found in info")
    query_id = info.loc[is_query, "id"].iloc[0]

    # Fetch the query's feature vector as a flat 1-D array.
    query_rows = feature.loc[feature["id"] == query_id]
    if query_rows.empty:
        raise IndexError(f"no feature vector for track id {query_id!r}")
    query_vec = query_rows.drop(columns="id").to_numpy(dtype=float)[0]

    # Score every other track. Scores are keyed by id, not by row position,
    # so the result is correct even if `feature` and `info` are ordered differently.
    candidates = feature.loc[feature["id"] != query_id].drop_duplicates(subset="id")
    targets = candidates.drop(columns="id").to_numpy(dtype=float)
    sims = np.asarray([sim_func(query_vec, target) for target in targets], dtype=float).reshape(-1)
    sim_by_id = pd.Series(sims, index=candidates["id"].to_numpy())

    # Attach the scores to a copy of the metadata (the caller's frame stays
    # untouched) and drop the query by id instead of assuming it ranks first.
    ranked = info.loc[info["id"] != query_id, ["id", "artist", "song"]]
    ranked = ranked.assign(sim=ranked["id"].map(sim_by_id))
    ranked = ranked.sort_values(by="sim", ascending=False, kind="stable")
    return ranked[["id", "artist", "song"]].head(n)
    

In [40]:
# Text-based (cos-sim, tf-idf): top 10 for "Jingle Bells" by Frank Sinatra.
# sim_func is left at its default, which is cosine similarity.
text_based_retrieval(df_info, df_tfidf, "Jingle Bells", "Frank Sinatra", 10)

Unnamed: 0,id,artist,song
2778,GvR3ihpANzPFUV9q,Ryan Star,We Might Fall
6220,cZq0NEOrY3Ub5Bqc,All Time Low,"Merry Christmas, Kiss My Ass"
1299,7o67ctY7Qo945yfv,The Smashing Pumpkins,The Everlasting Gaze
8221,oUaMwbrwMD9grrIA,Mayhem,Crystalized Pain In Deconstruction
5240,WESZfNkEXTSzBsaH,Love Generation,Dance Alone
8349,pH97idDxXVlnq3xH,Dixie Chicks,Loving Arms
8105,nkmwTJvccNwNK8mo,Suede,The Chemistry Between Us
9543,wYoJBT9Lg1bBf8Nd,Moby,One Of These Mornings
2856,HMvRdVsvXUPwROWw,Title Fight,Liar's Love
4039,OqVFiTyVf5wR5FNq,Brandi Carlile,Late Morning Lullaby


In [41]:
# Text-based (cos-sim, tf-idf): top 10 for "Shape of You" by Ed Sheeran.
text_based_retrieval(df_info, df_tfidf, "Shape of You", "Ed Sheeran", 10)


Unnamed: 0,id,artist,song
4424,R97YjDivI6ZdnITw,Smino,TEQUILA MOCKINGBIRD
3388,KnDiMNf6sE7hLTCq,Sam Cooke,Summertime
1109,6ePNRM6rbQuTM5XJ,Joss Stone,Free Me
4536,RkhV6fYEL032N6zQ,Eyes Set to Kill,The World Outside
2431,EhL8ZaVhOKqhAJID,Nitzer Ebb,Let Your Body Learn
8664,r6fF5pebWtAmdM62,Cher,Runaway
1530,97drm5mZVuk1HdEF,BTS,Miss Right
4044,OtAKDTOfnqax8PbN,Converge,Bitter and Then Some
1554,9E4Zp3b9190yQ6RM,Catherine Wheel,I Confess
9654,x8oyR0rY6NAggZ1A,Blackberry Smoke,Ain't Much Left of Me


In [42]:
# Text-based (cos-sim, tf-idf): top 10 for "Natural" by Imagine Dragons.
text_based_retrieval(df_info, df_tfidf, "Natural", "Imagine Dragons", 10)

Unnamed: 0,id,artist,song
9558,weNRxMeOx4eAOJX6,Barão Vermelho,Baby Suporte
3258,JyMlKHY9RjcJKjQh,Avril Lavigne,How You Remind Me
2239,DQBT7O5kUHcrh9J4,ABBA,"One Man, One Woman"
4964,UT4kkMWMoCPe7nvA,Bob Dylan,Tomorrow Is a Long Time
2692,GTZ9KwLu7xO79fkt,Gregory Porter,But Beautiful
3542,Lm1lBPHIoKVQTx82,Heart,Dreamboat Annie
3191,JSaX8fVllx7DBjp6,The Amity Affliction,O.M.G.I.M.Y.
5464,Xeg0EdihyYTTUHpr,Streetlight Manifesto,Everything Went Numb
8396,pYhUOR925maoe3Cy,The Icicle Works,Birds Fly (Whisper to a Scream)
7762,lnWKKmAEPt8KMlmy,Madonna,Sanctuary


In [43]:
# Text-based (cos-sim, bert): top 10 for "Jingle Bells" by Frank Sinatra.
# sim_func is left at its default, which is cosine similarity.
text_based_retrieval(df_info, df_bert, "Jingle Bells", "Frank Sinatra", 10)


Unnamed: 0,id,artist,song
2778,GvR3ihpANzPFUV9q,Ryan Star,We Might Fall
4257,QBGxad2tXIjeAZmP,Nevilton,Pressuposto
3128,J3Y3E5wh2TcJcUGM,Steel Panther,Death To All But Metal
1379,8DxVQ1vrjsNTIqOv,New Found Glory,My Friends Over You
2423,EeUoJbeUQNiHGJeX,Angra,Caveman
1758,AZGxAypdz7mBmx1R,Emilie Autumn,Opheliac
1756,AYxLbkF55hvueDLM,Gloria Groove,Coisa Boa
9804,y3WWGqthYTQjP7uM,Eddie Vedder,Goodbye
451,2ZFMLcQ9rWNndcLi,Jane Weaver,Don't Take My Soul
5420,XLT9XQT48DfHAZEc,Elton John,No Shoe Strings On Louise


In [44]:
# Text-based (cos-sim, bert): top 10 for "Shape of You" by Ed Sheeran.
text_based_retrieval(df_info, df_bert, "Shape of You", "Ed Sheeran", 10)


Unnamed: 0,id,artist,song
4884,Tvum0hngmyHoIJjN,BROCKHAMPTON,MILK
2252,DUgxTfu1TneqtqoH,Brandy,Long Distance
1511,8yKfPMz80fHBnj7z,Jennifer Hudson,Remember Me
7450,jzrOnpAxDnM3ZKI2,Yungblud,"I Love You, Will You Marry Me"
6503,eJ3e8SDSil49pvRq,Frank Sinatra,All Or Nothing At All
9535,wTndzu5HtXRd2ajJ,Mina,Il cielo in una stanza
6716,fSTi8VjZsX4hmw9g,The Who,Sensation
9,06HvNTU9M9lnH71I,Michael Bublé,That's All
6136,c6xpIU8h8Tqt020P,The White Stripes,Effect and Cause
5115,VRiXAx7EAAoQrzVt,Bloodbath,Cry My Name


In [45]:
# Text-based (cos-sim, bert): top 10 for "Natural" by Imagine Dragons.
text_based_retrieval(df_info, df_bert, "Natural", "Imagine Dragons", 10)

Unnamed: 0,id,artist,song
4505,RdIb9JxJD3y86vk1,Soviet Soviet,No Lesson
9874,yW7HaFbUjGvjhptq,Borknagar,Winter Thrice
3799,NLzUGGa38R584cro,Smile Empty Soul,Bottom of a Bottle
1117,6iiSfk5dDk9nCKVv,Nas,Bonjour
1356,88p9gxlixTfvCc3Q,The Adicts,Rossini
9629,x1zHAZQ1XEIOYluZ,Northlane,Intuition
204,16RyPaV6dHRot8sb,Arcade Fire,Keep the Car Running
7807,m4Fi9M6eUutpi46D,Jhené Aiko,The Worst
9509,wHLvvABOicpFEQ1n,Marina and the Diamonds,Starring Role
3761,N6W1jqCJaPO4eSpP,Future Islands,Beauty Of The Road


In [46]:
# Text-based (euc-sim, word2vec): top 10 for "Jingle Bells" by Frank Sinatra.
# euc_sim is passed explicitly to replace the default similarity function.
text_based_retrieval(df_info, df_word2vec, "Jingle Bells", "Frank Sinatra", 10, euc_sim)

Unnamed: 0,id,artist,song
2463,EsxmiDUT0v0NDbWP,Thousand Foot Krutch,New Drug
4319,QUcZsJvxjp5NkHSx,Mr. Big,Shine
3462,LIYIBenQlQQnEjRA,The Three Degrees,Dirty Ol' Man
8349,pH97idDxXVlnq3xH,Dixie Chicks,Loving Arms
8756,rlMcTiHz9HSidm55,Everyone Everywhere,"$1,000,000,000"
4880,TuBvinshhNZgQpYN,Flume,Holdin On
1741,ASmQDQZeVJytIHp4,No Doubt,Full Circle
2646,G7KquYFevSK3v9Ve,Killer Be Killed,Face Down
8922,sqnkDpNrmNYok0Og,The Smashing Pumpkins,Beautiful
667,3ur9VXvhUvHSMZIK,Brandon Flowers,Hard Enough


In [47]:
# Text-based (euc-sim, word2vec): top 10 for "Shape of You" by Ed Sheeran.
text_based_retrieval(df_info, df_word2vec, "Shape of You", "Ed Sheeran", 10, euc_sim)

Unnamed: 0,id,artist,song
6805,g3wu8wzwF3KOPF0R,Brooks & Dunn,Neon Moon
5283,WUQwg3HyiTo7i35A,Grimes,Belly of the Beat
10064,zqZdtcELsPRdBuc8,The National,Sunshine On My Back
2460,Es84XYyrkHWLgdH3,Obituary,Sentence Day
9086,tjfPeXTj92fY1qz8,Built to Spill,Just A Habit
687,42wYoD6C0wYUTTjo,Renaissance,Ashes Are Burning
1141,6tMKqIJL1BFcRqSh,"Earth, Wind & Fire",Serpentine Fire
399,2FNongR2nPhLiJCM,Cher,My Song (Too Far Gone)
2264,DaXt87iqpalzFGLg,Sum 41,Some Say
7003,hAkFH1cGy1fLhCUr,Bea Miller,S.L.U.T.


In [48]:
# Text-based (euc-sim, word2vec): top 10 for "Natural" by Imagine Dragons.
text_based_retrieval(df_info, df_word2vec, "Natural", "Imagine Dragons", 10, euc_sim)

Unnamed: 0,id,artist,song
8060,nYETaVrDXIYBhV46,Westlife,What About Now
2425,EeipMx7HWhlB5l6F,Hilary Duff,Chasing the Sun
2680,GPYSBn9yPulH98cZ,Angels & Airwaves,Lifeline
3507,LZI7G5GNQShBvuXT,Dokken,Kiss of Death
4125,PNLSJajLOGUDAPyd,Gentle Giant,Pantagruel's Nativity
9684,xKctaJgVjshcu8k7,Corey Hart,Sunglasses at Night
5484,XmpF2EKY6mW7LMy9,Skillet,Saviors of the World
4407,R0kjvPkfpzkRCdE1,Primal Scream,Damaged
3740,MxvB9jqkYOnP5mgo,Bob Marley & The Wailers,She's Gone
8994,tEooHa0Wo0X7tjSl,Rita Ora,Shine Ya Light
