In [1]:
!pip install pandas
import pandas as pd
import t3_utils as t3
import numpy as np
from ast import literal_eval
from youtubesearchpython import VideosSearch

Defaulting to user installation because normal site-packages is not writeable


In [2]:
# Dataset location and loading.
# The directory can be changed without editing the notebook by setting the
# MMSR_DATA_DIR environment variable; otherwise the original path below is used.
import os

file_path_base = os.environ.get(
    "MMSR_DATA_DIR",
    r"/home/dani/Dropbox/JKU/Semester-7/Multimedia_Search_and_Retrieval/Excercises/MMSR-Group-C/3/datasets/",
)


def _dataset_path(file_name):
    """Return the path of `file_name` inside the dataset directory."""
    # os.path.join copes with a base directory given with or without a trailing slash
    return os.path.join(file_path_base, file_name)


file_path_info = _dataset_path("id_information_mmsr.tsv")
file_path_word2vec = _dataset_path("id_lyrics_word2vec_mmsr.tsv")
file_path_tfidf = _dataset_path("id_lyrics_tf-idf_mmsr.tsv")
file_path_bert = _dataset_path("id_lyrics_bert_mmsr.tsv")

file_path_genre = _dataset_path("id_genres_mmsr.tsv")
file_path_musicnn = _dataset_path("id_musicnn_mmsr.tsv")
file_path_mfcc_bow = _dataset_path("id_mfcc_bow_mmsr.tsv")
file_path_ivec_256 = _dataset_path("id_ivec256_mmsr.tsv")
file_path_logfluc = _dataset_path("id_blf_logfluc_mmsr.tsv")

file_path_incp = _dataset_path("id_incp_mmsr.tsv")
file_path_vgg19 = _dataset_path("id_vgg19_mmsr.tsv")
file_path_resnet = _dataset_path("id_resnet_mmsr.tsv")
file_path_url = _dataset_path("id_url_mmsr.tsv")

# song metadata (the "artist" and "song" columns are read by the retrieval helpers)
df_info = pd.read_table(file_path_info)

# text
df_word2vec = pd.read_table(file_path_word2vec)
df_tfidf = pd.read_table(file_path_tfidf)
df_bert = pd.read_table(file_path_bert)

# audio
df_genre = pd.read_table(file_path_genre)
df_musicnn = pd.read_table(file_path_musicnn)
df_mfcc_bow = pd.read_table(file_path_mfcc_bow)
df_ivec_256 = pd.read_table(file_path_ivec_256)
df_logfluc = pd.read_table(file_path_logfluc)

# video
df_incp = pd.read_table(file_path_incp)
df_vgg19 = pd.read_table(file_path_vgg19)
df_resnet = pd.read_table(file_path_resnet)
df_url = pd.read_table(file_path_url)

# the genre column is stored as the string form of a Python literal; parse it back
df_genre["genre"] = df_genre["genre"].apply(literal_eval)

In [3]:
# Early fusion: normalize the BERT and musicnn feature frames and join them on "id".
# NOTE(review): the original normalized df_mfcc_bow here although the result is
# named df_normalized_musicnn and the late-fusion system pairs bert with musicnn;
# df_musicnn is used now so that both fusion variants use the same features.
# Confirm that mfcc_bow was not intended.
df_normalized_bert = t3.normalize_frame(df_bert)
df_normalized_musicnn = t3.normalize_frame(df_musicnn)
# outer join keeps ids that appear in only one of the two feature frames
df_early_fusion = pd.merge(df_normalized_bert, df_normalized_musicnn, on="id", how="outer")

In [12]:
def get_youtube_info(row):
    """Look up the top YouTube search hit for one song.

    Parameters
    ----------
    row : mapping
        Must provide the entries "artist" and "song" (e.g. one row of df_info).

    Returns
    -------
    list
        ``[video_url, video_id, video_view_count]`` of the first search hit.
        When the search yields no hit, ``[None, None, None]`` is returned so
        that callers can always index three elements.
    """
    song_artist = row["artist"]
    song_title = row["song"]

    query = f"{song_title} {song_artist}"
    results = VideosSearch(query, limit=1).result()

    hits = results['result']
    if not hits:
        print(f"No results found for {song_title} by {song_artist}")
        # same length as a successful result; the original returned '' here,
        # which broke callers indexing item[0], item[1], item[2]
        return [None, None, None]

    top_hit = hits[0]
    # the view count entry may be absent or None for some hits; fall back to None
    view_count = (top_hit.get('viewCount') or {}).get('short')

    return [top_hit['link'], top_hit['id'], view_count]
    

def calc_songs(row, df_info: pd.DataFrame, df_feature: pd.DataFrame, sim_func, filter: list, random: bool, top_n: int = 10):
    """Retrieve the songs most similar to the song described by `row`.

    Thin wrapper around ``t3.song_retrieval`` meant to be used with
    ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    row : mapping providing the entries "artist" and "song" of the query song.
    df_info : song metadata frame handed to the retrieval function.
    df_feature : feature frame handed to the retrieval function.
    sim_func : similarity function forwarded as ``sim_func``.
    filter : forwarded as ``filter``; ``None`` is replaced by an empty list.
    random : forwarded as ``random``.
    top_n : number of songs requested from the retrieval function (default 10,
        the value that was previously hard-coded).

    Returns
    -------
    Whatever ``t3.song_retrieval`` returns (presumably a DataFrame of
    retrieved songs - confirm against t3_utils).
    """
    song_artist = row["artist"]
    song_title = row["song"]

    if filter is None:
        filter = []

    return t3.song_retrieval(
        df_info,
        df_feature,
        song_title,
        song_artist,
        top_n,
        sim_func=sim_func,
        filter=filter,
        random=random,
    )


def calc_late_fusion_songs(row, df_info: pd.DataFrame, df_feature_1: pd.DataFrame, df_feature_2: pd.DataFrame, filter: list, top_n: int = 10):
    """Retrieve songs for the song in `row` via late fusion of two feature frames.

    Thin wrapper around ``t3.late_fusion_retrieval`` meant to be used with
    ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    row : mapping providing the entries "artist" and "song" of the query song.
    df_info : song metadata frame handed to the retrieval function.
    df_feature_1, df_feature_2 : the two feature frames to fuse.
    filter : columns to keep from the retrieval result; ``None`` or an empty
        list keeps the result unchanged.
    top_n : number of songs requested from the retrieval function (default 10,
        the value that was previously hard-coded).

    Returns
    -------
    The result of ``t3.late_fusion_retrieval``, restricted to the `filter`
    columns when a non-empty filter is given.
    """
    song_artist = row["artist"]
    song_title = row["song"]

    df_retr_songs = t3.late_fusion_retrieval(df_info, df_feature_1, df_feature_2, song_title, song_artist, top_n)

    # unlike calc_songs, the filter is applied here as a column selection on the result
    if not filter:
        return df_retr_songs
    return df_retr_songs[filter]


In [4]:
import os # Import df_info with youtube links

# The YouTube lookup issues one search per song, so its result is cached on disk
# and only recomputed when the cache file does not exist yet.
file_path = "df_yt_info.json"
if os.path.exists(file_path):
    df_info = pd.read_json(file_path)
else:
    res = df_info.apply(get_youtube_info, axis=1)

    # A lookup without a hit yields an empty value; map it to three None entries
    # so that indexing below cannot fail and the song simply gets no link.
    yt_rows = [item if item else (None, None, None) for item in res]

    df_info = df_info.assign(
        yt_url=[yt_row[0] for yt_row in yt_rows],
        yt_id=[yt_row[1] for yt_row in yt_rows],
        yt_view_count=[yt_row[2] for yt_row in yt_rows],
    )

    df_info.to_json(orient='records', indent=2, path_or_buf=file_path)
df_info

Unnamed: 0,id,artist,song,album_name,yt_url,yt_id,yt_view_count
0,01Yfj2T3YTwJ1Yfy,We As Human,Take The Bullets Away (feat. Lacey Sturm),We As Human,https://www.youtube.com/watch?v=v2T40qPAO-0,v2T40qPAO-0,1.6M views
1,01gyRHLquwXDlhkO,The Notorious B.I.G.,Somebody's Gotta Die,Life After Death (Remastered Edition),https://www.youtube.com/watch?v=22YBfS3hLrg,22YBfS3hLrg,1.6M views
2,01rMxQv6vhyE1oQX,Against the Current,Chasing Ghosts,In Our Bones,https://www.youtube.com/watch?v=f7dx1GgO7CI,f7dx1GgO7CI,1.8M views
3,02RGE9FNH65RtMS7,Barthezz,Infected,Trance - The Early Years (1997-2002),https://www.youtube.com/watch?v=mNUFRFlwDPw,mNUFRFlwDPw,1.7M views
4,02ZnlCGZEbkfCDxo,Laura Pausini,Tra Te E Il Mare,The Best of Laura Pausini - E Ritorno Da Te,https://www.youtube.com/watch?v=gl17x1L_8Kc,gl17x1L_8Kc,20M views
...,...,...,...,...,...,...,...
10090,zyzILCQvVeUFIINi,Crowded House,When You Come,Temple Of Low Men,https://www.youtube.com/watch?v=LUP3XOa3yRs,LUP3XOa3yRs,643K views
10091,zzgS4ZqyswamEWNj,Britney Spears,My Only Wish (This Year),Platinum Christmas,https://www.youtube.com/watch?v=_MzumcY3lpk,_MzumcY3lpk,11M views
10092,zzoFYDMlqU1X2zz1,Thundercat,DUI,Drunk,https://www.youtube.com/watch?v=O0kS88zAzh8,O0kS88zAzh8,109K views
10093,zzpkRCGA5ud8q4mv,Otis Redding,Rock Me Baby,Otis Blue,https://www.youtube.com/watch?v=rKpI3o-1CSM,rKpI3o-1CSM,1.3M views


In [5]:
# insert genres for each song
# Guarded so the cell can be re-run safely: merging a second time would produce
# the suffixed columns genre_x / genre_y instead of a single "genre" column.
if "genre" not in df_info.columns:
    df_info = pd.merge(df_info, df_genre, on="id")
df_info

Unnamed: 0,id,artist,song,album_name,yt_url,yt_id,yt_view_count,genre
0,01Yfj2T3YTwJ1Yfy,We As Human,Take The Bullets Away (feat. Lacey Sturm),We As Human,https://www.youtube.com/watch?v=v2T40qPAO-0,v2T40qPAO-0,1.6M views,"[rock, christian rock]"
1,01gyRHLquwXDlhkO,The Notorious B.I.G.,Somebody's Gotta Die,Life After Death (Remastered Edition),https://www.youtube.com/watch?v=22YBfS3hLrg,22YBfS3hLrg,1.6M views,"[hip hop, rap, grindcore, death metal]"
2,01rMxQv6vhyE1oQX,Against the Current,Chasing Ghosts,In Our Bones,https://www.youtube.com/watch?v=f7dx1GgO7CI,f7dx1GgO7CI,1.8M views,"[rock, pop punk]"
3,02RGE9FNH65RtMS7,Barthezz,Infected,Trance - The Early Years (1997-2002),https://www.youtube.com/watch?v=mNUFRFlwDPw,mNUFRFlwDPw,1.7M views,"[trance, techno, progressive trance]"
4,02ZnlCGZEbkfCDxo,Laura Pausini,Tra Te E Il Mare,The Best of Laura Pausini - E Ritorno Da Te,https://www.youtube.com/watch?v=gl17x1L_8Kc,gl17x1L_8Kc,20M views,"[pop, italian pop, latin, europop, ambient, po..."
...,...,...,...,...,...,...,...,...
10089,zyzILCQvVeUFIINi,Crowded House,When You Come,Temple Of Low Men,https://www.youtube.com/watch?v=LUP3XOa3yRs,LUP3XOa3yRs,643K views,"[rock, pop, indie rock, folk rock, new wave, m..."
10090,zzgS4ZqyswamEWNj,Britney Spears,My Only Wish (This Year),Platinum Christmas,https://www.youtube.com/watch?v=_MzumcY3lpk,_MzumcY3lpk,11M views,"[pop, rock, teen pop, soundtrack, singer songw..."
10091,zzoFYDMlqU1X2zz1,Thundercat,DUI,Drunk,https://www.youtube.com/watch?v=O0kS88zAzh8,O0kS88zAzh8,109K views,"[jazz, jazz fusion]"
10092,zzpkRCGA5ud8q4mv,Otis Redding,Rock Me Baby,Otis Blue,https://www.youtube.com/watch?v=rKpI3o-1CSM,rKpI3o-1CSM,1.3M views,"[soul, blues, r b, blues rock, southern soul, ..."


In [6]:
# Retrieval systems to evaluate, one tuple per system:
# (result column label, feature frame or list of frames, similarity function)
rs_info = [
    ("random", df_info, t3.cos_sim),
    ("word2vec euc-sim", df_word2vec, t3.euc_sim),
    ("tfidf cos-sim", df_tfidf, t3.cos_sim),
    ("bert cos-sim", df_bert, t3.cos_sim),
    ("mfcc_bow cos-sim", df_mfcc_bow, t3.cos_sim),
    ("musicnn cos-sim", df_musicnn, t3.cos_sim),
    ("ivec_256 cos-sim", df_ivec_256, t3.cos_sim),
    ("logfluc cos-sim", df_logfluc, t3.cos_sim),
    ("resnet cos-sim", df_resnet, t3.cos_sim),
    ("early fusion cos-sim", df_early_fusion, t3.cos_sim),
    # late fusion takes the two frames to fuse instead of a single frame
    ("late fusion cos-sim", [df_bert, df_musicnn], t3.cos_sim),
]

In [15]:
# Run every retrieval system listed in rs_info for every song and store the
# result of each system in its own column of df_data.
df_data = df_info.copy()

# if an error occurs: please delete the song "03Oc9WeMEmyLLQbj" in information.csv
for system_name, feature_set, sim_func in rs_info:
    if system_name == "late fusion cos-sim":
        # use the pair of frames configured in rs_info rather than hard-coded globals
        feature_1, feature_2 = feature_set
        df_data[system_name] = df_data.apply(
            calc_late_fusion_songs, axis=1, args=(df_info, feature_1, feature_2, ["id"])
        )
    else:
        # only the "random" entry asks the retrieval function for random results
        use_random = system_name == "random"
        df_data[system_name] = df_data.apply(
            calc_songs, axis=1, args=(df_info, feature_set, sim_func, ["id"], use_random)
        )
    print(f"Finished {system_name}")


df_data.to_json(orient='records', indent=2, path_or_buf='data-test.json')
df_data

Finished random
Finished word2vec euc-sim
Finished tfidf cos-sim
Finished bert cos-sim
Finished mfcc_bow cos-sim
Finished musicnn cos-sim
Finished ivec_256 cos-sim
Finished logfluc cos-sim
Finished resnet cos-sim


Unnamed: 0,id,artist,song,album_name,yt_url,yt_id,yt_view_count,genre,random,word2vec euc-sim,tfidf cos-sim,bert cos-sim,mfcc_bow cos-sim,musicnn cos-sim,ivec_256 cos-sim,logfluc cos-sim,resnet cos-sim,late fusion cos-sim
0,01Yfj2T3YTwJ1Yfy,We As Human,Take The Bullets Away (feat. Lacey Sturm),We As Human,https://www.youtube.com/watch?v=v2T40qPAO-0,v2T40qPAO-0,1.6M views,"[rock, christian rock]",id 10056 zmukOoB01ASq2Y6...,id 538 38mNvmgJUzLn7Pf7 ...,id 2496 F4whfybQ8r8xQtb0 ...,id 7754 llBi1Fz8svaJKewh ...,id 9950 z0UqEOsRBaWGWnqi ...,id 671 3wWBRgGCFZydyQhO ...,id 934 5YSYXq6DEMzRslCN ...,id 6005 bBquA6LRnsrO0Orj ...,id 2560 FYbi5F6qvTpd7mZm ...,id 6 NcJM6LYjo623WZNd 3 ...
1,01gyRHLquwXDlhkO,The Notorious B.I.G.,Somebody's Gotta Die,Life After Death (Remastered Edition),https://www.youtube.com/watch?v=22YBfS3hLrg,22YBfS3hLrg,1.6M views,"[hip hop, rap, grindcore, death metal]",id 9057 tbjOkdGL9PvwADZK ...,id 2823 H9krdY6rFJl4VYVR ...,id 9573 wiw2rM2xb4CJ5sVP ...,id 4031 OokmnsloeW1Sh3NF ...,id 4894 TynRPxT2eYqsE1vD ...,id 6835 gE39Ms4AoMKpJNGi ...,id 4894 TynRPxT2eYqsE1vD ...,id 1494 8uMvt3XmfDU1oMz2 ...,id 4540 RmkwkCwdu5013Awo ...,id 3 M65mU1UIozrDxcvu 5 ...
