# Configuration

In [1]:
# Root directory that is searched recursively for the MSD mp3 samples.
MSD_SAMPLE_DIR = "/mnt/dataset_storage/audio/music/MSD/audio/"

# Location of "msd_amglabels_all.h5". The tagset join only runs when this path
# exists, so an empty string skips it.
MSD_AMG_TAGS_PATH = "/home/schindlera/experiments/representation_from_album_review/data/msd_amglabels_all.h5"

# Location of the album-review track-id mapping. The review join only runs when
# this path exists, so an empty string skips it.
MSD_AMG_REVIEW_TRACKID_MAPPING_PATH = "/home/schindlera/experiments/representation_from_album_review/data/MSD_AMG_REVIEW_TRACKID_MAPPING.h5"

# Directory that receives the intermediate and final results of the experiment.
EXPERIMENT_DIR = "/home/schindlera/experiments/ismir2020_reviews/"

# Imports

In [2]:
import os
import pandas as pd
import numpy as np

# Load Data

## Find all MSD audio samples

In [3]:
# Collect every mp3 file below MSD_SAMPLE_DIR. The file name without its
# extension is used as the track id and becomes the index of metadata_audio.
sample_fullpaths = []
sample_track_ids = []

for root, _dirs, files in os.walk(MSD_SAMPLE_DIR):
    for file in files:
        # splitext strips only the trailing extension; str.replace would also
        # remove ".mp3" occurring elsewhere in the name and corrupt the id
        track_id, extension = os.path.splitext(file)
        if extension == ".mp3":
            sample_fullpaths.append(os.path.join(root, file))
            sample_track_ids.append(track_id)

metadata_audio = pd.DataFrame(sample_fullpaths, index=sample_track_ids, columns=["audio_path"])

# marker column: every row of this table has an audio sample on disk
metadata_audio["audio"] = True

print("%d MSD audio samples found" % metadata_audio.shape[0])

995000 MSD audio samples found


## Join with Tagset Data

In [10]:
# Restrict the audio samples to tracks that have tag data. When the tag archive
# is not available the audio metadata is used unchanged.
if os.path.exists(MSD_AMG_TAGS_PATH):

    # load labelsets from hdf5 archive
    msd_amgtags_all = pd.read_hdf(MSD_AMG_TAGS_PATH, "data")

    # only use instances where at least one genre tag is provided; .assign
    # returns a new frame, so the marker column is not written onto the result
    # of a boolean-mask selection (which can raise SettingWithCopyWarning)
    msd_amgtags_all = msd_amgtags_all[msd_amgtags_all.genres.notna()].assign(tags=True)

    # join with audio metadata on the index; the inner join keeps only track
    # ids present in both tables
    metadata_final = metadata_audio.join(msd_amgtags_all, how="inner")

    print("Intersection size: %d" % metadata_final.shape[0])

else:

    metadata_final = metadata_audio

Intersection size: 504992


## Join with Album Review Dataset

In [11]:
if os.path.exists(MSD_AMG_REVIEW_TRACKID_MAPPING_PATH):

    # load the review mapping, key it by track id and flag every mapped track
    MSD_lastfmid_trackid_mapping = (
        pd.read_hdf(MSD_AMG_REVIEW_TRACKID_MAPPING_PATH, "data")
        .set_index("track_id")
        .assign(reviews=True)
    )

    # left join: every row of metadata_final is kept, and tracks without a
    # mapping entry get missing values in the added columns
    metadata_final = metadata_final.join(MSD_lastfmid_trackid_mapping, how="left")

    # NOTE(review): because of the left join this is the size of the joined
    # table, not the number of tracks that actually have a review
    print("Intersection size: %d" % len(metadata_final.index))

Intersection size: 504992


# Store Data

## Store Metadata

In [12]:
# make sure the experiment directory exists before anything is written to it
os.makedirs(EXPERIMENT_DIR, exist_ok=True)

# persist the joined metadata table under the HDF5 key "data"; os.path.join
# avoids the doubled separator that string concatenation produces when
# EXPERIMENT_DIR already ends with "/"
metadata_final.to_hdf(os.path.join(EXPERIMENT_DIR, "experiment_partition.h5"), key="data")

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->Index(['audio_path', 'genres', 'styles', 'moods', 'themes', 'artist_name',
       'title', 'release', 'reviews'],
      dtype='object')]

  encoding=encoding,


## Store track ids for feature extraction

In [13]:
# Write one "<track id>,<audio path>" line per track, without a header row.
# Writing the column as a Series emits the index as the first field whatever
# the index is called; selecting an "index" column after reset_index() only
# works while the index is unnamed.
metadata_final["audio_path"].to_csv(os.path.join(EXPERIMENT_DIR, "eval_trackids.csv"), header=False)

## Read Metadata

In [3]:
# Reload the experiment partition from the experiment directory.
# NOTE(review): the frame displayed below has columns (genres_flattened, train,
# val, test) and an index name that the store cell of this notebook does not
# produce; presumably the file was updated by a later step. Confirm.
metadata_final = pd.read_hdf(EXPERIMENT_DIR + "/experiment_partition.h5", key="data")

In [4]:
# display the reloaded metadata table (the rendering below is truncated to the
# first and last rows)
metadata_final

Unnamed: 0_level_0,audio_path,audio,genres,styles,moods,themes,tags,artist_name,title,release,lastfm_id,reviews,genres_flattened,train,val,test
trackid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
TRZZZHL128F423C536,/mnt/dataset_storage/audio/music/MSD/audio/Z/Z...,True,"[Pop/Rock, R&B]","[Album Rock, Blue-Eyed Soul, Contemporary Pop/...","[Calm/Peaceful, Laid-Back/Mellow, Warm, Elegan...","[Background Music, Comfort, Romantic Evening, ...",True,Van Morrison,Crazy Love (Album Version),Moondance,2028615.0,True,R&B,True,False,False
TRZZZWE12903CDD329,/mnt/dataset_storage/audio/music/MSD/audio/Z/Z...,True,[Electronic],[Funky Breaks],"[Gritty, Playful, Laid-Back/Mellow, Trippy, Wh...",[Hanging Out],True,,,,,,Electronic,False,False,True
TRZZZCL128F428BB80,/mnt/dataset_storage/audio/music/MSD/audio/Z/Z...,True,[Pop/Rock],"[Alternative/Indie Rock, Post-Hardcore, Goth R...","[Dramatic, Ethereal, Cathartic, Elegant, Enigm...","[Late Night, Reflection, Solitude, Heartache, ...",True,I Am Ghost,The Ship of Pills and Needed Things,Lover's Requiem,3403584.0,True,Pop/Rock,True,False,False
TRZZZMM128F42819C0,/mnt/dataset_storage/audio/music/MSD/audio/Z/Z...,True,[Pop/Rock],,,,True,,,,,,Pop/Rock,False,False,True
TRZZZGZ128F4292B90,/mnt/dataset_storage/audio/music/MSD/audio/Z/Z...,True,[Pop/Rock],"[Adult Alternative Pop/Rock, Alternative/Indie...","[Slick, Poignant, Bittersweet, Passionate, Liv...","[Day Driving, Girls Night Out, Empowering]",True,Andrea Corr,24 Hours (Album Version),Ten Feet High,3307948.0,True,Pop/Rock,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TRYYYJB128F930A811,/mnt/dataset_storage/audio/music/MSD/audio/Y/Y...,True,[Jazz],"[Fusion, Post-Bop, Guitar Jazz, Jazz Instrument]",,,True,Rodney Jones,Gaze,When You Feel the Love,2533849.0,True,Jazz,True,False,False
TRYYYMZ128F42717A8,/mnt/dataset_storage/audio/music/MSD/audio/Y/Y...,True,[Pop/Rock],"[Alternative/Indie Rock, Emo, Punk-Pop]","[Passionate, Brash, Energetic, Rousing, Aggres...",,True,Ace Troubleshooter,Numinous (Madness Of The Crowd Album Version),The Madness of the Crowds,2035689.0,True,Pop/Rock,True,False,False
TRYYYCW128F9320BC3,/mnt/dataset_storage/audio/music/MSD/audio/Y/Y...,True,"[Electronic, Pop/Rock]","[Alternative/Indie Rock, Ambient Pop, Adult Al...","[Lush, Reflective, Gentle, Summery, Warm, Drea...","[Hanging Out, Day Driving, Reflection]",True,Zero 7,Futures [Acoustic Version],The Garden,2519148.0,True,Electronic,True,False,False
TRYYYJD128F429528C,/mnt/dataset_storage/audio/music/MSD/audio/Y/Y...,True,[Pop/Rock],"[Alternative Metal, Heavy Metal, Post-Grunge]","[Rousing, Tense/Anxious, Volatile, Yearning, E...","[Guys Night Out, Introspection]",True,Blindside,Where The Sun Never Dies (Album Version),About A Burning Fire,1422327.0,True,Pop/Rock,True,False,False
