In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa
import re
import os
import warnings
import matplotlib as mpl

from tqdm import tqdm

warnings.filterwarnings("ignore")

In [2]:
# Move the working directory two levels up; the cells below read the dataset
# through paths relative to the working directory ("./data/irmas/...").
# NOTE(review): the move is relative to the current directory, so running this
# cell a second time without restarting the kernel climbs two more levels.
%cd ../..

/home/irep/Desktop/lumen-audio


In [3]:
def read_all_names_train(split="train", root="./data/irmas"):
    """Collect the paths of all files stored in the per-instrument folders of a split.

    The split directory ``{root}/{split}`` is expected to contain one
    sub-directory per instrument; every entry inside each of those
    sub-directories is returned.

    Args:
        split: Name of the split folder below ``root``.
        root: Dataset root directory. The default reproduces the paths that
            were previously hard-coded.

    Returns:
        A list of ``"{root}/{split}/{instrument}/{file}"`` strings, in the
        order reported by ``os.listdir`` (no sorting is applied).
    """
    split_dir = f"{root}/{split}"
    all_files = []
    for instr in os.listdir(split_dir):
        instr_dir = f"{split_dir}/{instr}"
        # Skip stray non-directory entries (README, .DS_Store, ...). Filtering
        # and listing on the same entry keeps folder and path prefix in sync;
        # zipping a filtered list against the unfiltered one would not.
        if not os.path.isdir(instr_dir):
            continue
        all_files.extend(f"{instr_dir}/{j}" for j in os.listdir(instr_dir))
    return all_files

def find_all_features(train_names):
    """Parse the bracketed tags embedded in training file names.

    Every ``[...]`` group found in a name is collected. The tags are then
    normalised to the triple (instrument, drums, genre):

    * when neither ``"nod"`` nor ``"dru"`` is present, ``None`` is inserted as
      the drums entry;
    * when the result does not have exactly three entries, one ``"---"``
      placeholder tag is removed.

    Args:
        train_names: Iterable of file names or paths containing the tags.

    Returns:
        Three parallel lists ``(instruments, drums, genre)`` with one entry
        per input name; ``drums`` holds ``None`` where no drum tag was found.

    Raises:
        ValueError: If a name does not yield three tags and contains no
            ``"---"`` placeholder that could be dropped.
    """
    # Raw string: "\[" is not a valid escape in a normal string literal and
    # triggers a SyntaxWarning on recent Python versions.
    tag_pattern = re.compile(r"\[([^]]+)\]")
    instruments, drums, genre = [], [], []
    for name in train_names:
        match = tag_pattern.findall(name)
        if "nod" not in match and "dru" not in match:
            match.insert(1, None)
        if len(match) != 3:
            if "---" not in match:
                # list.remove would raise ValueError here anyway; raise it
                # with a message that identifies the offending file.
                raise ValueError(
                    f"cannot parse instrument/drums/genre tags from {name!r}: got {match}"
                )
            match.remove("---")
        instruments.append(match[0])
        drums.append(match[1])
        genre.append(match[2])
    return instruments, drums, genre

# Gather every training file and parse the tags embedded in its name.
all_train_files = read_all_names_train(split="train")
instruments, drums, genre = find_all_features(all_train_files)

# One row per training file: its path plus the three parsed tags.
df_train = pd.DataFrame(
    {
        "path": all_train_files,
        "predominant_instrument": instruments,
        "drums": drums,
        "genres": genre,
    }
)

df_train.head()

Unnamed: 0,path,predominant_instrument,drums,genres
0,./data/irmas/train/gel/071__[gel][dru][cou_fol...,gel,dru,cou_fol
1,./data/irmas/train/gel/[gel][pop_roc]1034__3.wav,gel,,pop_roc
2,./data/irmas/train/gel/[gel][pop_roc]0898__3.wav,gel,,pop_roc
3,./data/irmas/train/gel/194__[gel][dru][cou_fol...,gel,dru,cou_fol
4,./data/irmas/train/gel/081__[gel][dru][pop_roc...,gel,dru,pop_roc


In [4]:
def read_all_names_test(test_dir="./data/irmas/test"):
    """Load the label list and the path of every test excerpt.

    Each ``.wav`` file in ``test_dir`` is paired with the ``.txt`` file that
    shares its stem; the non-empty, whitespace-stripped lines of that text
    file are the excerpt's labels.

    Args:
        test_dir: Directory holding the ``.wav``/``.txt`` pairs. The default
            reproduces the paths that were previously hard-coded.

    Returns:
        A tuple ``(instruments, names)`` of parallel lists ordered by sorted
        ``.wav`` file name: ``instruments[i]`` is the list of labels and
        ``names[i]`` is ``"{test_dir}/{wav file name}"``.

    Raises:
        FileNotFoundError: If a ``.wav`` file has no matching ``.txt`` file.
    """
    all_wav = sorted(i for i in os.listdir(test_dir) if i.endswith(".wav"))
    instruments, names = [], []
    for wav in all_wav:
        # Pair by stem instead of zipping two independently sorted listings:
        # the two sort orders can disagree (e.g. "x.u.*" vs "x.*") and a
        # missing annotation would silently shift every following label.
        txt = os.path.splitext(wav)[0] + ".txt"
        # Context manager so the file handle is closed deterministically.
        with open(f"{test_dir}/{txt}") as label_file:
            lines = label_file.read().split("\n")
        instruments.append([k.strip() for k in lines if k.strip() != ""])
        names.append(f"{test_dir}/{wav}")
    return instruments, names
        
def count_num_songs():
    """Print the number of distinct songs among the test ``.wav`` files.

    A song name is the file name with everything from its last ``-`` onwards
    removed, so all excerpts cut from one song collapse to a single entry.
    """
    unique_songs = set()
    for file_name in os.listdir("data/irmas/test"):
        if not file_name.endswith(".wav"):
            continue
        # Keep every dash-separated piece except the last one.
        pieces = file_name.split("-")
        unique_songs.add("-".join(pieces[:-1]))
    print("number of songs:", len(unique_songs))
    
# Report the number of distinct songs, then load labels and paths of the
# test excerpts.
count_num_songs()
instruments, names = read_all_names_test()

df_test = pd.DataFrame.from_dict({
    "instruments": instruments,
    "path": names
})

# Split ".../<song name>-<excerpt number>.wav" into song name and excerpt number.
pattern = re.compile(r'^(.*?)-(\d{1,2})\.wav$')
df_test[['song_name', 'song_number']] = df_test['path'].str.extract(pattern)

# Fail with a readable message instead of an opaque NaN-to-int cast error
# when a path does not follow the expected naming scheme.
unmatched = df_test.loc[df_test['song_name'].isna(), 'path']
if not unmatched.empty:
    raise ValueError(f"paths not matching {pattern.pattern!r}: {unmatched.tolist()}")

# Number the songs 1..N in sorted song-name order. ngroup() numbers groups in
# the same order a loop over the groupby would visit them, yields integers
# directly and also works when the frame is empty.
song_groups = df_test.groupby('song_name')
df_test['song_id'] = (song_groups.ngroup() + 1).astype(int)
df_test['song_number'] = df_test['song_number'].astype(int)
df_test = df_test.sort_values(['song_id', 'song_number'], ascending=[True, True])

df_test.head(30)

number of songs: 208


Unnamed: 0,instruments,path,song_name,song_number,song_id
0,[gel],./data/irmas/test/(02) dont kill the whale-1.wav,./data/irmas/test/(02) dont kill the whale,1,1
6,"[gel, voi]",./data/irmas/test/(02) dont kill the whale-2.wav,./data/irmas/test/(02) dont kill the whale,2,1
7,"[gel, voi]",./data/irmas/test/(02) dont kill the whale-3.wav,./data/irmas/test/(02) dont kill the whale,3,1
8,[gel],./data/irmas/test/(02) dont kill the whale-4.wav,./data/irmas/test/(02) dont kill the whale,4,1
9,"[gel, voi]",./data/irmas/test/(02) dont kill the whale-6.wav,./data/irmas/test/(02) dont kill the whale,6,1
10,[gel],./data/irmas/test/(02) dont kill the whale-8.wav,./data/irmas/test/(02) dont kill the whale,8,1
11,"[gel, pia]",./data/irmas/test/(02) dont kill the whale-9.wav,./data/irmas/test/(02) dont kill the whale,9,1
1,[gel],./data/irmas/test/(02) dont kill the whale-11.wav,./data/irmas/test/(02) dont kill the whale,11,1
2,"[gel, voi]",./data/irmas/test/(02) dont kill the whale-12.wav,./data/irmas/test/(02) dont kill the whale,12,1
3,"[gel, voi]",./data/irmas/test/(02) dont kill the whale-13.wav,./data/irmas/test/(02) dont kill the whale,13,1


In [None]:
def instruments_compared_to_genre():
    """Unfinished plotting stub.

    Presumably meant to draw a bar chart relating instruments to genres,
    judging by the name only — TODO confirm the intended data and axes.

    NOTE(review): ``plt.bar`` is called with a single positional argument;
    matplotlib's ``bar`` also requires ``height``, so calling this function
    as written raises ``TypeError``.
    """
    plt.bar("")
    