In [2]:
## Dataset folders
data_root = "/homes/alb30/datasets/"
smt_guitar_root = data_root + "IDMT-SMT-GUITAR/"
guitarset_root = data_root + "GuitarSet/"
gpt_root = data_root + "GuitarPlayingTechniques/"

In [3]:
import os
from pprint import pprint

def unix_find(pathin):
    """Return results similar to the Unix find command run without options
    i.e. traverse a directory tree and return all the file paths
    """
    return [os.path.join(path, file)
            for (path, dirs, files) in os.walk(pathin)
            for file in files]

In [4]:
smt_guitar_annot = smt_guitar_root+"dataset2/annotation/"
smt_guitar_audio = smt_guitar_root+"dataset2/audio/"

smt_guitar_audio_files = unix_find(smt_guitar_audio)
smt_guitar_annot_files = unix_find(smt_guitar_audio)

In [41]:
import librosa
import numpy as np
import pandas as pd
import re
import IPython.display as ipd
import xml.etree.ElementTree as ET
import math


class SMTGuitar:
    def __init__(self, root = data_root):
        # Dataset root folder
        self.root = root 
        self.root_dir = self.root + "IDMT-SMT-GUITAR/"
        # Token for identifying licks
        self.token = 'Lick'
        # File extension
        self.extension = {
            'audio': 'wav',
            'annotation': 'xml'
        }
        
        # Audio and annotation directories for dataset 2 (licks)
        self.d2_dir = {
            "audio": self.root_dir + "dataset2/audio/",
            "annotation": self.root_dir + "dataset2/annotation/"
        }
        # Tokens for extraction of information from file suffix
        self.suffix_tokens = {
            'excitation_styles': {
                'F': 'FS',
                'K': 'MU',
                'M': 'PK'
                },
            'expression_styles': {
                'DN': 'DN',
                'V': 'VI',
                'S': 'SL',
                'B': 'BE',
                'H': 'HA',
                'N': 'NO'
            }
        }
        # Excitation and expression styles for 
        self.styles = {
            'excitation': {
                'FS': 'finger_style',
                'MU': 'palm_muted',
                'PK': 'picked'
            }, 
            'expression': {
#                 'FL': 'flutter',
#                 'ST': 'staccato',
#                 'TR': 'tremolo',
                'NO': 'normal',
                'BE': 'bending',
                'DN': 'dead_notes',
                'HA': 'harmonics',
                'SL': 'slide',
                'VI': 'vibrato'
            }
        }
        # Index of polyphonic licks
        self.polyphonic_licks = [7, 8, 9, 10, 12]
        
        # Dataframe to hold data from licks
        self.df = pd.DataFrame(columns = ['name', 'lick', 'mono', 'guitar', 'excitation', 'expression', 'position', 'fs', 'len', 'ms', 'samples'])
        
        # Dataframe to hold annotations
        self.annotations = pd.DataFrame(columns = ['name', 'onset', 'offset', 'excitation', 'expression'])
        
    def extract_file_names(self, dir, ext):
        # Get list of audio files 
        files = librosa.util.find_files(dir, ext=ext) 
        # Filter files corresponding to licks
        files = list(filter(lambda f: self.token in f, files))
        # Get file names
        file_names = list(map(lambda f: f.split('/')[-1], files))
        # Remove extension from file name
        file_names = list(map(lambda f: f.split('.')[0], file_names))
        
        return file_names, files
    
    def extract_audio(self, sr='None'): 
        # Get list of audio files & names
        file_names, files = self.extract_file_names(self.d2_dir["audio"], self.extension["audio"])
        # Extract IDs 
        ids = list(map(lambda f: int(re.search(self.token + "(.*?)_", f).group(1)), file_names))\
        # Extract guitar type 
        guitar_type = list(map(lambda f: re.search("^(.*?)_", f).group(1), file_names))
        # Extract style suffix
        styles = list(map(lambda f: re.search(self.token + "\d{1,}_(.*?)$", f).group(1), file_names))
        # Parse styles from suffix
        styles = list(map(self.parse_style_from_suffix, styles));

        # Extract audio and append information to dataframe
        for i, f in enumerate(files):
            x, fs = librosa.load(f, sr=None)
            self.df = self.df.append({
                'name' : file_names[i], 
                'lick': ids[i],
                'mono': ids[i] not in self.polyphonic_licks,
                'guitar': guitar_type[i],
                'excitation': styles[i]['excitation'],
                'expression': styles[i]['expression'],
                'position': styles[i]['position'],
                'fs': fs,
                'ms': 1000 * x.shape[0]/fs,
                'len': x.shape[0],
                'samples': x
            }, ignore_index=True, verify_integrity=True)
       
        # Make certain columns numeric
        numeric_col = ['lick', 'fs', 'ms', 'len']
        self.df = self.df.apply(lambda s: pd.to_numeric(s) if s.name in numeric_col else s, axis=1)

        return self.df
    
    def extract_annotations(self):

        # File names (from audio files)
        file_names = self.df['name'].tolist()

        # Get xml file names
        xml_file_names, xml_files = self.extract_file_names(self.d2_dir["annotation"], self.extension["annotation"])

        # Check for discrepancies between audio and file names
        names_not_in_annotation = list(set(file_names) - set(xml_file_names))
        names_not_in_audio = list(set(xml_file_names) - set(file_names))

        # Remap discrepancies
        # DISCLAIMER: this is very specific to the IDMT_SMT_DATASET
        audio_to_annot_dict = dict(zip(sum( list(
                        map(lambda b: 
                            list(
                                filter(lambda a: b in a , names_not_in_audio)
                            ), names_not_in_annotation
                        )
                    ), []), names_not_in_annotation))

        # 'Fix' xml file names for compatibility
        xml_file_names = list(map(lambda x: audio_to_annot_dict[x] if x in audio_to_annot_dict else x, xml_file_names))

        # Create dictionary between names and paths
        xml_file_name_dict = zip(xml_file_names, xml_files)

        # Extract annotations from xml files
        for name, path in xml_file_name_dict:
            tree = ET.parse(path)
            root = tree.getroot()
            for e in root.findall('./transcription/event'):
                self.annotations = self.annotations.append({
                        'name' : name, 
                        'onset': float(e.find('onsetSec').text),
                        'offset': float(e.find('offsetSec').text),
                        'excitation': e.find('excitationStyle').text,
                        'expression': e.find('expressionStyle').text
                }, ignore_index=True, verify_integrity=True)    

        return self.annotations
        
    def extract_transcript(self):
        # Get names from annotation dataframe
        names = self.annotations['name'].unique()

        # Number of different expression styles
        n_exp = len(self.styles['expression'])

        self.df['transcript'] = None
        self.df['num_exp'] = None

        # For each lick
        for n in names:
            # Extract corresponding annotations
            f_annot = self.annotations[self.annotations['name'] == n]
            # Get lick information for corresponding file
            audio_df = self.df[self.df['name'] == n]
            # Get original id (from dataframe)
            orig_idx = audio_df.index
            # Get sample frequency
            fs = int(audio_df['fs'])
            # Get file length
            length = int(audio_df['len'])

            # Initialise transcript ndarray to zero
                # rows: expression styles
            transcript = np.zeros((n_exp, length), dtype=int)
            # Initialise dictionary for number of annotated expressions
            exp_in_transcript = dict.fromkeys(self.styles['expression'], 0)
            # For each style
            for idx, exp in enumerate(self.styles['expression']):
                # Get corresponding annotations
                e_annot = f_annot[f_annot['expression'] == exp]
                # For each annotation
                for i, annot in e_annot.iterrows():
                    exp_in_transcript[exp] = exp_in_transcript[exp]+1
                    # Find time bounds
                    bounds = [int(fs*annot['onset']), int(math.ceil(fs*annot['offset']))]
                    # Update corresponding row in transcript
                    transcript[idx, bounds[0]:bounds[1]] = 1
                    
            # Update transcript in dataframe
            self.df['transcript'][orig_idx] = [transcript]
            self.df['num_exp'][orig_idx] = [exp_in_transcript]
            
        return self.df['transcript'], self.df['num_exp']
    
    def get_simple_transcript(self):
        exp_keys = list(self.styles['expression'].keys())
        no_idx = exp_keys.index('NO')
        bool_mask = np.ones((len(exp_keys),), bool)
        bool_mask[no_idx] = False
        
        self.df['transcript_simple'] = None
        self.df['num_exp_simple'] = None

        for idx, row in self.df.iterrows():
            st = np.apply_along_axis(lambda x: 1*np.logical_or.reduce(x), 0, row['transcript'][bool_mask, :])
            self.df['transcript_simple'][idx] = st
            exp_obj = {'NO': row['num_exp']['NO'], 'EXP': 0}
            for e in exp_keys:
                if e is not 'NO':
                    exp_obj['EXP'] = exp_obj['EXP'] + row['num_exp'][e]
    
        return self.df['transcript_simple']

        
    # Filter dataframe based on input dictionary
    def filter_by(self, filt_dict, orginal_df=None):
        o_df = self.df if not orginal_df else orginal_df 
        filt_items = o_df
        for key, value in filt_dict.items():
            if key in o_df:
                if o_df[key].dtype == 'object':
                    v = value if isinstance(value , list) else [value]
                    filt_items = filt_items[filt_items[key].str.contains('|'.join(v))]
                else:
                    filt_items = filt_items.loc[o_df[key]==value]
        return filt_items

    # Parse expression and excitation styles and playing position from file name
    def parse_style_from_suffix(self, style_str):
        styles = {
            'suffix': style_str,
            'excitation': '',
            'expression': '',
            'position': 0
        }       
        style_arr = style_str.split('_')
                
        # Extract position
        if len(style_arr) > 1:
            lage = re.search("Lage(.*?)", style_arr[1]).group(1)
            styles['position'] = int(lage) if lage != '' else 1
        
        style_arr = style_arr[0]

        # Extract excitation style
        for key in self.suffix_tokens['excitation_styles']:
            if key in style_arr:
                styles['excitation'] = self.suffix_tokens['excitation_styles'][key]
                style_arr = style_arr.replace(key, '')
                break
        
        # Extract expression style
        expr_styles = list()
        for key in self.suffix_tokens['expression_styles'].keys():
            if key in style_arr:
                expr_styles.append(self.suffix_tokens['expression_styles'][key])
                style_arr = style_arr.replace(key, '')
        styles['expression'] = ','.join(expr_styles)     
        
        return styles
    
    # Return playing data for IPython.display.audio
    def play_data(self, idx=None, name=None):
        if idx is not None and idx in self.df.index:
            lick = self.df.iloc[idx]
        if name is not None:
            lick = self.df.filter_by({'name': name})
        return {'data': lick['samples'], 'rate': lick['fs']}
    
    # Get row by order (ignoring original index)
    def get_by_order (self, el, df=None):
        if df is None:
            df = self.df
        if(el < len(df.index)):
            return df.iloc[el]
        else:
            return None
                       
 
smt_guitar = SMTGuitar(data_root)

In [42]:
smt_guitar.extract_audio();
smt_guitar.extract_annotations();
smt_guitar.extract_transcript();

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [43]:
smt_guitar.get_simple_transcript();

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


0
1
2
3
4
5
6
7
8
9
10
11


In [30]:
bend = smt_guitar.filter_by({'expression': 'BE'})
slide = smt_guitar.filter_by({'expression': 'SL'})
bend_and_slide = pd.concat([bend, slide], axis=1, join='inner')

mono_licks = smt_guitar.filter_by({'mono': True})

import random
el = random.randint(0,len(mono_licks.index))
lick = smt_guitar.get_by_order(el=el, df=mono_licks)
ipd.Audio(**smt_guitar.play_data(idx=lick.name))

In [31]:
smt_guitar.annotations

Unnamed: 0,name,onset,offset,excitation,expression
0,AR_Lick10_FN,1.4476,2.8951,FS,NO
1,AR_Lick10_FN,1.9737,2.9145,FS,NO
2,AR_Lick10_FN,2.4508,2.9264,FS,NO
3,AR_Lick10_FN,2.9533,4.1618,FS,NO
4,AR_Lick10_FN,3.4794,4.4088,FS,NO
...,...,...,...,...,...
4036,LP_Lick9_MN_Lage,8.9651,9.1084,MU,NO
4037,LP_Lick9_MN_Lage,9.3551,13.9496,MU,NO
4038,LP_Lick9_MN_Lage,9.3624,13.6718,MU,NO
4039,LP_Lick9_MN_Lage,9.3878,10.8280,MU,NO


In [33]:
expr_count = annotation_df['expression'].value_counts()
print('Expr. count: ',dict(expr_count))
print('Normal: %s' % expr_count[0], 'Other: %s' % expr_count[1:].sum())

NameError: name 'annotation_df' is not defined

In [32]:
smt_guitar.df

Unnamed: 0,name,lick,mono,guitar,excitation,expression,position,fs,len,ms,samples,transcript,num_exp,transcript_simple
0,AR_Lick10_FN,10,False,AR,FS,NO,0,44100,735072,16668.299320,"[0.0, 0.0, -2.3841858e-07, -2.3841858e-07, -3....","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'NO': 48, 'BE': 0, 'DN': 0, 'HA': 0, 'SL': 0,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,AR_Lick10_KN,10,False,AR,MU,NO,0,44100,706816,16027.573696,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'NO': 48, 'BE': 0, 'DN': 0, 'HA': 0, 'SL': 0,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,AR_Lick10_MN,10,False,AR,PK,NO,0,44100,677116,15354.104308,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'NO': 48, 'BE': 0, 'DN': 0, 'HA': 0, 'SL': 0,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,AR_Lick11_FN,11,True,AR,FS,NO,0,44100,1026815,23283.786848,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'NO': 44, 'BE': 13, 'DN': 0, 'HA': 3, 'SL': 2...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,AR_Lick11_KN,11,True,AR,MU,NO,0,44100,1077006,24421.904762,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'NO': 44, 'BE': 13, 'DN': 0, 'HA': 3, 'SL': 2...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,LP_Lick9_FN_Lage,9,False,LP,FS,NO,1,44100,736357,16697.437642,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'NO': 20, 'BE': 0, 'DN': 0, 'HA': 0, 'SL': 0,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
194,LP_Lick9_KN,9,False,LP,MU,NO,0,44100,621321,14088.911565,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'NO': 20, 'BE': 0, 'DN': 0, 'HA': 0, 'SL': 0,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
195,LP_Lick9_KN_Lage,9,False,LP,MU,NO,1,44100,684912,15530.884354,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'NO': 20, 'BE': 0, 'DN': 0, 'HA': 0, 'SL': 0,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
196,LP_Lick9_MN,9,False,LP,PK,NO,0,44100,587616,13324.625850,"[0.0, 0.0, 1.1920929e-07, 2.3841858e-07, 3.576...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'NO': 20, 'BE': 0, 'DN': 0, 'HA': 0, 'SL': 0,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [38]:
smt_guitar.df['num_exp'][0]

{'NO': 48, 'BE': 0, 'DN': 0, 'HA': 0, 'SL': 0, 'VI': 0}

In [None]:
self.df

# for idx, lick in self.df.iterrows():
#     transcript = self.df['transcript'][idx]
#     print(transcript)
#     break

In [None]:
idx = 4
array = np.where(self.df['transcript'][idx]==1)[0]

# array.unique()
unique_array = np.unique(array)
exp_array = np.array(list(self.styles['expression']))
print(exp_array[unique_array], self.df['num_exp'][idx])
exp_in_trans = list({key:value for (key,value) in self.df['num_exp'][idx].items() if value != 0}.keys())
print(exp_in_trans)

print(set(exp_array[unique_array])==set(exp_in_trans))


In [None]:
a = np.array([
    [0,1,1,1,0],
    [0,1,0,1,0],
    [0,0,1,1,0]
])

np.apply_along_axis(lambda x: 1*np.logical_or.reduce(x), 0, a)
# np.apply_along_axis(lambda x: 1*np.add.reduce(x), 0, a)

# np.add.reduce(a, 1, where=[True, False])

In [None]:
b = [0,0,0,0]
np.logical_or.reduce(b)

In [None]:
idx = 4
array = self.df['transcript'][idx]
exp_keys = list(self.styles['expression'].keys())
no_idx = exp_keys.index('NO')
bool_mask = np.ones((len(exp_keys),), bool)
bool_mask[no_idx] = False
np.apply_along_axis(lambda x: 1*np.logical_or.reduce(x), 0, array[bool_mask, :])

In [None]:
array = self.df['transcript']
print(array)
# for idx, val in array.items():
#     print(i)
#     print(x)

In [None]:
max_size = int(f_annot['offset'].max()*fs)
print(max_size)

In [None]:
mono_clips = smt_guitar.filter_by({'mono': True})

a = mono_clips['num_exp']
print(a[3]['NO'])

# a.loc[lambda x : x[1]['NO'] !=0 ]
for l in a.iteritems():
    print(l)
    break

In [None]:
for idx, d in df[column].iteritems():

In [None]:
smt_guitar.df[0]