* Get the contour data of each svara
* Aggregate contours and plot histogram per raaga/svara (all the artists in the same plot)
* Analyze the individual contours

In [None]:
import json
import codecs
import pickle
from copy import deepcopy
from scipy.ndimage.filters import gaussian_filter
import intonation

from os import chdir, listdir, mkdir
from os.path import isdir, basename, exists
from glob import glob

%matplotlib tk
rcParams['figure.figsize'] = (16.0, 9.0)

# Get contours

In [None]:
def find_nearest_index(arr, value):
    """Return the index of the element of ``arr`` closest to ``value``.

    ``arr`` is converted with ``array`` first, so any sequence that
    ``array`` accepts can be passed.  Closeness is the absolute difference;
    ``argmin`` decides which index is reported when several elements are
    equally close.
    """
    distances = abs(array(arr) - value)
    return distances.argmin()

## Varnam dataset

In [None]:
def get_contours(note_alignment_file, pitch_file):
    """Group the aligned notes of one recording by svara.

    Parameters
    ----------
    note_alignment_file : str
        Path to a JSON note-alignment file.  The notes are read from the
        top-level key obtained by dropping the first 13 and the last 5
        characters of the file's base name.
    pitch_file : str
        Path to a text file readable by ``loadtxt``.  Only column 0 is used
        here (presumably time stamps in the same unit as the note
        intervals -- confirm against the pitch extractor).

    Returns
    -------
    dict
        Maps every ``svara['pitchHeight']['Value']`` found in the alignment
        to a list of ``[start_index, end_index]`` pairs.  The indices are
        row numbers of the pitch file whose column-0 value is nearest to
        the start/end of the note's ``'interval'``.

    Raises
    ------
    IOError
        If either file cannot be opened.
    KeyError
        If the expected key is missing from the alignment JSON.
    """
    alignment_key = basename(note_alignment_file)[13:-5]
    # Close the JSON file deterministically instead of relying on garbage
    # collection of an anonymous file object.
    with open(note_alignment_file) as alignment_fp:
        align_data = json.load(alignment_fp)[alignment_key]

    # The pitch values themselves (column 1) are not needed to compute the
    # index ranges, so only the first column is kept.
    time_axis = loadtxt(pitch_file)[:, 0]

    contours = {}
    for svara in align_data:
        start_ind = find_nearest_index(time_axis, svara['interval'][0])
        end_ind = find_nearest_index(time_axis, svara['interval'][1])
        contours.setdefault(svara['pitchHeight']['Value'], []).append([start_ind, end_ind])

    return contours

In [None]:
#data_dir = '/homedtic/gkoduri/data/intonation/varnam-analysis/recorded/audio/
data_dir = '/home/gkoduri/Dropbox/UPF-Work/PhD/Varnam Analysis/data/audioScoreAlignment/'
raagas = ['abhogi', 'begada', 'kalyani', 'mohanam', 'sahana', 'saveri', 'shree']
# exclude.txt holds one entry per line; the entries are later compared
# against "<raaga>/<artist>" strings.  The file is closed as soon as it has
# been read.
with codecs.open(data_dir + 'exclude.txt') as exclude_file:
    excluded = [line.strip() for line in exclude_file]

In [None]:
chdir(data_dir)

for raaga in raagas:
    print raaga
    chdir(raaga)
    artists = listdir('.')
    artists = [a for a in artists if isdir(a)]
    for a in artists:
        if raaga + "/" + a in excluded:
            continue
            
        chdir(a)
        print a,
        
        alignment_files = glob('alignedNotes*.json')
        if 'alignedNotes.json' in alignment_files:
            alignment_files.remove('alignedNotes.json')
            
        for f in alignment_files:
            if exists(data_dir+raaga+'/'+a+'/contours_phrase_aligned/'+f[13:-5]+'.pickle'):
                continue
            try:
                contours = get_contours(data_dir+raaga+'/'+a+'/'+f, data_dir+raaga+'/'+a+'/'+a+'-cents.txt')
            except (IOError):
                print raaga + '/' + a + '/' + f + ' not found!'
                continue
            if not isdir(data_dir+raaga+'/'+a+'/contours_phrase_aligned/'):
                mkdir(data_dir+raaga+'/'+a+'/contours_phrase_aligned/')
            pickle.dump(contours, file(data_dir+raaga+'/'+a+'/contours_phrase_aligned/'+f[13:-5]+'.pickle', 'w'))
        chdir('..')
        
    print
    chdir('..')

## Kriti dataset

In [None]:
def get_contours(note_alignment_file, pitch_file):
    """Group the aligned notes of one recording by svara (Kriti dataset).

    Parameters
    ----------
    note_alignment_file : str
        Path to a JSON note-alignment file.  The notes are read from the
        first element stored under the top-level key
        ``'dtw_100centBinarization_kmeans'``.
    pitch_file : str
        Path to a text file readable by ``loadtxt``.  Only column 0 is used
        here (presumably time stamps in the same unit as the note
        intervals -- confirm against the pitch extractor).

    Returns
    -------
    dict
        Maps every ``svara['pitchHeight']['Value']`` found in the alignment
        to a list of ``[start_index, end_index]`` pairs.  The indices are
        row numbers of the pitch file whose column-0 value is nearest to
        the start/end of the note's ``'interval'``.  Notes whose value is
        ``'_NaN_'`` are dropped.

    Raises
    ------
    IOError
        If either file cannot be opened.
    KeyError
        If the expected key is missing from the alignment JSON.
    """
    # Close the JSON file deterministically instead of relying on garbage
    # collection of an anonymous file object.
    with open(note_alignment_file) as alignment_fp:
        align_data = json.load(alignment_fp)['dtw_100centBinarization_kmeans']

    # The pitch values themselves (column 1) are not needed to compute the
    # index ranges, so only the first column is kept.
    time_axis = loadtxt(pitch_file)[:, 0]

    contours = {}
    for svara in align_data[0]:
        start_ind = find_nearest_index(time_axis, svara['interval'][0])
        end_ind = find_nearest_index(time_axis, svara['interval'][1])
        contours.setdefault(svara['pitchHeight']['Value'], []).append([start_ind, end_ind])

    # Discard the notes that carry the '_NaN_' placeholder instead of a svara.
    contours.pop('_NaN_', None)
    return contours

In [None]:
# Root of the extracted features; the pitch tracks are read from its
# 'pitch/' sub-directory.
feat_dir = '/homedtic/ssenturk/experiments/20-raagas/features/'
# Destination of the '<name>-contours.pickle' files written below.
contour_dir = '/homedtic/ssenturk/experiments/20-raagas/features/phrase_aligned/'
# Location of the note-alignment JSON files that drive the extraction.
align_dir = '/homedtic/ssenturk/experiments/20-raagas/features/noteAlignments/dtw_100centBinarization_kmeans/'

In [None]:
# Work from inside the alignment directory so that glob returns bare file
# names (no directory prefix).
chdir(align_dir)
alignment_files = glob('*.json')

In [None]:
# Sanity check: show the first alignment file name without its '.json' suffix.
alignment_files[0][:-5]

In [None]:
for f in alignment_files:
    if exists('{0}/{1}-contours.pickle'.format(contour_dir, f[:-5])):
        continue
    try:
        contours = get_contours('{0}{1}'.format(align_dir, f), '{0}/pitch/{1}.txt'.format(feat_dir, f[:-5]))
    except:
        print f, ' is not successful'
        continue
    pickle.dump(contours, file('{0}/{1}-contours.pickle'.format(contour_dir, f[:-5]), 'w'))
    print f, 'is successful'

## Plot histograms

In [None]:
# Lookup table used to turn the contour keys into svara labels for the plot
# titles and file names.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('/home/gkoduri/Dropbox/UPF-Work/PhD/Varnam Analysis/data/cents_to_svara_labels.pickle') as labels_file:
    cents_to_svaras = pickle.load(labels_file)

In [None]:
chdir(data_dir)
plot_dir = '/home/gkoduri/Dropbox/UPF-Work/PhD/Varnam Analysis/data/plots/phrase_aligned/'

for raaga in raagas:
    print raaga
    chdir(raaga)
    artists = listdir('.')
    artists = [a for a in artists if isdir(a)]
    
    svara_data = {}
    
    # For each artist, for each svara, aggregate all the pitch values corresponding to the contour indices
    for a in artists:
        try:
            contours = pickle.load(file(data_dir+raaga+'/'+a+'/contours_phrase_aligned/dtw_100centBinarization_kmeans.pickle'))
            pitch_data = loadtxt(data_dir + raaga + '/' + a + '/' + a + '-cents.txt')
            for k, v in contours.items():
                pitch_contours = []
                for i in v:
                    temp = pitch_data[i[0]:i[1], 1]
                    temp = [val for val in temp if not isinf(val)]
                    pitch_contours.append(temp)
                if k in svara_data.keys():
                    svara_data[k].append(concatenate(pitch_contours))
                else:
                    svara_data[k] = [concatenate(pitch_contours)]
        except (IOError):
            print '{0}/{1} failed!'.format(raaga, a)
            continue
    
    # For each svara, plot histogram of pitch values from each artist seperately
    for svara, data in svara_data.items():
        if svara < 0 or svara >= 1200:
            continue
            
        figure()
        grid(color="0.35")
        a_count = 0
        for artist_data in data:
            n, be = histogram(artist_data, bins=int(max(artist_data)-min(artist_data)), density=True)
            bc = (be[1:]+be[:-1])/2.0
            ns = gaussian_filter(n, 5)
            plot(bc, ns, label=artists[a_count])
            a_count += 1
            
            hold(True)
            xlim(svara-350, svara+350)
            xticks(fontsize=24)
            yticks(fontsize=24)
            
        xlabel(raaga + ' - ' + str(cents_to_svaras[svara]), fontsize=24)
        #legend(fontsize=24)
        savefig(plot_dir + raaga + ' - ' + str(cents_to_svaras[svara]).replace('/', '|') + '.pdf', orientation='landscape')
        close('all')
    
    chdir('..')