In [1]:
%matplotlib inline

import sys
import os
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import qgrid
import xml.etree.ElementTree as ET

# Make the parent directory importable so that the project-local `python`
# package resolves when this notebook runs from its own sub-folder.
module_path = os.path.abspath('..')
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)

from python.utils import *

## Load data

In [8]:
# Both tables are stored with their row index in the first CSV column,
# so that column is restored as the DataFrame index on load.
data, performance = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('../data/data.csv', '../data/performance.csv')
)

In [12]:
# Render the `data` table as an interactive qgrid widget for inspection.
qgrid.show_grid(data)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

## GTZAN

In [141]:
# Alphabetically ordered names of all GTZAN XML annotation files.
files_GTZAN = os.listdir('../data/annotations/GTZAN/xml/')
files_GTZAN.sort()

In [152]:
# Convert every GTZAN XML annotation into a two-column `.beats` text file:
# column 1 is the annotated beat time, column 2 the beat's position inside
# its measure (1 = downbeat, 0 everywhere if no downbeat is annotated).
for i, file in enumerate(files_GTZAN):
    print("Progress file {:d} of {:d}".format(i+1, len(files_GTZAN)), end="\r")

    tree = ET.parse('../data/annotations/GTZAN/xml/{}'.format(file))
    root = tree.getroot()

    beats = []
    measure = []
    counter = 1

    for child in root:
        # Only elements carrying a 'time' attribute are annotation events.
        if child.get('time') is None:
            continue
        # Element.getchildren() was removed in Python 3.9; indexing the
        # element returns the same first sub-element.
        first_child = child[0]
        if first_child.get('beat') != '1':
            continue

        beats.append(float(child.get('time')))
        # A downbeat restarts the count; every other beat advances it.
        if first_child.get('measure') == '1':
            counter = 1
        else:
            counter = counter + 1
        measure.append(counter)

    # Correct measure: beats preceding the first downbeat were numbered
    # upwards starting at 2, so renumber them backwards from the largest
    # count seen among the first ten beats.
    try:
        first_occurrence = measure.index(1)
    except ValueError:
        # No downbeat in this file (or no beats at all): write position 0
        # for every beat instead of guessing a measure structure.
        measure = np.zeros(len(beats))
    else:
        signature = max(measure[:10])
        for offset, position in enumerate(reversed(range(first_occurrence))):
            measure[position] = signature - offset

    # Drop the trailing 8 characters of the file name
    # (presumably a '.wav.xml'-style suffix -- TODO confirm).
    name = file[:-8]

    # Use a dedicated name here: reusing `data` would overwrite the
    # DataFrame loaded at the top of the notebook with an ndarray.
    beat_table = np.array([beats, measure])

    np.savetxt('../data/annotations/GTZAN/{}.beats'.format(name), beat_table.T,
               delimiter='', fmt=['%.10f ', '%d'])

Progress file 1000 of 1000

## Hainsworth

In [5]:
# Load the Hainsworth table: no header row, fields separated by the
# literal token "<sep>" (multi-character separators need the python engine).
hw = pd.read_csv(
    '../../data/hainsworth/data.txt',
    sep="<sep>",
    header=None,
    engine='python',
)

In [58]:
# Write one `.beats` file per Hainsworth row from the beat list in column 10.
for row, raw_beats in enumerate(hw[10]):
    # Cut two characters off each end of the field, then split on commas.
    tokens = raw_beats[2:-2].split(',')
    sample_positions = [int(float(token)) for token in tokens]
    # Dividing by 44100 presumably converts sample positions to seconds
    # at a 44.1 kHz sample rate -- TODO confirm.
    beats = np.array(sample_positions)/44100

    # Column 0 holds the file name; its last 6 characters are dropped.
    name = hw.at[row, 0][:-6]

    np.savetxt('../data/annotations/HW{}.beats'.format(name), beats, fmt='%.10f ')

## Add new data

In [None]:
# Every entry of the audio folder, sorted, with its last 4 characters
# (the file ending) removed.
files_ballroom = [
    entry[:-4]
    for entry in sorted(os.listdir('../data/audio/'))
]

In [None]:
# Sorted audio entries whose name starts with "SMC", with the last
# 4 characters (the file ending) removed.
files_smc = [
    entry[:-4]
    for entry in sorted(os.listdir('../data/audio/'))
    if entry.startswith("SMC")
]

In [65]:
# Sorted audio entries whose name starts with "HW", with the last
# 4 characters (the file ending) removed.
files_hw = [
    entry[:-4]
    for entry in sorted(os.listdir('../data/audio/'))
    if entry.startswith("HW")
]

In [158]:
# Sorted, non-hidden entries of the GTZAN audio folder, with the last
# 4 characters (the file ending) removed.
files_gtzan = [
    entry[:-4]
    for entry in sorted(os.listdir('../data/audio/GTZAN/'))
    if not entry.startswith('.')
]

In [15]:
# Build the list of Beatles tracks as 'album/track' with the 4-character
# file ending removed. Hidden entries (names starting with '.') are skipped.
beatles_albums = [file for file in sorted(os.listdir('../data/audio/Beatles/')) if not file.startswith('.')]
beatles = []
for album in beatles_albums:
    tracks = ['{}/{}'.format(album, file) for file in sorted(
        os.listdir('../data/audio/Beatles/{}/'.format(album))) if not file.startswith('.')]
    # Strip the file ending exactly once per track. Stripping the whole
    # accumulated list inside this loop would cut another 4 characters off
    # the tracks of earlier albums on every later iteration.
    beatles.extend(track[:-4] for track in tracks)  # delete file ending

In [11]:
# Rename the files under 'The-Beatles-Original-Studio-Albums' so that each
# one takes the name of the file at the same sorted position in the matching
# 'Beatles' album folder.
beatles_albums = [file for file in sorted(os.listdir('../data/audio/Beatles/')) if not file.startswith('.')]

# First pass: collect every (source, target) pair and validate the pairing.
# Files are matched purely by sorted position, so a differing file count
# means the pairing cannot be trusted; abort before anything is renamed.
rename_plan = []
for album in beatles_albums:
    tracks = [file for file in sorted(
        os.listdir('../data/audio/Beatles/{}/'.format(album))) if not file.startswith('.')]
    old_tracknames = [file for file in sorted(
        os.listdir('../data/audio/The-Beatles-Original-Studio-Albums/{}/'.format(album))) if not file.startswith('.')]
    if len(tracks) != len(old_tracknames):
        raise ValueError(
            "Album '{}': {:d} reference tracks but {:d} files to rename".format(
                album, len(tracks), len(old_tracknames)))
    for old_name, new_name in zip(old_tracknames, tracks):
        rename_plan.append((
            '../data/audio/The-Beatles-Original-Studio-Albums/{}/{}'.format(album, old_name),
            '../data/audio/The-Beatles-Original-Studio-Albums/{}/{}'.format(album, new_name)))

# Second pass: every album validated, perform the renames.
for old_path, new_path in rename_plan:
    os.rename(old_path, new_path)

In [10]:
# Sorted, non-hidden entries of the MIREX audio folder, with the last
# 4 characters (the file ending) removed.
MIREX = [
    entry[:-4]
    for entry in sorted(os.listdir('../data/audio/MIREX/'))
    if not entry.startswith('.')
]

In [11]:
# Build one row per MIREX file with the same columns as `data`; columns
# not assigned below stay empty (NaN).
new_data = pd.DataFrame(columns=data.columns)
new_data['file'] = MIREX
new_data['data_set'] = 6  # data-set id given to the rows added here
# Continue the running `idx` numbering directly after the existing rows.
new_data['idx'] = np.arange(len(data), len(data)+len(MIREX))

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
# NOTE: re-running this cell appends the same rows again.
data = pd.concat([data, new_data], ignore_index=True)

## Delete rows

In [5]:
# Remove the row whose index label is 2023.
data = data.drop(index=[2023])

In [20]:
# Renumber the rows 0..n-1 in both the index and the `idx` column.
data.index = range(len(data))
# Bracket assignment always writes the column; attribute assignment
# (`data.idx = ...`) only does so when the column already exists.
data['idx'] = range(len(data))

## Rename columns

In [29]:
# Rename only the column. Passing `index=str` as well would turn every row
# label into a string and break integer-label lookups such as
# `data.drop([2023])`.
data = data.rename(columns={'AMLt_mad': 'At_mad'})

## Delete columns

In [18]:
# Remove the 'loss_tcn' column from the table.
data = data.drop(labels=['loss_tcn'], axis=1)

## Change column order

In [4]:
# Display the current column labels and their order.
data.columns

Index(['idx', 'file', 'data_set', 'f_mad', 'f_TCN', 'Cc_TCN', 'Ct_TCN',
       'Ac_TCN', 'At_TCN', 'Cc_mad', 'Ct_mad', 'Ac_mad', 'At_mad', 'D_TCN',
       'D_mad', 'loss'],
      dtype='object')

In [5]:
# Select the columns in the desired order ('f_TCN' placed before 'f_mad').
column_order = [
    'idx', 'file', 'data_set',
    'f_TCN', 'f_mad',
    'Cc_TCN', 'Ct_TCN', 'Ac_TCN', 'At_TCN',
    'Cc_mad', 'Ct_mad', 'Ac_mad', 'At_mad',
    'D_TCN', 'D_mad',
    'loss',
]
data = data[column_order]

## Save data

In [13]:
# Persist the table; the row index is written as the first CSV column,
# which is what the loading cell reads back with `index_col=0`.
data.to_csv('../data/data.csv', index=True)

In [77]:
# Persist the performance table; the row index is written as the first
# CSV column, which is what the loading cell reads back with `index_col=0`.
performance.to_csv('../data/performance.csv', index=True)