In [9]:
import pandas as pd
import numpy as np
import os, pathlib
import librosa
from pathlib import Path
import glob
import pickle
import shutil
import datetime
import wave_manipulator

p_home = Path.cwd() / 'data'
p_temp1 = Path.cwd() / 'data' / 'UIOWA' / 'tmp1'
p_temp2 = Path.cwd() / 'data' / 'UIOWA' / 'tmp2'
p_wav = Path.cwd() / 'data' / 'UIOWA' / 'wav'
p_data = '/home/mirko/Documents/data/UIOWA/**'

working_direcs = [p_temp1, p_wav, p_temp2]
for direc in working_direcs:
    if not os.path.exists(direc):
        os.makedirs(direc) 



### Define Parameters for Instruments
___

In [2]:
Instruments = {'Piano' : ['skip', 0.0, 0, 0], 'Guitar' : ['run', 0.1, 20, 33000], 'Viola' : ['run', 0.1, 20, 25000],
         'Violin' : ['run', 0.1, 20, 25000], 'Double Bass' : ['run', 0.1, 20, 25000], 'Cymbals' : ['skip', 0.0, 0, 0],
         'Oboe' : ['run', 0.01, 20, 22000], 'Bass Clarinet' : ['run', 0.01, 20, 22000], 'Bass Flute' : ['run', 0.01, 20, 22000],
         'Bb Clarinet' : ['run', 0.01, 20, 22000], 'Eb Alto Saxophone' : ['run', 0.01, 20, 22000], 'Alto Flute' : ['run', 0.01, 20, 22000],
         'Flute' : ['run', 0.01, 20, 44000], 'Eb Clarinet' : ['run', 0.01, 20, 22000], 'Bb Soprano Saxophone' : ['run', 0.01, 20, 22000],
         'Tuba' : ['run', 0.01, 20, 22000], 'Bass Trombone' : ['run', 0.01, 20, 22000], 'Tenor Trombone' : ['run', 0.01, 20, 22000],
         'Bassoon' : ['run', 0.01, 20, 22000], 'Bb Trumpet' : ['run', 0.01, 20, 22000], 'Marimba' : ['run', 0.01, 20, 4000],
         'Bells' : ['run', 0.01, 20, 4000], 'Crotales' : ['skip', 0.0, 0, 0], 'Cymbals' : ['skip', 0.0, 0, 0], 'Gongs' : ['skip', 0.0, 0, 0],
         'Hand Percussion' : ['skip', 0.0, 0, 0], 'Tamtam' : ['skip', 0.0, 0, 0], 'Tambourines' : ['skip', 0.0, 0, 0],
         'Vibraphone' : ['run', 0.01, 20, 34200], 'Xylophone' : ['run', 0.01, 20, 22000], 'Cello' : ['run', 0.1, 20, 25000], 'Horn' : ['run', 0.01, 20, 22000]
        }

In [2]:
# Instruments = {'Horn' : ['run', 0.01, 20, 22000]}

### Read and structure raw files from disc
___

In [6]:
pathframe = wave_manipulator.create_pathfile(p_data) #Read the filestructure and dump to disc
wave_manipulator.create_rawfiles2(pathframe, p_temp1)

17:29:55 : Creating Pandas Frame with path and labels


### Resample to target rate
___

In [15]:
for instrument, value in Instruments.items():
    data = pd.read_pickle(p_temp1 / (instrument + '.pkl'))
    data = wave_manipulator.resample2(data, sr=44100)
    data.to_pickle(p_temp2 / (instrument + '.pkl'))

### Slice and trim the takeouts
___

In [16]:

for instrument, value in Instruments.items():
    if value[0] == 'skip':
        shutil.copyfile(p_temp2 / (instrument + '.pkl'), p_temp1 / (instrument + '.pkl'))
    else:
        print(instrument, value)
        data = pd.read_pickle(p_temp2 / (instrument + '.pkl'))
        for i in range(60, 20, -1):
            data = wave_manipulator.slice_recording2(data, Ignore=132000, min_loud=value[1], top_db=i, no_samples=value[3])
            data = wave_manipulator.delete_trash(data, length=25000, top_db=100)
        data.to_pickle(p_temp1 / (instrument + '.pkl'))

Horn ['run', 0.01, 20, 22000]


### Iterate to trim until minimum value reached
___

In [17]:
for instrument, value in Instruments.items():
    data = pd.read_pickle(p_temp1 / (instrument + '.pkl'))
    
    for i in range(120, 20, -1):
        n = i/2
        data = wave_manipulator.trim_silence2(data, Ignore=44100*3, top_db=n)
        
    data.to_pickle(p_temp1 / (instrument + '.pkl'))

### Slice trailing end over given length
___

In [18]:
for instrument, value in Instruments.items():
    data = pd.read_pickle(p_temp1 / (instrument + '.pkl'))
    data = wave_manipulator.cut_end(data, length=132300)
    data.to_pickle(p_temp1 / (instrument + '.pkl'))

### Clean the edges
___

In [19]:
for instrument, value in Instruments.items():
    data = pd.read_pickle(p_temp1 / (instrument + '.pkl'))
    data = wave_manipulator.clean_edges(data, length=132299)
    data.to_pickle(p_temp1 / (instrument + '.pkl'))

### Once again drop too silent recordings
___

In [20]:
for instrument, value in Instruments.items():
    data = pd.read_pickle(p_temp1 / (instrument + '.pkl'))
    data = wave_manipulator.delete_trash(data, length=200000, top_db=0.01)
    data.to_pickle(p_temp1 / (instrument + '.pkl'))

### Add Directory name
___

In [8]:
for instrument, value in Instruments.items():
    data = pd.read_pickle(p_temp1 / (instrument + '.pkl'))
    data['directories'] = '[' + data.source + ']' + '[' + data.type + ']'+ '[' + data.instrument + ']' + '[' + data['style'] + ']'
    data.to_pickle(p_temp1 / (instrument + '.pkl'))

In [None]:
data = pd.read_pickle(p_temp1 / ('Piano' + '.pkl'))
data

### Drop to wave for Inspection
___

In [10]:
for instrument, value in Instruments.items():
    data = pd.read_pickle(p_temp1 / (instrument + '.pkl'))
    wave_manipulator.to_wav3(data, p=p_wav, sr=44100)


### Initial Analysis
___

In [3]:
classlist = []
count = []

for instrument, value in Instruments.items():
    data = pd.read_pickle(p_temp1 / (instrument + '.pkl'))
    classes = set(data.directories)
    for item in classes:
        pad = data.loc[data['directories'] == item]
        classlist.append(item)
        count.append(len(pad))
        
structure = pd.DataFrame(list(zip(classlist, count)),
                              columns = ['Directory', 'Nos'])


In [4]:
structure

Unnamed: 0,Directory,Nos
0,[UIOWA][Piano][Piano][unused],206
1,[UIOWA][Guitar][Guitar][unused],736
2,[UIOWA][Strings][Viola][Arco],304
3,[UIOWA][Strings][Viola][Pizzicato],781
4,[UIOWA][Strings][Violin][Pizzicato],643
...,...,...
59,[UIOWA][Percussion][Xylophone][Xylophone],119
60,[UIOWA][Percussion][Xylophone][unused],380
61,[UIOWA][Strings][Cello][Arco],876
62,[UIOWA][Strings][Cello][Pizzicato],1081


### Restructure_1
___

In [79]:

Label = []

restructure = structure.copy()

for index, row in restructure.iterrows():
    direc = row.Directory
    inst = direc.split('][')[2]
    if 'Pizzicato' in direc:
        Label.append('noise')
    elif 'Percussion' in direc:
        Label.append('noise')
    else:
        Label.append(inst)
restructure['Label'] = Label

classes = set(restructure.Label)

classlist = []
count = []

for item in classes:
        pad = restructure.loc[restructure.Label == item]
        classlist.append(item)
        count.append(sum(pad.Nos))
        
restructure_an = pd.DataFrame(list(zip(classlist, count)),
                              columns = ['Label', 'Number of samples'])




In [88]:
restructure

Unnamed: 0,Directory,Nos,Label
0,[UIOWA][Piano][Piano][unused],206,Piano
1,[UIOWA][Guitar][Guitar][unused],736,Guitar
2,[UIOWA][Strings][Viola][Arco],304,noise
3,[UIOWA][Strings][Viola][Pizzicato],781,noise
4,[UIOWA][Strings][Violin][Arco],325,noise
...,...,...,...
59,[UIOWA][Percussion][Xylophone][Xylophone],119,noise
60,[UIOWA][Percussion][Xylophone][unused],380,noise
61,[UIOWA][Strings][Cello][Pizzicato],1081,noise
62,[UIOWA][Strings][Cello][Arco],876,Cello


### Restructure_2
___

In [5]:
inst_dict = {1:'cel', 2:'cla', 3:'flu', 4:'gac', 5:'gel',6:'org',7:'pia',8:'sax',9:'tru',10:'vio',11:'voi'}

instrument = []
Label = []
Label_int = []
restructure = structure.copy()

for index, row in restructure.iterrows():
    direc = row.Directory
    inst = direc.split('][')[2]
    if 'Pizzicato' in direc:
        Label.append('noise')
        instrument.append('various')
        Label_int.append(0)
    elif 'Percussion' in direc:
        Label.append('noise')
        instrument.append('various')
        Label_int.append(0)
    elif 'Trumpet' in direc:
        instrument.append(inst)
        Label.append('Trumpet')
        Label_int.append(9)
    elif 'Guitar' in direc:
        instrument.append(inst)
        Label.append('Guitar')
        Label_int.append(4)
    elif 'Piano' in direc:
        instrument.append(inst)
        Label.append('Piano')
        Label_int.append(7)
    elif 'Clarinet' in direc:
        instrument.append(inst)
        Label.append('Clarinet')
        Label_int.append(2)
    elif 'Saxophone' in direc:
        instrument.append(inst)
        Label.append('Saxophone')
        Label_int.append(8)
    elif 'Violine' in direc:
        instrument.append(inst)
        Label.append('Violine')
        Label_int.append(10)
    elif 'Flute' in direc:
        instrument.append(inst)
        Label.append('Flute')
        Label_int.append(3)
    elif 'Cello' in direc:
        instrument.append(inst)
        Label.append('Cello')
        Label_int.append(1)
        
    else:
        Label.append('noise')
        instrument.append('various')
        Label_int.append(0)
restructure['Label'] = Label
restructure['Instrument'] = instrument
restructure['Label_int'] = Label_int

inst = set(restructure.Instrument)

instlist = []
count = []
label = []

for item in inst:
        pad = restructure.loc[restructure.Instrument == item]
        instlist.append(item)
        count.append(sum(pad.Nos))
        label.append(pad.iloc[0].Label)
        
restructure_an = pd.DataFrame(list(zip(label, instlist, count)),
                              columns = ['Label', 'Instrument', 'Number of samples'])

data = wave_manipulator.create_dataframe(p_wav, restructure)

In [10]:
data.to_pickle(p_home / ('UIWOA_restructure_2.pkl'))

In [107]:
restructure_an.sort_values('Label')

Unnamed: 0,Label,Instrument,Number of samples
12,Cello,Cello,876
2,Clarinet,Bass Clarinet,185
6,Clarinet,Eb Clarinet,158
10,Clarinet,Bb Clarinet,185
1,Flute,Alto Flute,135
3,Flute,Flute,303
11,Flute,Bass Flute,140
7,Guitar,Guitar,736
8,Piano,Piano,206
0,Saxophone,Eb Alto Saxophone,256


In [108]:
restructure

Unnamed: 0,Directory,Nos,Label,Instrument
0,[UIOWA][Piano][Piano][unused],206,Piano,Piano
1,[UIOWA][Guitar][Guitar][unused],736,Guitar,Guitar
2,[UIOWA][Strings][Viola][Arco],304,noise,various
3,[UIOWA][Strings][Viola][Pizzicato],781,noise,various
4,[UIOWA][Strings][Violin][Arco],325,noise,various
...,...,...,...,...
59,[UIOWA][Percussion][Xylophone][Xylophone],119,noise,various
60,[UIOWA][Percussion][Xylophone][unused],380,noise,various
61,[UIOWA][Strings][Cello][Pizzicato],1081,noise,various
62,[UIOWA][Strings][Cello][Arco],876,Cello,Cello
