In [1]:
import pandas
import numpy
import matplotlib.pyplot as plt
import os

In [78]:
sf = pandas.read_csv('../strings.wav_data.csv')
sf.columns

Index([u'original filename', u'compressed name', u'Window Length',
       u'Window Overlap', u'RMS Error', u'Input Size', u'Compress File Size',
       u'Compression Ratio', u'FilterBank Num SubBands ', u'FilterBank Length',
       u'Adaptive Byte Buff', u'Adaptive Huffman', u'Model f1', u'Model f2',
       u'Comp IO Time (ms)', u'Decomp IO Time (ms)',
       u'Comp Filter Bank Time (ms)', u'Decomp Filter Bank Time (ms)',
       u'Comp MDCT Time (ms)', u'Decomp MDCT Time (ms)',
       u'Comp Byte Bufferizer (ms)', u'Decomp Byte Bufferizer (ms)',
       u'Comp Huffman Time (ms)', u'Decomp Huffman Time (ms)',
       u'Comp Serialize Time (ms)', u'Decomp Serialize Time (ms)',
       u'Total Compression time (ms)', u' Total Decompression time (ms)',
       u'Unnamed: 28'],
      dtype='object')

In [76]:
# load string wav data
sf = pandas.read_csv('../strings.wav_data.csv')
var_columns = sf.columns[[2,8,9,10,11,12,13]]
metric_columns = sf.columns[[4,7,26,27]]

# make plots of free variables vs metrics
for i in range(var_columns.size):
    name = var_columns[i]
    uniqueVars = sf[name].unique()
    print name, uniqueVars
    for j in range(metric_columns.size):
        metric = metric_columns[j]
        d = [ sf[ sf[name]==uniqueVars[k] ][metric].values for k in range(uniqueVars.size)]
        plt.figure(figsize=(10,7))
        plt.title(sf["original filename"][0] + "\n" + metric + " vs. " + name, size=20)
        plt.ylabel(metric, size=16)
        plt.xlabel(name, size=16)
        for label in (plt.gca().get_xticklabels() + plt.gca().get_yticklabels()):
            label.set_fontsize(16)
        plt.boxplot(d, labels=uniqueVars, showmeans=True)
        plt.savefig('./stringsWav/' + name + '_' + metric + '.png')
        plt.close()
        

Window Length [ 256  512 1024 2048 4096]
FilterBank Num SubBands  [ 2  4  8 16 32]
FilterBank Length [ 256  512 1024 2048 4096]
Adaptive Byte Buff [True False]
Adaptive Huffman [True False]
Model f1 [   0.  100.  200.  300.]
Model f2 [     0.  15000.  12500.  10000.   8000.]


In [74]:
# load all data
files = [f for f in os.listdir('../') if f[-3:]=="csv"]
frames = [pandas.read_csv('../'+f) for f in files]
var_columns = frames[0].columns[[2,8,9,10,11,12,13]]
metric_columns = frames[0].columns[[4,7,26,27]]

for i in range(var_columns.size):
    name = var_columns[i]
    uniqueVars = sf[name].unique()
    print name, uniqueVars
    for j in range(metric_columns.size):
        metric = metric_columns[j]
        plt.figure(figsize=(13,7))
        plt.ylabel(metric, size=16)
        plt.xlabel(name, size=16)
        for label in (plt.gca().get_xticklabels() + plt.gca().get_yticklabels()):
            label.set_fontsize(16)
        plt.title("Mean values for\n" + metric + " vs. " + name, size=20)
        for frame in frames:
            d = [ frame[ frame[name]==uniqueVars[k] ][metric].values.mean() for k in range(uniqueVars.size)]
            plt.scatter(uniqueVars, d, label=frame["original filename"][0], s=100)
        
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0, fontsize=16)
        plt.gca().set_position([0.1,0.12,0.58,0.75])
        plt.savefig('./meanPlots/' + name + '_' + metric + '_mean.png')
        plt.close()

Window Length [ 256  512 1024 2048 4096]
FilterBank Num SubBands  [ 2  4  8 16 32]
FilterBank Length [ 256  512 1024 2048 4096]
Adaptive Byte Buff [True False]
Adaptive Huffman [True False]
Model f1 [   0.  100.  200.  300.]
Model f2 [     0.  15000.  12500.  10000.   8000.]


In [75]:
# load all data
files = [f for f in os.listdir('../') if f[-3:]=="csv"]
frames = [pandas.read_csv('../'+f) for f in files]
var_columns = frames[0].columns[[2,8,9,10,11,12,13]]
metric_columns = frames[0].columns[[4,7,26,27]]
filenames = [ frame.get_values()[0][0] for frame in frames]

for j in range(metric_columns.size):
    metric = metric_columns[j]
    print metric
    plt.figure(figsize=(13,10))
    plt.ylabel(metric, size=16)
    plt.xlabel(name, size=16)
    for label in (plt.gca().get_xticklabels() + plt.gca().get_yticklabels()):
        label.set_fontsize(16)
    plt.title(metric + " vs. input file", size=20)
    d = [ frame[metric].values for frame in frames]
    plt.boxplot(d, showmeans=True, labels=filenames)
    plt.gca().set_xticklabels(plt.gca().xaxis.get_majorticklabels(), rotation=-60)
    plt.gca().set_position([0.125,0.3,0.8,0.6])
    plt.savefig('./allFilesPlots/' + metric + '_fileBoxPlots.png')
    plt.close()

RMS Error
Compression Ratio
Total Compression time (ms)
 Total Decompression time (ms)


In [103]:
# find the extremes of rms and compression
# load all data
files = [f for f in os.listdir('../') if f[-3:]=="csv"]
frames = [pandas.read_csv('../'+f) for f in files]

frame = frames[4]

idx = frame['RMS Error'].idxmin()
print frame.ix[idx], '\n'

idx = frame['Compression Ratio'].idxmin()
print frame.ix[idx], '\n'

idx = frame['Total Compression time (ms)'].idxmin()
print frame.ix[idx], '\n'

idx = frame[' Total Decompression time (ms)'].idxmin()
print frame.ix[idx], '\n'



original filename                 drumkit.wav
compressed name                   drumkit.jet
Window Length                            4096
Window Overlap                              0
RMS Error                          0.00492809
Input Size                            2684136
Compress File Size                    2590238
Compression Ratio                    0.965017
FilterBank Num SubBands                     8
FilterBank Length                        4096
Adaptive Byte Buff                       True
Adaptive Huffman                         True
Model f1                                    0
Model f2                                    0
Comp IO Time (ms)                    0.216131
Decomp IO Time (ms)                   512.639
Comp Filter Bank Time (ms)            250.262
Decomp Filter Bank Time (ms)          42.4406
Comp MDCT Time (ms)                         0
Decomp MDCT Time (ms)                       0
Comp Byte Bufferizer (ms)             69.3427
Decomp Byte Bufferizer (ms)       

In [31]:
# simple numerical stats

# load all data
files = [f for f in os.listdir('../') if f[-3:]=="csv"]
frames = [pandas.read_csv('../'+f) for f in files]
var_columns = frames[0].columns[[2,8,9,10,11,12,13]]
metric_columns = frames[0].columns[[4,7,26,27]]

x = numpy.zeros(1)
print x.max()

for frame in frames:
    print "\n\n", frame.get_values()[0][0]
    print "======================="
    for metric in metric_columns:
        print metric + " range: \t", frame[metric].values.min(), " - ", frame[metric].values.max()

0.0


kickdrum.wav
RMS Error range: 	0.006848175  -  0.065828174
Compression Ratio range: 	0.15176006  -  1.3883418
Total Compression time (ms) range: 	137.493691  -  1055.351196
 Total Decompression time (ms) range: 	143.93418  -  1048.566889


ep_low.wav
RMS Error range: 	0.0051043807  -  0.049986288
Compression Ratio range: 	0.30511665  -  1.3779107
Total Compression time (ms) range: 	347.509291  -  2247.46045
 Total Decompression time (ms) range: 	359.655541  -  2244.8397


hihat.wav
RMS Error range: 	0.0021004002  -  0.029033162
Compression Ratio range: 	0.20670244  -  1.384866
Total Compression time (ms) range: 	193.677115  -  1355.136392
 Total Decompression time (ms) range: 	201.86115  -  1346.632388


synth_bass.wav
RMS Error range: 	0.016779352  -  0.15765792
Compression Ratio range: 	0.347698  -  1.3775389
Total Compression time (ms) range: 	189.223773  -  1222.78157
 Total Decompression time (ms) range: 	203.069496  -  1219.646751


drumkit.wav
RMS Error range: 	0.004928087