In [1]:
# Path of the .wav file that the cells below read and parse.
# NOTE(review): this is an absolute path on one machine; the notebook only runs where that file exists.
filename = "/Users/joemeyer/Documents/gpt2-sound-gen/sound_data_toy/violin_G4_phrase_forte_harmonic-glissando.wav"

In [2]:
def file_to_hex_str(filename):
    """Read the file at `filename` in binary mode and return its bytes as one hex string.

    Every byte contributes two hex digits, in file order.
    """
    with open(filename, 'rb') as handle:
        return handle.read().hex()

In [3]:
def hex_str_to_ls(hex_str):
    """Cut `hex_str` into consecutive two-character pieces and return them as a list.

    If the string has odd length, the final piece holds the single leftover character.
    """
    return [hex_str[start:start + 2] for start in range(0, len(hex_str), 2)]

In [4]:
def file_to_hex_ls(filename):
    """Return the contents of the file at `filename` as a list of two-digit hex strings.

    Composes file_to_hex_str (file -> hex string) with hex_str_to_ls
    (hex string -> list of two-character pieces).
    """
    return hex_str_to_ls(file_to_hex_str(filename))

In [5]:
def get_number(hex_ls):
    """Decode a little-endian sequence of hex byte strings into an integer.

    The first element is the least significant byte, so
    ['10', '01'] is read as 0x0110 = 256 + 16 = 272.
    """
    # Put the most significant byte first, then parse the digits as one base-16 number.
    big_endian_digits = ''.join(reversed(list(hex_ls)))
    return int(big_endian_digits, 16)

In [6]:
def get_n_bytes(n, hex_ls, i):
    """Return the `n` elements of `hex_ls` starting at index `i`, plus the advanced index.

    The returned index is always i + n; the slice is shorter than `n` when
    fewer than `n` elements remain.
    """
    stop = i + n
    return hex_ls[i:stop], stop

In [7]:
def get_n_bytes_int(n, hex_ls, i):
    """Read `n` elements of `hex_ls` from index `i` and decode them with get_number.

    Returns (decoded integer, advanced index).
    """
    raw, next_i = get_n_bytes(n, hex_ls, i)
    value = get_number(raw)
    return value, next_i

In [8]:
def get_n_bytes_str(n, hex_ls, i):
    """Read `n` elements of `hex_ls` from index `i` and decode them with read_hex_ls.

    Returns (decoded text, advanced index).
    """
    raw, next_i = get_n_bytes(n, hex_ls, i)
    text = read_hex_ls(raw)
    return text, next_i

In [9]:
def read_hex_ls(hex_str):
    """Turn a sequence of hex strings into text, one character per element.

    Each element is parsed as a base-16 code point and mapped through chr().
    """
    return ''.join(chr(int(code, 16)) for code in hex_str)

In [10]:
def extract_header(hex_ls):
    """Parse the RIFF/WAVE header in `hex_ls` and find where the sample data starts.

    Args:
        hex_ls: list of two-digit hex strings, one element per byte of the file.

    Returns:
        (hex_ls, num_channels, bits_per_sample, i), where `i` is the index of
        the first element after the 'data' chunk's id and size fields.

    Raises:
        AssertionError: if the input does not start with 'RIFF', is not
            followed (after the size field) by 'WAVEfmt', or the audio
            format code is not 1 (PCM).
        ValueError: if the end of `hex_ls` is reached without finding a
            'data' chunk.

    Side effect: prints each parsed header field.
    """
    i = 0
    # check chunk format
    expected_chunk_format = 'RIFF'
    chunk_format, i = get_n_bytes_str(4, hex_ls, i)
    assert chunk_format == expected_chunk_format, chunk_format
    # get chunk size
    chunk_size, i = get_n_bytes_int(4, hex_ls, i)
    print(f"chunk_size: {chunk_size}")
    # check fmt stuff
    expected_next_chunk = 'WAVEfmt'
    next_chunk, i = get_n_bytes_str(7, hex_ls, i)
    assert next_chunk == expected_next_chunk, next_chunk
    # skip a byte (the fourth character of the 'fmt ' chunk id)
    _, i = get_n_bytes(1, hex_ls, i)
    # get subchunk1size
    subchunk1size, i = get_n_bytes_int(4, hex_ls, i)
    print(f"subchunk1size: {subchunk1size}")
    # remember where the fmt chunk body starts so its declared size can be honoured below
    fmt_body_start = i
    # make sure it's PCM
    audio_format, i = get_n_bytes_int(2, hex_ls, i)
    pulse_code_modulation_format = 1
    assert audio_format == pulse_code_modulation_format, audio_format
    # get num chans
    num_channels, i = get_n_bytes_int(2, hex_ls, i)
    print(f"num_channels: {num_channels}")
    # get sample rate
    sample_rate, i = get_n_bytes_int(4, hex_ls, i)
    print(f"sample_rate: {sample_rate}")
    # get byte_rate
    byte_rate, i = get_n_bytes_int(4, hex_ls, i)
    print(f"byte_rate: {byte_rate}")
    # get block_align
    block_align, i = get_n_bytes_int(2, hex_ls, i)
    print(f"block_align: {block_align}")
    # get bits_per_sample
    bits_per_sample, i = get_n_bytes_int(2, hex_ls, i)
    print(f"bits_per_sample: {bits_per_sample}")
    print("header extracted.")
    # Skip whatever is left of the fmt chunk (sizes above 16 carry extra
    # fields). RIFF chunk bodies are padded to an even length. max() keeps
    # the index from moving backwards when the declared size is below 16.
    fmt_body_end = fmt_body_start + subchunk1size + (subchunk1size % 2)
    i = max(i, fmt_body_end)
    # Walk the remaining chunks by (id, size) until 'data' is found. Scanning
    # in blind 4-byte steps never terminates when 'data' is absent or is not
    # on a 4-byte boundary relative to the end of the fmt fields.
    while True:
        if i + 8 > len(hex_ls):
            raise ValueError("no 'data' chunk found before end of input")
        chunk_id, i = get_n_bytes_str(4, hex_ls, i)
        chunk_len, i = get_n_bytes_int(4, hex_ls, i)
        if chunk_id == 'data':
            break
        # not the data chunk: jump over its body and pad byte
        i += chunk_len + (chunk_len % 2)
    # get subchunk2size
    subchunk2size = chunk_len
    print(f"subchunk2size: {subchunk2size}")
    return hex_ls, num_channels, bits_per_sample, i

In [11]:
def extract_body(hex_ls, num_channels, binary_bits_per_sample, i):
    """Split the interleaved sample bytes starting at index `i` into per-channel lists.

    Args:
        hex_ls: list of two-digit hex strings, one element per byte.
        num_channels: number of channels interleaved in each frame.
        binary_bits_per_sample: bits in one sample of one channel (the
            bits-per-sample header field).
        i: index in `hex_ls` of the first sample byte.

    Returns:
        defaultdict(list) mapping channel index -> list of samples, each
        decoded as an unsigned little-endian integer. Channels that receive
        no samples have no key.

    Raises:
        AssertionError: if `num_channels` or `binary_bits_per_sample` is not
            positive.
    """
    from collections import defaultdict

    assert num_channels > 0, num_channels
    assert binary_bits_per_sample > 0, binary_bits_per_sample

    # Every element of hex_ls is a whole byte (two hex digits), so the slice
    # width must be counted in bytes, not hex digits. The bits-per-sample
    # field already describes a single channel, so it is not divided by
    # num_channels.
    bytes_per_sample = (binary_bits_per_sample + 7) // 8
    bytes_per_frame = bytes_per_sample * num_channels

    # NOTE(review): samples are decoded as unsigned; 16-bit PCM WAV data is
    # conventionally signed - confirm what downstream code expects.
    # NOTE(review): decoding runs to the end of hex_ls, so any chunk stored
    # after the sample data would be decoded as samples as well.
    data_channels = defaultdict(list)  # maps {channel(int): data(list)}
    # Only complete frames are decoded; trailing bytes that do not fill a
    # whole frame are ignored instead of producing a truncated sample.
    last_frame_start = len(hex_ls) - bytes_per_frame
    while i <= last_frame_start:
        for channel in range(num_channels):
            sample_bytes = hex_ls[i:i + bytes_per_sample]
            # little endian: reverse so the most significant byte comes first
            channel_sample = int(''.join(reversed(sample_bytes)), 16)
            data_channels[channel].append(channel_sample)
            i += bytes_per_sample
    return data_channels

In [12]:
def extract_data(filename):
    """Load the file at `filename` and return its samples grouped by channel.

    Reads the file into a list of per-byte hex strings, runs extract_header
    on it, and passes the four values it returns straight to extract_body.
    """
    header_fields = extract_header(file_to_hex_ls(filename))
    return extract_body(*header_fields)

In [16]:
# Load the file as a list of two-digit hex strings, one per byte.
data_hex_ls = file_to_hex_ls(filename)
# NOTE(review): echoing the whole list prints one line per byte; a slice (e.g. data_hex_ls[:44]) would be enough to inspect the header.
data_hex_ls

['52',
 '49',
 '46',
 '46',
 'f0',
 'f4',
 '03',
 '00',
 '57',
 '41',
 '56',
 '45',
 '66',
 '6d',
 '74',
 '20',
 '10',
 '00',
 '00',
 '00',
 '01',
 '00',
 '01',
 '00',
 '44',
 'ac',
 '00',
 '00',
 '88',
 '58',
 '01',
 '00',
 '02',
 '00',
 '10',
 '00',
 '64',
 '61',
 '74',
 '61',
 '00',
 'f0',
 '03',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',

In [17]:
# Parse the header; i ends up at the index just past the 'data' chunk's size field.
hex_ls, num_channels, bits_per_sample, i = extract_header(data_hex_ls)

chunk_size: 259312
subchunk1size: 16
num_channels: 1
sample_rate: 44100
byte_rate: 88200
block_align: 2
bits_per_sample: 16
header extracted.
subchunk2size: 258048


In [24]:
# extract_header returns its hex_ls argument unchanged, so this shows the same list as data_hex_ls.
hex_ls

['52',
 '49',
 '46',
 '46',
 'f0',
 'f4',
 '03',
 '00',
 '57',
 '41',
 '56',
 '45',
 '66',
 '6d',
 '74',
 '20',
 '10',
 '00',
 '00',
 '00',
 '01',
 '00',
 '01',
 '00',
 '44',
 'ac',
 '00',
 '00',
 '88',
 '58',
 '01',
 '00',
 '02',
 '00',
 '10',
 '00',
 '64',
 '61',
 '74',
 '61',
 '00',
 'f0',
 '03',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',
 '00',

In [64]:
# Small scratch dict of lists, used by the following cells to try out doubling each stored list.
d = {0: [1,2,4], 5: [3,5,6]}

In [65]:
# Echo d as just defined.
d

{0: [1, 2, 4], 5: [3, 5, 6]}

In [66]:
# Replace every list stored in d with its doubled version. Only values of
# existing keys are reassigned, so the dict's size does not change while
# it is being iterated.
# NOTE(review): double_ls is defined in a later cell of this notebook, so on
# a fresh top-to-bottom run this cell raises NameError.
for ix, val in d.items():
    d[ix] = double_ls(val)

In [67]:
# Echo d after the loop above reassigned each value through double_ls.
d

{0: [2, 4, 8], 5: [6, 10, 12]}

In [38]:
def double_ls(ls):
    """Return a new list holding each element of `ls` multiplied by 2."""
    return [item * 2 for item in ls]

In [41]:
# map() is lazy and its result is not stored, so this call neither runs double_ls nor changes d.
map(double_ls, d.values())

<map at 0x7fe640c71c90>

In [42]:
# Echo d to check whether the map() call above changed it.
d

{0: [1, 2, 4], 5: [3, 5, 6]}

In [37]:
# Quick check of double_ls on a literal list; double_ls already returns a list, so the outer list() only copies it.
list(double_ls([1,2,4]))

[2, 4, 8]

In [31]:
# Echo d's current contents.
d

{0: [1, 2, 4], 5: [3, 5, 6]}

In [20]:
# Alias under extract_body's parameter name - presumably so that function's body can be stepped through at top level.
binary_bits_per_sample = bits_per_sample

In [23]:
# Number of hex digits needed for one sample: each hex digit encodes 4 bits.
assert binary_bits_per_sample % 4 == 0
hex_bits_per_sample = binary_bits_per_sample // 4  # 16 = 2**4
# Echo the computed digit count.
hex_bits_per_sample

4

In [22]:
# Top-level copy of extract_body's loop, operating on the notebook's global variables.
# NOTE(review): hex_bits_per_sample_per_channel counts hex digits, but
# get_n_bytes_int consumes that many list elements, and each element of
# hex_ls is a whole byte (two hex digits) - confirm the intended sample width.
assert hex_bits_per_sample % num_channels == 0
hex_bits_per_sample_per_channel = hex_bits_per_sample // num_channels
from collections import defaultdict
data_channels = defaultdict(list)  # maps {channel(int): data(list)}
# NOTE(review): the loop advances the global i. Once i has reached
# len(hex_ls) (for example after an earlier run of this cell) the body never
# executes and data_channels stays empty.
while i < len(hex_ls):
    for channel in range(num_channels):
        channel_sample, i = get_n_bytes_int(hex_bits_per_sample_per_channel, hex_ls, i)
        data_channels[channel].append(channel_sample)
# Echo the per-channel sample lists.
data_channels

defaultdict(list, {})

In [13]:
# Run the full pipeline: read the file, parse the header, split the samples per channel.
# NOTE(review): the result is echoed in full, one line per sample; assigning it to a name and showing a slice would keep the output short.
extract_data(filename)

chunk_size: 259312
subchunk1size: 16
num_channels: 1
sample_rate: 44100
byte_rate: 88200
block_align: 2
bits_per_sample: 16
header extracted.
subchunk2size: 258048


defaultdict(list,
            {0: [0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
              0,
          