In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy import signal
from scipy.io import wavfile
import io
from PIL import Image
import librosa

# NOTE: machine-specific absolute paths -- change both before running on another system.
# test_file: single WAV recording used by the exploratory cells below.
# root: base folder; a later cell reads the WAV files to process from root + '/before/'.
test_file = '/home/george/Documents/george_vae/testing/Y9_44818.40200373_9_14_11_10_0.wav'
root = '/home/george/Documents/other/song_extractor'

In [None]:
# create sonogram of the whole song 
 
def wav_to_numpy(file):
    """Read a WAV file and hand back ``(sample_rate, samples)``.

    This is a thin convenience wrapper: the pair is exactly what
    ``scipy.io.wavfile.read`` produces for ``file``.
    """
    return wavfile.read(file)

def createSonogram(songfile, nfft=512, noverlap=25, fmin=400, fmax=8000):
    """Compute a log-scaled, band-limited spectrogram ("sonogram") of a WAV file.

    Parameters
    ----------
    songfile : str
        Path of the WAV file to analyse.
    nfft : int, optional
        FFT length forwarded to ``scipy.signal.spectrogram``.
    noverlap : int, optional
        Number of samples shared by consecutive analysis windows.
    fmin, fmax : float, optional
        Inclusive frequency band, in Hz, that is kept in the result.

    Returns
    -------
    plt : module
        Whatever the global name ``plt`` refers to (``matplotlib.pyplot`` in
        this notebook). Nothing is drawn here; it is returned only because
        existing callers unpack two values.
    np_sonogram : numpy.ndarray
        float32 array of shape ``(n_kept_frequencies, n_time_frames)`` holding
        ``log(Sxx + 1)`` of the power spectral density.
    """
    rate, data = wavfile.read(songfile)

    data = np.asarray(data)
    if data.ndim > 1:
        # Multi-channel files come back as (n_samples, n_channels). The
        # spectrogram works along the last axis, so passing that through would
        # analyse across channels instead of across time. Mix down to mono.
        data = data.mean(axis=1)

    freqs, _times, Sxx = signal.spectrogram(
        data, rate, nfft=nfft, noverlap=noverlap, scaling="density"
    )

    # Keep only the frequency rows inside the band of interest.
    in_band = (freqs >= fmin) & (freqs <= fmax)
    Sxx = Sxx[in_band, :]

    # log(x + 1) compresses the dynamic range while keeping zeros at zero.
    np_sonogram = np.log(Sxx + 1).astype("float32")

    return plt, np_sonogram

# np_sonogram is the sonogram of the test recording as a numpy array
# (rows = kept frequency bins, columns = time frames).
plt, np_sonogram = createSonogram(test_file)
print(np_sonogram.shape)

# Duration in milliseconds, derived once from the sample count and sample rate.
# This avoids librosa.get_duration(filename=...), whose `filename` keyword was
# renamed to `path` in librosa 0.10, so the call depends on the installed version.
sample_rate, samples = wav_to_numpy(test_file)
duration_ms = samples.shape[0] / sample_rate * 1000
print(duration_ms)

# How many milliseconds one sonogram column (x pixel) represents.
ms_per_x_pixels = duration_ms / np_sonogram.shape[1]
print(ms_per_x_pixels)

In [None]:
# Swap the two axes so that rows are time frames and columns are frequency bins.
np_sonogram = np_sonogram.transpose()

# Add up every frequency bin of a frame: one value per time frame.
vertical_sum = np_sonogram.sum(axis=1)

# The x axis of this plot is time (frame index).
plt.plot(vertical_sum) 

In [None]:
from matplotlib.pyplot import axis 

# Width of the moving-average window, counted in entries of vertical_sum.
window_size = 100
# Minimum windowed mean a position needs to be kept (see sliding_window_average).
threshold = 20

def sliding_window_average(vertical_sum, window_size, threshold):
    """Moving average of ``vertical_sum`` with sub-threshold values zeroed.

    A flat kernel of ``int(window_size)`` taps, each weighing
    ``1 / window_size``, is convolved with the input in ``'same'`` mode, so
    the output has as many entries as the longer of the two. Every averaged
    value that is not strictly greater than ``threshold`` is replaced by 0.
    """
    taps = int(window_size)
    kernel = np.ones(taps) / float(window_size)
    smoothed = np.convolve(vertical_sum, kernel, mode='same')
    loud_enough = smoothed > threshold
    return np.where(loud_enough, smoothed, 0)

# Smoothed per-frame sums; frames that stay at or below the threshold become 0.
y = sliding_window_average(vertical_sum, window_size, threshold)

# Positions (time frames) whose smoothed value survived the threshold.
indices = np.flatnonzero(y > 0)

# Show only the surviving frames, turned back to frequency-by-time for display.
kept_frames = np_sonogram[indices]
plt.figure(figsize=(300,5))
plt.imshow(kept_frames.T, origin='lower')


## Creating extracted image files

In [None]:
# Set to True to also write one PNG per recording (song frames only) to 'after'.
save_full_sonogram = False
# One (frequency, kept-time-frames) array per processed WAV file, in sorted file order.
list_of_np_sonograms = []

# Derive both folders from `root` so the paths are defined in one place.
before_dir = os.path.join(root, 'before')
after_dir = os.path.join(root, 'after')

# Delete all the files in the 'after' folder so old output never mixes with new.
os.makedirs(after_dir, exist_ok=True)
for file in os.listdir(after_dir):
    stale_path = os.path.join(after_dir, file)
    # os.remove cannot delete directories; leave anything that is not a file alone.
    if os.path.isfile(stale_path):
        os.remove(stale_path)

# sorted() makes the order (and therefore the index of every sonogram in
# list_of_np_sonograms) reproducible; os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(before_dir)):
    if not filename.endswith('.wav'):
        continue

    file = os.path.join(before_dir, filename)
    plt, np_sonogram = createSonogram(file)
    # rows = time frames, columns = frequency bins
    np_sonogram = np_sonogram.T
    vertical_sum = np.sum(np_sonogram, axis=1)
    y = sliding_window_average(vertical_sum, window_size, threshold)
    # time frames whose smoothed sum is above the threshold
    indices = np.where(y > 0)[0]

    if save_full_sonogram:
        # A fresh figure per file, closed after saving: otherwise every imshow
        # lands on the same axes and figures pile up in memory.
        plt.figure()
        plt.imshow(np_sonogram[indices].T, origin='lower')
        plt.axis('off')
        plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
        plt.margins(0,0)

        plt.savefig(os.path.join(after_dir, filename + '.png'), bbox_inches='tight', pad_inches=0, dpi=300)
        plt.close()

    list_of_np_sonograms.append(np_sonogram[indices].T)

## Create the segments here, directly from the in-memory `np_sonogram` arrays, so they don't have to be written and rewritten every time

In [None]:
def create_segments(sonogram, window_length=88, window_overlap=.9):
    """List the ``[start, end)`` time-frame ranges of overlapping square windows.

    Parameters
    ----------
    sonogram : numpy.ndarray
        2-D array with frequency bins on axis 0 and time frames on axis 1.
    window_length : int, optional
        Window width in time frames. A window is only produced when the
        sonogram also has exactly this many frequency bins, so every segment
        is ``window_length x window_length``.
        (The original note on the default of 88 said "120ms" -- TODO confirm
        against the measured ms-per-pixel value.)
    window_overlap : float, optional
        Fraction of a window shared with the next one; the step between
        windows is ``int(window_length * (1 - window_overlap))`` frames.

    Returns
    -------
    list of [int, int]
        ``[start_frame, end_frame]`` pairs (end exclusive), in time order.
        Empty when the sonogram is shorter than one window or its number of
        frequency bins differs from ``window_length``.
    """
    window_length = int(window_length)
    # A step of 0 (tiny window or overlap close to 1) would never advance.
    step_size = max(1, int(window_length * (1 - window_overlap)))

    sonogram = np.asarray(sonogram)
    if sonogram.ndim != 2 or sonogram.shape[0] != window_length:
        # Segments must be square; nothing can be cut from this sonogram.
        return []

    n_frames = sonogram.shape[1]
    # "+ 1" keeps the last window whose end lands exactly on the final frame;
    # the end index is exclusive, so end == n_frames is still a full window.
    last_start = n_frames - window_length
    return [
        [start_frame, start_frame + window_length]
        for start_frame in range(0, last_start + 1, step_size)
    ]

# Window positions for every extracted sonogram, index-aligned with list_of_np_sonograms.
list_of_positions = [create_segments(sonograms) for sonograms in list_of_np_sonograms]

In [None]:
import matplotlib

# Clean and write the same folder: derive it from `root` instead of cleaning an
# absolute path while saving to a path relative to the current working directory.
segments_dir = os.path.join(root, 'segments')

# Delete all files in the segments folder so old images never mix with new ones.
os.makedirs(segments_dir, exist_ok=True)
for file in os.listdir(segments_dir):
    stale_path = os.path.join(segments_dir, file)
    # os.remove cannot delete directories; leave anything that is not a file alone.
    if os.path.isfile(stale_path):
        os.remove(stale_path)

for i, sonogram in enumerate(list_of_np_sonograms):
    print(f"sonogram number: {i}")
    for j, (start_frame, end_frame) in enumerate(list_of_positions[i]):
        # sonogram is (frequency, time): cut the window along the time axis.
        segment = sonogram[:, start_frame:end_frame]
        # 1.28 in at 100 dpi -> a 128 x 128 pixel image per segment.
        plt.figure(figsize=(1.28,1.28))
        plt.imshow(segment)
        plt.gca().set_axis_off()
        plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, 
                    hspace = 0, wspace = 0)
        plt.margins(0,0)
        plt.savefig(os.path.join(segments_dir, 'test' + 'file:' + str(i) + '_segment:' + str(j) + '.png'), dpi=100)
        # Close every figure, otherwise they accumulate in memory across the loop.
        plt.close()