<a href="https://colab.research.google.com/github/hfwittmann/sound/blob/master/Compress_audio_via_images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In this notebook we will use an image compression technique (truncated SVD) to compress audio signals.

The audio files in this notebook are from the ESC-50 dataset:

- ESC-50: Dataset for Environmental Sound Classification
- https://github.com/karoldvl/ESC-50/
- https://dx.doi.org/10.7910/DVN/YDEPUT


A good demonstration of SVD (singular value decomposition) applied to image compression is here:
http://timbaumann.info/svd-image-compression-demo/


The outline is as follows:

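- Show a function that compresses an image (with a matrix-based technique called SVD, singular value decomposition)
- Apply it to sample images
- Load sound files, convert them to spectrograms (matrices), compress these with the same function and convert the result back to audio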

In [0]:
%matplotlib inline
import numpy as np

# Define plotting helper functions

In [0]:
from librosa import display

def plot_spectrum(data, name):
    """Draw `data` as a spectrogram with a title and a colorbar.

    The matrix is rendered with librosa's ``specshow`` using a logarithmic
    y axis and a time x axis. The title is ``'Power spectrogram of <name>'``
    and the colorbar ticks are formatted as dB values. ``plt.show()`` is not
    called here.
    """
    axis_scales = {'x_axis': 'time', 'y_axis': 'log'}
    display.specshow(data, **axis_scales)

    heading = f'Power spectrogram of {name}'
    plt.title(heading)
    plt.colorbar(format='%+2.0f dB')
    plt.tight_layout()

# Define compression helper functions

In [0]:
# Fixed seed value so that repeated runs of the notebook start from the same random state.
random_seed = 0
from sklearn.decomposition import TruncatedSVD
# Seed NumPy's global random number generator with the value above.
np.random.seed(random_seed)

def compress(imageIn, n_components=100, random_seed=0):
    """Return a low-rank approximation of an image computed with truncated SVD.

    Every layer (e.g. the r, g and b channels) is factorised separately with
    ``TruncatedSVD`` and rebuilt from its leading components.

    Parameters
    ----------
    imageIn : array-like
        2-D array (a single layer) or 3-D array whose last axis indexes the
        layers.
    n_components : int, default 100
        Number of components kept per layer. Values larger than the number of
        columns of a layer are reduced to that number.
    random_seed : int, default 0
        Passed to ``TruncatedSVD`` as ``random_state`` so that the result does
        not depend on the state of the global random number generator.

    Returns
    -------
    numpy.ndarray
        Array with the same shape and dtype as the input. For integer dtypes
        the values are clipped to the range of that dtype before casting
        (0..255 for uint8); floating point data is returned unclipped so that
        signed data such as the real/imaginary part of a spectrogram keeps
        its negative values.

    Raises
    ------
    ValueError
        If the input is neither 2-D nor 3-D.
    """
    image = np.asarray(imageIn)
    original_shape = image.shape

    if image.ndim not in (2, 3):
        raise ValueError(
            'not sure what image type this is: expected a 2-D or 3-D array, '
            f'got {image.ndim} dimension(s)'
        )

    if image.ndim == 2:
        # Gray image: add a trailing axis so it is handled like a
        # one-layer colour image.
        image = image[:, :, np.newaxis]

    compressed_list = []
    for layer in range(image.shape[2]):
        image_layer = image[:, :, layer]  # ie r, g or b

        # A layer cannot provide more components than it has columns.
        rank = min(n_components, image_layer.shape[1])

        clf = TruncatedSVD(n_components=rank, random_state=random_seed)
        clf.fit(image_layer)
        compressed_layer = clf.inverse_transform(clf.transform(image_layer))

        compressed_list.append(compressed_layer)

    compressed = np.stack(compressed_list, axis=2)

    # The reconstruction can overshoot the representable range; clip before
    # casting so integer images do not wrap around. Float data is left alone
    # because it has no fixed range and may legitimately be negative.
    if np.issubdtype(image.dtype, np.integer):
        limits = np.iinfo(image.dtype)
        compressed = np.clip(compressed, a_min=limits.min, a_max=limits.max)

    # cast to same dtype as original image
    compressed = compressed.astype(image.dtype)

    # reshape to original image size
    return compressed.reshape(original_shape)

In [0]:
def compress_complex(complex_image, n_components=100,random_seed=0, doplot=False):
    '''
    Compress a complex-valued matrix by compressing its real and its
    imaginary part separately with `compress` and recombining them.

    complex in the sense of complex numbers,
    https://en.wikipedia.org/wiki/Complex_number

    ie having a real and an imaginary part

    Parameters:
        complex_image: array of complex numbers
        n_components:  number of components, passed on to `compress`
        random_seed:   seed, passed on to `compress`
        doplot:        if True, plot the original and the compressed real
                       and imaginary parts with `plot_spectrum`

    Returns:
        compressed real part + 1j * compressed imaginary part
    '''
    real = np.real(complex_image)
    imag = np.imag(complex_image)

    compress_real = compress(real, n_components, random_seed)
    # The imaginary part must be compressed from `imag`; compressing `real`
    # twice would discard all phase information.
    compress_imag = compress(imag, n_components, random_seed)

    if doplot:
        plot_spectrum(real, 'Real Part')
        plot_spectrum(compress_real, 'Real Part Compressed')

        plot_spectrum(imag, 'Imaginary Part')
        plot_spectrum(compress_imag, 'Imaginary Part Compressed')

    compressed = compress_real + 1j * compress_imag
    return compressed
    

# Compress Images

In [0]:
from sklearn.datasets import load_sample_images
# Load the sample images shipped with scikit-learn; `dataset.images` is
# indexed by the interactive demo further down.
dataset = load_sample_images()

In [24]:
dataset.DESCR

'Image: china.jpg\nReleased under a creative commons license. [1]\nAttribution: Some rights reserved by danielbuechele [2]\nRetrieved 21st August, 2011 from [3] by Robert Layton\n\n[1] https://creativecommons.org/licenses/by/2.0/\n[2] https://www.flickr.com/photos/danielbuechele/\n[3] https://www.flickr.com/photos/danielbuechele/6061409035/sizes/z/in/photostream/\n\n\nImage: flower.jpg\nReleased under a creative commons license. [1]\nAttribution: Some rights reserved by danielbuechele [2]\nRetrieved 21st August, 2011 from [3] by Robert Layton\n\n[1] https://creativecommons.org/licenses/by/2.0/\n[2] https://www.flickr.com/photos/vultilion/\n[3] https://www.flickr.com/photos/vultilion/6056698931/sizes/z/in/photostream/\n\n\n\n'

In [0]:
def myplot(compressed_image):
    """Show `compressed_image` via ``plt.imshow``."""
    plt.imshow(compressed_image)

In [34]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import matplotlib.pyplot as plt

@interact(ImageNumber=range(len(dataset.images)),
          n_components=[1, 2, 5, 10, 20, 50])
def plot_compressed(ImageNumber,n_components=5):
    """Compress the selected sample image and display the result.

    `ImageNumber` indexes `dataset.images`; `n_components` is handed to
    `compress`. The result is drawn on a new 20x8 figure with `myplot`.
    """
    low_rank_image = compress(dataset.images[ImageNumber],
                              n_components=n_components)
    plt.figure(figsize=(20, 8))
    myplot(low_rank_image)

interactive(children=(Dropdown(description='ImageNumber', options=(0, 1), value=0), Dropdown(description='n_co…

# Sounds
The audio files in this notebook are from the ESC-50 dataset:

- ESC-50: Dataset for Environmental Sound Classification
- https://github.com/karoldvl/ESC-50/
- https://dx.doi.org/10.7910/DVN/YDEPUT

In [0]:
def load_audio_file(filepath):
    """Load an audio file with librosa and return ``(signal, sample_rate)``.

    The file is read with ``mono=False`` and the shape of the loaded array is
    printed. If the array has more than one dimension, only row 0 (the first
    channel) is selected and averaged over the selection axis, giving a 1-D
    signal; otherwise a copy of the loaded array is returned.
    """
    loaded, sr = lr.load(filepath, mono=False)
    print(loaded.shape)

    if loaded.ndim <= 1:
        # Already a single 1-D signal.
        return np.array(loaded), sr

    selected_rows = [0]
    picked = loaded[(selected_rows,)]
    return np.mean(picked, axis=0), sr

# Download sound files from github

In [0]:
# Common prefix of the raw sound files in the GitHub repository.
base_url = 'https://github.com/hfwittmann/sound/raw/master/sounds/'
# Maps a display name to the download URL of the sound file. The keys are
# used as dropdown options and as local file names ('<key>.wav') further down.
sounds_dict = {'Cat 1': base_url + 'cats/2-82274-A-5.wav', 
               'Cat 2': base_url + 'cats/2-82274-B-5.wav', 
               'Can opening': base_url + 'can_opening/3-155659-A-34.wav'}

In [0]:
import requests
def download_soundfile(url, name, timeout=30):
    """Download `url` and write the response body to the file `name`.

    Parameters
    ----------
    url : str
        Address of the file to download (redirects are followed).
    name : str
        Path of the local file to write; an existing file is overwritten.
    timeout : float, default 30
        Seconds to wait for the server before giving up.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx/5xx). Nothing is
        written in that case, so an error page never ends up in a sound file.
    """
    print(f'downloading {url} to file {name}')

    # download the file contents in binary format
    response = requests.get(url, allow_redirects=True, timeout=timeout)
    response.raise_for_status()

    # write the downloaded bytes to the local file
    with open(str(name), "wb") as sound_file:
        sound_file.write(response.content)
        

In [30]:
# Download every sound listed in `sounds_dict` to '<name>.wav'.
for name, url in sounds_dict.items():
    # The dict key is used as the local file name.
    download_soundfile(url, f'{name}.wav')

downloading https://github.com/hfwittmann/sound/raw/master/sounds/cats/2-82274-A-5.wav to file Cat 1.wav
downloading https://github.com/hfwittmann/sound/raw/master/sounds/cats/2-82274-B-5.wav to file Cat 2.wav
downloading https://github.com/hfwittmann/sound/raw/master/sounds/can_opening/3-155659-A-34.wav to file Can opening.wav


# Analyse Sound files

In [31]:
import pathlib
import IPython
import librosa as lr
from glob import glob
import numpy as np
import matplotlib.pyplot as plt



@interact(name=sounds_dict.keys())
def myprint(name):
    """Play the selected sound and plot the log-magnitude of its spectrogram.

    `name` is a key of `sounds_dict`; the file '<name>.wav' is loaded with
    `load_audio_file`, offered as an audio player and transformed with a
    short-time Fourier transform (n_fft=1024, hop_length=512).
    """
    print(f'{name}.wav')
    y, sr = load_audio_file(f'{name}.wav')
    IPython.display.display(IPython.display.Audio(y, rate=sr))

    mysftf = lr.stft(y, n_fft= 1024, hop_length= 512)

    # The figure is created here, per call. Creating one inside the
    # `interact(...)` arguments only produced a stray empty figure.
    plt.figure(figsize=(15, 7))
    # This cell shows the original signal, so label it as uncompressed.
    plot_spectrum(np.log(np.abs(mysftf)), 'Log of Absolute of Uncompressed')


<Figure size 1080x504 with 0 Axes>

interactive(children=(Dropdown(description='name', options=('Cat 1', 'Cat 2', 'Can opening'), value='Cat 1'), …

# Compress sound files

In [32]:
@interact(name=sounds_dict.keys(),
          n_components=[1,2,5,10,20,50])
def myprint(name, n_components=20):
    """Compress the spectrogram of the selected sound and play the result.

    The file '<name>.wav' is loaded, transformed with a short-time Fourier
    transform (n_fft=1024, hop_length=512) and compressed with
    `compress_complex` using `n_components` components. The log-magnitude of
    the compressed spectrogram is plotted, a rough memory estimate is printed
    and the inverse transform is offered as an audio player.
    """
    print(name)
    y, sr = load_audio_file(f'{name}.wav')
    mysftf = lr.stft(y, n_fft= 1024, hop_length= 512)

    mysftf_compressed = compress_complex(mysftf, n_components=n_components, doplot=False)

    plt.figure(figsize=(15, 7))
    plot_spectrum(np.log(np.abs(mysftf_compressed)), 'Log of Absolute of Compressed')

    # Rough storage estimate: (rows + columns + 1) values per kept component
    # versus rows * columns values for the full matrix.
    # NOTE(review): the factor 2 in the printed ratio presumably accounts for
    # the real and imaginary parts being compressed separately - confirm.
    Original_memory = np.prod(mysftf_compressed.shape)
    Compressed_memory = (1 + np.sum(mysftf_compressed.shape) ) * n_components
    print(f'The compressed memory is roughly {100 * 2 * Compressed_memory / Original_memory:0.0f}% of the original')

    y_inverted_sftf = lr.istft(mysftf_compressed, hop_length= 512)
    # Play back at the sample rate the file was loaded with instead of a
    # hard-coded value, so pitch and speed stay correct if the rate changes.
    IPython.display.display(IPython.display.Audio(y_inverted_sftf, rate=sr))
    print('\n')
    print('\n')

interactive(children=(Dropdown(description='name', options=('Cat 1', 'Cat 2', 'Can opening'), value='Cat 1'), …