In [None]:
import os
# On Google Colab: mount Google Drive and switch into the project folder.
# Anywhere else the `google.colab` import fails with ModuleNotFoundError and
# the working directory is left as it is.
try:
    from google.colab import drive
    mount_point = '/content/drive'
    drive.mount(mount_point)
    os.chdir(f'{mount_point}/MyDrive/swan_class/pythonaudio')
except ModuleNotFoundError:
    pass
# Report where relative paths such as 'image/...' will be resolved from.
print(f"The current working directory: '{os.getcwd()}'")

In [None]:
# Load IPython's autoreload extension and set mode 2, which re-imports modules
# before each cell is executed, so edits to local modules (e.g. `processors`,
# imported below) take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

import librosa
import librosa.display
import soundfile as sf
from IPython.display import Audio
from scipy.io.wavfile import write
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from PIL import Image, ImageOps
from processors import AudioProcessor, ImageProcessor
import base64
import cv2

In [None]:
def image_to_audio(image_path, save_path=None, rotate=0, padding=3, inverse_color=False, volume=1, edge_detection=False, display_image=False, plot_spectrogram=False):
    """Prepare an image with ``ImageProcessor`` and play it back through ``AudioProcessor``.

    Image stage, in order: load in 'RGB' mode, ``resize(600)``, optional
    ``inverse_color()``, optional ``edge_detection()``, optional
    ``rotate(rotate)``, ``add_top_padding(padding)``, ``convert_type('L')``,
    optional ``display_image()``, then ``flip()``.

    Audio stage, in order: load the processed image array,
    ``image_to_spectrogram(inverse_transform=False)``, optional
    ``plot_spectrogram()``, ``spectrogram_to_wave()``, ``normalize_audio()``,
    ``change_volume(volume)`` and finally ``play_sound(save_path)``.

    Args:
        image_path: Path handed to ``ImageProcessor.load_image``.
        save_path: Forwarded to ``AudioProcessor.play_sound``
            (presumably where the audio is written -- confirm in ``processors``).
        rotate: Forwarded to ``ImageProcessor.rotate``; skipped when falsy (0).
        padding: Forwarded to ``ImageProcessor.add_top_padding``.
        inverse_color: If true, call ``ImageProcessor.inverse_color``.
        volume: Forwarded to ``AudioProcessor.change_volume``.
        edge_detection: If true, call ``ImageProcessor.edge_detection``.
        display_image: If true, show the prepared image (before it is flipped).
        plot_spectrogram: If true, plot the spectrogram before synthesis.

    Returns:
        None.
    """
    # ---- Image stage -------------------------------------------------------
    img_proc = ImageProcessor()
    img_proc.load_image(image_path, mode='RGB')
    img_proc.resize(600)

    # Optional transforms; the order (invert -> edges -> rotate) is significant.
    if inverse_color:
        img_proc.inverse_color()
    if edge_detection:
        img_proc.edge_detection()
    if rotate:
        img_proc.rotate(rotate)

    img_proc.add_top_padding(padding)
    img_proc.convert_type('L')

    # The preview is shown before the flip, so the user sees the unflipped image.
    if display_image:
        img_proc.display_image()
    img_proc.flip()

    # ---- Audio stage -------------------------------------------------------
    # 44100 is passed positionally; presumably the sample rate in Hz -- confirm.
    audio_proc = AudioProcessor(44100)
    audio_proc.load_image_form_array(img_proc.image_array)
    audio_proc.image_to_spectrogram(inverse_transform=False)
    if plot_spectrogram:
        audio_proc.plot_spectrogram()
    audio_proc.spectrogram_to_wave()
    audio_proc.normalize_audio()
    audio_proc.change_volume(volume)
    audio_proc.play_sound(save_path)

In [None]:
# Demo on the doge picture with colour inversion and edge detection enabled,
# showing both the prepared image and the spectrogram plot.
image_to_audio(
    'image/doge.png',
    edge_detection=True,
    plot_spectrogram=True,
    display_image=True,
    inverse_color=True,
)

# Classic edge detection

In [None]:
# Threshold-and-contour outline of the picture, saved to 'image.png'.
#
# PIL decodes to RGB channel order (OpenCV's own loader would give BGR), so the
# greyscale conversion must use COLOR_RGB2GRAY; COLOR_BGR2GRAY would apply the
# red and blue luminance weights to the wrong channels.
# convert('RGB') normalises palette / greyscale / alpha inputs to a 3-channel
# array so cvtColor and the white (255, 255, 255) contour colour behave the
# same for every source file.
image = np.array(Image.open('image/Mona_Lisa.jpg').convert('RGB'))
img_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

# Binary mask: pixels brighter than 150 become 255, everything else 0.
_, thresh = cv2.threshold(img_gray, 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(image=thresh, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_NONE)

# Black canvas with the same shape and dtype as the source image.
image_copy = np.zeros_like(image)
# contourIdx=-1 draws every contour; white, 2 px, anti-aliased.
cv2.drawContours(image=image_copy, contours=contours, contourIdx=-1, color=(255, 255, 255), thickness=2, lineType=cv2.LINE_AA)

contour_image = Image.fromarray(image_copy)
contour_image.save('image.png')
contour_image

In [None]:
class CropLayer:
    """Custom layer for OpenCV's DNN module that centre-crops a blob.

    The layer takes two inputs and crops the first one so that its spatial
    size (axes 2 and 3) matches the second, leaving the batch size and the
    number of channels of the first input untouched. The constructor
    arguments ``params`` and ``blobs`` are accepted but not used.
    """

    def __init__(self, params, blobs):
        # Crop window bounds; the real values are computed in getMemoryShapes.
        self.ystart = 0
        self.yend = 0
        self.xstart = 0
        self.xend = 0

    def getMemoryShapes(self, inputs):
        """Compute the crop window and return the output shape.

        ``inputs[0]`` is the shape of the blob to crop and ``inputs[1]`` the
        shape of the reference blob, both indexed as
        (batch, channels, height, width). Returns a one-element list holding
        ``[batch, channels, target_height, target_width]``.
        """
        src_shape = inputs[0]
        ref_shape = inputs[1]
        out_h = ref_shape[2]
        out_w = ref_shape[3]

        # Centre the window: split the surplus evenly (floor) on both sides.
        self.ystart = (src_shape[2] - out_h) // 2
        self.xstart = (src_shape[3] - out_w) // 2
        self.yend = self.ystart + out_h
        self.xend = self.xstart + out_w

        return [[src_shape[0], src_shape[1], out_h, out_w]]

    def forward(self, inputs):
        """Return a one-element list with the first input cropped to the stored window."""
        rows = slice(self.ystart, self.yend)
        cols = slice(self.xstart, self.xend)
        return [inputs[0][:, :, rows, cols]]

In [None]:
# Register the Python CropLayer class as the implementation of layers of type
# 'Crop', then load the edge-detection network from its Caffe prototxt and
# pretrained weights (file names suggest the HED model trained on BSDS).
# NOTE(review): this cell registers 'Crop' again each time it is re-run --
# confirm OpenCV tolerates a repeated registration, or restart the kernel first.
cv2.dnn_registerLayer('Crop', CropLayer)
net = cv2.dnn.readNet('edge_detection_model/deploy.prototxt', 'edge_detection_model/hed_pretrained_bsds.caffemodel')

In [None]:
# Load 'image/b.png' through ImageProcessor (mode=None, resize() with its
# defaults) and keep the resulting array in the global `image_array`.
# The cv2.dnn.blobFromImage cell further down reads `image_array`, so this
# cell has to be run before it.
ip = ImageProcessor()
ip.load_image('image/b.png', mode=None)
ip.resize()
image_array = ip.image_array
# Bare expression: displays the array as the cell output.
image_array

In [None]:
# Run ImageProcessor's own edge detection on 'image/b.png' and display the
# result stored in `edge_detected_image`.
# Note: this rebinds the global `ip` used by other cells.
ip = ImageProcessor()
ip.load_image('image/b.png')
ip.resize()
ip.edge_detection()
ip.edge_detected_image

In [None]:
%%timeit
# Scratch cell: times a trivial assignment; looks like a leftover from trying
# out the %%timeit cell magic rather than part of the analysis.
a=1

In [None]:
# Threshold-and-contour outline of the picture, saved to 'image.png'.
#
# PIL decodes to RGB channel order (OpenCV's own loader would give BGR), so the
# greyscale conversion must use COLOR_RGB2GRAY; COLOR_BGR2GRAY would apply the
# red and blue luminance weights to the wrong channels.
# convert('RGB') normalises palette / greyscale / alpha PNGs to a 3-channel
# array so cvtColor and the white (255, 255, 255) contour colour behave the
# same for every source file.
image = np.array(Image.open('image/b.png').convert('RGB'))
img_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

# Binary mask: pixels brighter than 150 become 255, everything else 0.
_, thresh = cv2.threshold(img_gray, 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(image=thresh, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_NONE)

# Black canvas with the same shape and dtype as the source image.
image_copy = np.zeros_like(image)
# contourIdx=-1 draws every contour; white, 2 px, anti-aliased.
cv2.drawContours(image=image_copy, contours=contours, contourIdx=-1, color=(255, 255, 255), thickness=2, lineType=cv2.LINE_AA)

contour_image = Image.fromarray(image_copy)
contour_image.save('image.png')
contour_image

In [None]:
# Run the network `net` (loaded in an earlier cell) on `image_array` (produced
# by the ImageProcessor cell above). No size is given, and crop=False, so the
# blob is built from the array as it is; the mean triple is subtracted per
# channel, and swapRB=False leaves the channel order untouched.
# NOTE(review): the mean values look like the usual Caffe per-channel means in
# BGR order. If `image_array` is RGB (e.g. it originates from PIL) the means
# would be applied to the wrong channels -- confirm in ImageProcessor.load_image.
inp = cv2.dnn.blobFromImage(image_array, scalefactor=1.0,
                            mean=(104.00698793, 116.66876762, 122.67891434),
                            swapRB=False, crop=False)
net.setInput(inp)
# `out` is not read by any other visible cell; presumably inspected by hand.
out = net.forward()

In [None]:
# Sonify 'image/b.png' with inverted colours and no top padding, and plot the
# spectrogram.
image_to_audio(
    'image/b.png',
    plot_spectrogram=True,
    padding=0,
    inverse_color=True,
)

In [None]:
# Manual preview of the image preparation on 'image/paint.png':
# load, resize(600), add_top_padding(5), flip, then display.
path = 'image/paint.png'
ip = ImageProcessor()
ip.load_image(path)
ip.resize(600)
# Size after resizing; in this cell only the commented-out resize experiment
# below would read it.
size = ip.image.size
ip.add_top_padding(5)
# ip.rotate(-90)
# ip.image = ip.image.resize(size)
ip.flip()
# ip.inverse_color()
ip.display_image()

In [None]:
# Threshold-and-contour outline of the picture, saved to 'image.png'.
#
# PIL decodes to RGB channel order (OpenCV's own loader would give BGR), so the
# greyscale conversion must use COLOR_RGB2GRAY; COLOR_BGR2GRAY would apply the
# red and blue luminance weights to the wrong channels.
# convert('RGB') normalises palette / greyscale / alpha PNGs to a 3-channel
# array so cvtColor and the white (255, 255, 255) contour colour behave the
# same for every source file.
image = np.array(Image.open('spectrogram/b.png').convert('RGB'))
img_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

# Binary mask: pixels brighter than 150 become 255, everything else 0.
_, thresh = cv2.threshold(img_gray, 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(image=thresh, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_NONE)

# Black canvas with the same shape and dtype as the source image.
image_copy = np.zeros_like(image)
# contourIdx=-1 draws every contour; white, 2 px, anti-aliased.
cv2.drawContours(image=image_copy, contours=contours, contourIdx=-1, color=(255, 255, 255), thickness=2, lineType=cv2.LINE_AA)

contour_image = Image.fromarray(image_copy)
contour_image.save('image.png')
contour_image

In [None]:
# Sonify 'image.png' (the file the contour cells in this notebook write) and
# plot its spectrogram.
image_to_audio(
    'image.png',
    plot_spectrogram=True,
)

In [None]:
# Sonify 'spectrogram/b.png' with inverted colours and no top padding, and plot
# the spectrogram.
image_to_audio(
    'spectrogram/b.png',
    plot_spectrogram=True,
    padding=0,
    inverse_color=True,
)

In [None]:
# Manual preview of the image preparation on 'spectrogram/paint.png':
# load, resize(600), add_top_padding(5), flip, then display.
# The global `ip` left behind here is what the following AudioProcessor cell
# converts to sound.
path = 'spectrogram/paint.png'
ip = ImageProcessor()
ip.load_image(path)
ip.resize(600)
# Size after resizing; in this cell only the commented-out resize experiment
# below would read it.
size = ip.image.size
ip.add_top_padding(5)
# ip.rotate(-90)
# ip.image = ip.image.resize(size)
ip.flip()
# ip.inverse_color()
ip.display_image()

In [None]:
# Manual version of the audio half of image_to_audio, applied to the global
# `ip` prepared in an earlier cell; the result is passed to play_sound with
# 'audio/paint.wav' at volume 0.1.
# NOTE(review): unlike image_to_audio, no convert_type('L') is applied to `ip`
# and the array comes from ip.image_to_array() rather than ip.image_array --
# confirm both paths give AudioProcessor the same kind of input.
ap = AudioProcessor(44100)
ap.load_image_form_array(ip.image_to_array())
ap.image_to_spectrogram(inverse_transform=False)
ap.plot_spectrogram()
ap.spectrogram_to_wave()
ap.normalize_audio()
ap.change_volume(0.1)
ap.play_sound('audio/paint.wav')

In [None]:
# Experiment: place the current `ip.image` on a canvas of the same width and
# `pad` times its height using PIL's ImageOps.pad.
# centering=(0.5, 1) requests horizontal centring and alignment to the bottom
# edge, so the added space should end up above the picture -- verify visually.
# Depends on the global `ip` from an earlier cell.
pad = 2
size = ip.image.size
image = ImageOps.pad(ip.image, (size[0], int(size[1]*pad)), centering=(0.5, 1))
# image = image.resize(size)
# Bare expression: displays the padded image as the cell output.
image