In [209]:
import json
import random
from os import getcwd, listdir, makedirs
from os.path import join

import numpy as np
import pywt
from PIL import Image
from sklearn.decomposition import PCA

Para descargar el dataset:

In [None]:
# Disabled dataset download: the whole cell is a bare string literal, so none of
# the statements inside it are executed.
# The quoted code downloads `images.zip` from Google Drive with `gdown` and
# extracts it into the current working directory with `zipfile`.
# NOTE(review): it references `gdown`, `zipfile` and `os`, none of which are
# imported by the import cell visible at the top of this notebook; they would
# have to be imported before un-quoting this cell.
'''url =  'https://drive.google.com/uc?id=1yh76NNJXhH71DonWlAoIvomYcNJ79bd3&export=download'
output = 'images.zip'
gdown.download(url, output, quiet=False)

path = os.getcwd() + "/"
path += "images.zip"
archivo_zip = zipfile.ZipFile(path, "r")
archivo_zip.extractall()
archivo_zip.close()
'''

Variables y semilla

In [210]:
# Folders are resolved against the directory the notebook is launched from.
PATH = getcwd()
PATH_TO_DATASET = f"{PATH}/images"
PATH_TO_ENCODES = f"{PATH}/encodes"

# Seed NumPy's global generator and discard its first draw.
np.random.seed(3589429)
np.random.rand()

# Seed the stdlib generator with the same value; its first draw is the cell's
# displayed output, so this expression has to stay last.
random.seed(3589429)
random.random()

0.8365845167708341

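Codificador de imágenes:
- Recibe las imágenes del dataset, las re-escala y escribe, para cada una, los coeficientes de aproximación de su transformada wavelet de Haar (nivel 7)
- También puede reducir la dimensionalidad con PCA, conservando solo las componentes principales necesarias para explicar la fracción de varianza deseada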

In [281]:
class ImageEncoder:
    """Encode the images under ``PATH_TO_DATASET`` as Haar-wavelet feature vectors.

    Each image is resized to a square, flattened, and decomposed with a level-7
    Haar wavelet transform; only the approximation coefficients are kept.
    The resulting vectors can optionally be projected with PCA before being
    written, grouped into Train / Validation / Test, as JSON files under
    ``PATH_TO_ENCODES``.
    """

    def __init__(self):
        """List the dataset directory and remember how many entries it holds."""
        # Sorted so that an index always refers to the same file name.
        self.data = sorted(listdir(PATH_TO_DATASET))
        self.n = len(self.data)
        # The two attributes below are not read by any method of this class;
        # they are kept because external code may rely on them.
        self.compressions = 7
        self.encodings = []

    def encode_collection(self,iters=1,reduce_features=False,variance=0,resize_coef=128,debug=False):
        """Write ``iters`` randomly split collections of encoded images to disk.

        For every iteration a fresh Train/Validation/Test split is drawn
        (10 % validation, 10 % test, the remainder train), every image is
        encoded, and the result is written to
        ``{PATH_TO_ENCODES}/img_encodings_{k}.json`` with ``k`` starting at 1.

        Args:
            iters: Number of collections (independent random splits) to write.
            reduce_features: When truthy, project the encodings with PCA.
            variance: Forwarded to ``PCA(n_components=..., svd_solver='full')``.
                Per the scikit-learn documentation, a value strictly between
                0 and 1 keeps as many components as are needed to explain that
                fraction of the variance.
            resize_coef: Side length, in pixels, of the square every image is
                resized to before encoding.
            debug: When truthy, print a short summary once all files are written.

        Returns:
            None. The results are the JSON files written to ``PATH_TO_ENCODES``.
        """
        dim = 0
        # open(..., "w") does not create missing parent directories.
        makedirs(PATH_TO_ENCODES, exist_ok=True)
        for iteration in range(iters):
            slices = self.__compute_slices([10, 10])
            # Encodings are stored slice after slice, in the dict's order,
            # which is what _group_by_slice relies on to cut them apart again.
            encodings = [
                self._encode_image(self.data[i], resize_coef)
                for indices in slices.values()
                for i in indices
            ]
            if encodings:
                dim = len(encodings[-1])
            if reduce_features:
                # NOTE(review): PCA is fitted on the Train, Validation and Test
                # rows together, so held-out images influence the projection.
                # Left unchanged on purpose; confirm this is intended.
                pca = PCA(n_components=variance, svd_solver='full')
                pca.fit(encodings)
                transformed_encodings = pca.transform(encodings)
                dim = transformed_encodings.shape[1]
                encodings = transformed_encodings.tolist()
            collection = self._group_by_slice(encodings, slices)
            # Mode "w" already truncates, and the with-block closes the file.
            with open(f"{PATH_TO_ENCODES}/img_encodings_{iteration+1}.json", "w") as output:
                json.dump(collection, output, indent=4)
        if debug:
            print(f"[ImageEncoder]: Se redujeron {self.n} imagenes a {resize_coef}x{resize_coef}.")
            print(f"[ImageEncoder]: Dichas reducciones, han sido escritas en {iters} colecciones.")
            print(f"[ImageEncoder]: Las dimensiones finales son de {dim}.")

    def _encode_image(self, file_name, resize_coef):
        """Return one image's level-7 Haar approximation coefficients as a list."""
        path_to_image = PATH_TO_DATASET + "/" + file_name
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array.
        with Image.open(path_to_image) as image:
            pixels = np.array(image.resize((resize_coef, resize_coef)))
        coefficients = pywt.wavedecn(data=pixels.ravel(), wavelet='haar', mode='symmetric', level=7)
        # Element 0 of the wavedecn result is the approximation-coefficient array.
        return coefficients[0].tolist()

    @staticmethod
    def _group_by_slice(encodings, slices):
        """Cut ``encodings`` into consecutive runs matching each slice's length.

        The boundaries come from the slices themselves rather than from
        separately recomputed percentages, so every encoding is filed under the
        split its image was actually sampled into.
        """
        collection = {}
        start = 0
        for name, indices in slices.items():
            stop = start + len(indices)
            collection[name] = encodings[start:stop]
            start = stop
        return collection

    def __compute_slices(self,slices):
        """Draw a random Train/Validation/Test partition of ``range(self.n)``.

        Args:
            slices: Two-element sequence ``[validation_percent, test_percent]``.

        Returns:
            Dict with keys "Train", "Validation" and "Test", each an integer
            NumPy array of image indices. Validation and Test are in sampling
            order; Train holds every remaining index in ascending order.
        """
        validation_size = int(self.n * slices[0] / 100)
        test_size = int(self.n * slices[1] / 100)
        held_out = random.sample(range(self.n), validation_size + test_size)
        held_out_lookup = set(held_out)
        # Built in ascending order explicitly instead of relying on the
        # iteration order of a set difference.
        train = [i for i in range(self.n) if i not in held_out_lookup]
        return {
            "Train": np.array(train, dtype=int),
            "Validation": np.array(held_out[:validation_size], dtype=int),
            "Test": np.array(held_out[validation_size:], dtype=int),
        }

In [282]:
# Build the encoder over the dataset folder and write three collections, each
# with its own random split; `variance` is forwarded to PCA as `n_components`.
ie = ImageEncoder()
ie.encode_collection(
    iters=3,
    reduce_features=True,
    variance=0.97,
    debug=True,
)

[ImageEncoder]: Se redujeron 832 imagenes a 128x128.
[ImageEncoder]: Dichas reducciones, han sido escritas en 3 colecciones.
[ImageEncoder]: Las dimensiones finales son de 38.
