In [9]:
import pandas as pd
import numpy as np
from PIL import Image
import os
import pywt
from natsort import natsorted
from sklearn.decomposition import PCA

In [2]:
# Base directory of the dataset; the source images are read from its `images/` subfolder.
IMG_PATH = "data/"

In [5]:
def get_vector_from_image(image, iterations):
    """
    Build the feature vector of an image from repeated Haar wavelet transforms.

    Every pass calls ``pywt.dwt2`` with the 'haar' wavelet and keeps only the
    approximation (LL) coefficients, which feed the next pass; the detail
    coefficients are discarded.

    :param image: Image data accepted by ``pywt.dwt2`` (array-like).
    :param iterations: Number of times the wavelet transform is applied.
        At least one pass is always performed, even for values below 1.
    :return: The final approximation coefficients flattened to 1D.
    """
    approximation = image
    passes = max(iterations, 1)
    for _ in range(passes):
        approximation, _details = pywt.dwt2(approximation, 'haar')
    return approximation.flatten()


In [6]:
def generate_data(src_dir, iterations, width=100, height=100, mode=None):
    """
    Build a DataFrame of wavelet feature vectors, one row per image.

    Every entry of ``<src_dir>/images`` is opened with PIL, resized to
    ``(width, height)`` and passed to ``get_vector_from_image``. Rows are
    indexed by file name, in natural sort order.

    :param src_dir: Directory that contains the ``images`` subfolder
        (with or without a trailing path separator).
    :param iterations: Number of wavelet passes applied to each image.
    :param width: Width in pixels the images are resized to.
    :param height: Height in pixels the images are resized to.
    :param mode: Optional PIL mode (e.g. ``'L'`` for grayscale) the image is
        converted to before resizing. ``None`` (default) keeps the image's
        own mode.
    :return: ``pandas.DataFrame`` with one feature vector per row.
    """
    images_dir = os.path.join(src_dir, 'images')
    # List the directory once so the rows and the index cannot disagree.
    file_names = natsorted(os.listdir(images_dir))

    data = []
    for file_name in file_names:
        # The context manager releases the file handle once the pixels are read.
        with Image.open(os.path.join(images_dir, file_name)) as source_img:
            prepared = source_img if mode is None else source_img.convert(mode)
            resized = prepared.resize((width, height))
        # NOTE(review): if the images are multi-channel (e.g. RGB), the array
        # seen by pywt.dwt2 is presumably (height, width, channels) and dwt2
        # transforms the last two axes by default, i.e. width x channel rather
        # than height x width. Pass mode='L' if a true 2D image transform is
        # intended -- confirm against the dataset.
        data.append(get_vector_from_image(resized, iterations))

    df = pd.DataFrame(data, index=file_names)
    # Report the width from the frame so an empty directory does not raise.
    print(f"Tamaño del vector caracteristica con {iterations} cortes: {df.shape[1]}")
    return df


In [18]:
# Only the 9-pass decomposition is generated for now.
# `i` and `data` stay defined after the loop; the PCA and export cells rely on them.
for i in (9,):
    data = generate_data(IMG_PATH, iterations=i, width=512, height=512)
# (Disabled) A min-max scaling of `data` and a per-image CSV export of the raw
# wavelet vectors to data/{i}_cortes/ were previously performed here.

Tamaño del vector caracteristica con 9 cortes: 512


In [20]:
# Preview the first five feature vectors (head() defaults to 5 rows).
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,502,503,504,505,506,507,508,509,510,511
0010001.png,71004.75,71364.25,71755.25,72082.25,72609.0,73023.25,73359.0,73717.0,73961.0,74099.75,...,57102.75,57201.25,57389.75,57850.0,58501.5,59258.0,60253.5,61168.5,61739.5,62179.0
0010002.png,1098.0,556.0,101.0,51.0,61.0,62.0,126.0,91.0,44.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0010004.png,130557.0,130558.0,130556.0,130554.0,130555.0,130468.25,129644.0,129749.75,129249.25,128172.25,...,130556.0,130558.0,130560.0,130560.0,130560.0,130560.0,130560.0,130560.0,130560.0,130560.0
0010005.png,130559.0,130555.0,130552.0,130553.0,130537.0,129860.0,129729.25,129681.25,129101.5,128282.0,...,130492.25,130461.0,130489.0,130560.0,130560.0,130560.0,130560.0,130560.0,130560.0,130560.0
0010006.png,48459.25,48832.0,49522.75,50286.25,50930.5,51327.5,51607.0,51747.0,51758.0,51757.25,...,10015.5,9881.0,9869.25,9966.25,10123.75,10103.0,9995.25,9883.75,9765.0,9661.0


In [26]:
# Project the wavelet vectors onto their first 20 principal components.
# random_state is fixed so that reruns give the same projection whenever
# scikit-learn selects a randomized SVD solver.
pca_data = PCA(n_components=20, random_state=0)
pca_data_componentes = pca_data.fit_transform(data)
df_pca = pd.DataFrame(data=pca_data_componentes)
# Min-max scale every component (column) to the [0, 1] range.
component_min = df_pca.min()
component_max = df_pca.max()
df_pca = (df_pca - component_min) / (component_max - component_min)


In [27]:
# Display the min-max scaled PCA projection (one row per image, one column per component).
df_pca

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0.502534,0.373595,0.287810,0.332397,0.354086,0.341854,0.296236,0.405460,0.420895,0.329090,0.414909,0.358560,0.373923,0.405098,0.683094,0.402325,0.489663,0.346655,0.415266,0.548458
1,0.043257,0.426534,0.390166,0.354242,0.327850,0.299834,0.271551,0.472978,0.538702,0.329995,0.354481,0.369965,0.346881,0.500509,0.526805,0.283614,0.573220,0.448766,0.410175,0.492512
2,0.792971,0.370638,0.475659,0.527645,0.336288,0.356248,0.233941,0.462646,0.306464,0.316767,0.419674,0.311718,0.387786,0.485337,0.158480,0.312638,0.561838,0.254211,0.412580,0.229557
3,0.810870,0.387256,0.430073,0.523600,0.362482,0.378890,0.240507,0.476738,0.353830,0.252416,0.409819,0.307473,0.370441,0.529488,0.108687,0.258145,0.531024,0.328645,0.433521,0.210083
4,0.269332,0.403815,0.162444,0.577663,0.291545,0.423299,0.290645,0.471414,0.290812,0.301513,0.495265,0.254416,0.417069,0.460259,0.430690,0.477275,0.323422,0.551651,0.481840,0.515840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
827,0.216180,0.477872,0.422188,0.403505,0.347438,0.319424,0.249511,0.501955,0.513770,0.365000,0.365214,0.328471,0.459102,0.495328,0.653612,0.330241,0.594902,0.371816,0.361042,0.385156
828,0.381445,0.435241,0.389152,0.453037,0.358290,0.372926,0.264771,0.439191,0.414326,0.385159,0.358837,0.379025,0.382260,0.430301,0.517852,0.533136,0.513308,0.421182,0.421878,0.484957
829,0.424313,0.258316,0.536231,0.683116,0.236326,0.409512,0.050339,0.726369,0.189486,0.504089,0.247045,0.317666,0.366198,0.283345,0.508345,0.387292,0.515754,0.381219,0.406352,0.447848
830,0.497430,0.372506,0.375137,0.374959,0.432567,0.366307,0.328566,0.405791,0.603594,0.351336,0.491989,0.299602,0.389929,0.491896,0.508318,0.409950,0.506991,0.383289,0.500518,0.561998


In [28]:
# Write each image's PCA feature vector to its own CSV file (one value per
# line, no header), named after the image file without its extension.
# `i` is the number of wavelet passes left over from the generation loop.
if len(df_pca) != len(data):
    # Rows are paired by position, so both frames must describe the same images.
    raise ValueError("df_pca and data must have the same number of rows")
output_dir = os.path.join("data", f"{i}_cortes_pca")
# to_csv does not create missing directories.
os.makedirs(output_dir, exist_ok=True)
for image_name, (_, features) in zip(data.index, df_pca.iterrows()):
    # splitext also handles extensions that are not exactly three characters long.
    stem = os.path.splitext(image_name)[0]
    features.to_csv(os.path.join(output_dir, f"{stem}.csv"), index=False, header=False)