In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import os
import shutil

import matplotlib.pyplot as plt
from PIL import Image
from skimage import io
from skimage.transform import resize
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf

## Read dataset

In [3]:
# Main Directory
main_dir = 'Dataset/5001'
print(f'The total number of images is {len(os.listdir(main_dir))}')

The total number of images is 165


In [4]:
# specify the img directory path
path = main_dir

# list files in img directory
files = os.listdir(path)

image_list = []

for file in files:
    # make sure file is an image
    if file.endswith(('.jpg', '.png', 'jpeg', 'tiff')):
        #img_path = path + file
        image_list.append(os.path.join(path, file))
        
print(f'Image list top 5 examples:')
image_list.sort(reverse=False)
image_list[:5]

Image list top 5 examples:


['Dataset/5001/image_2015-11-01.tiff',
 'Dataset/5001/image_2015-11-08.tiff',
 'Dataset/5001/image_2015-11-15.tiff',
 'Dataset/5001/image_2015-11-22.tiff',
 'Dataset/5001/image_2015-11-29.tiff']

In [5]:
target_size = (740, 740, 12)

# VAE

In [6]:
import tensorflow.keras as keras
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, Input, Flatten, Dense, Lambda, Reshape, MaxPooling2D, UpSampling2D
#from keras.layers import BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import mnist
from tensorflow.keras import backend as K
import numpy as np
import matplotlib.pyplot as plt

In [7]:
from tensorflow.python.framework.ops import disable_eager_execution

disable_eager_execution()

## Encoder Part

In [8]:
input_data = Input(shape=target_size, name='encoder_input')

encoder = Conv2D(32, (5,5), activation='relu')(input_data)
encoder = MaxPooling2D((2,2))(encoder)

encoder = Conv2D(64, (3,3), activation='relu')(encoder)
encoder = MaxPooling2D((2,2))(encoder)

encoder = Conv2D(64, (3,3), activation='relu')(encoder)
encoder = MaxPooling2D((2,2))(encoder)

conv_shape = K.int_shape(encoder) #Shape of conv to be provided to decoder

encoder = Flatten()(encoder)
encoder = Dense(200, activation='relu')(encoder)

## Latent Distribution and Sampling

In [9]:
def sample_latent_features(distribution):
    distribution_mean, distribution_variance = distribution
    batch_size = tf.shape(distribution_variance)[0]
    random = K.random_normal(shape=(batch_size, tf.shape(distribution_variance)[1]))
    return distribution_mean + tf.exp(0.5 * distribution_variance) * random

In [10]:
latent_dim = 200 # Number of latent dim parameters

distribution_mean = Dense(latent_dim, name='mean')(encoder)
distribution_variance = Dense(latent_dim, name='log_variance')(encoder)
latent_encoding = Lambda(sample_latent_features)([distribution_mean, distribution_variance])

In [11]:
encoder_model = Model(input_data, latent_encoding)
encoder_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_input (InputLayer)     [(None, 740, 740, 1  0           []                               
                                2)]                                                               
                                                                                                  
 conv2d (Conv2D)                (None, 736, 736, 32  9632        ['encoder_input[0][0]']          
                                )                                                                 
                                                                                                  
 max_pooling2d (MaxPooling2D)   (None, 368, 368, 32  0           ['conv2d[0][0]']                 
                                )                                                             

## Decoder Part

In [12]:
decoder_input = Input(shape=(latent_dim), name='decoder_input')
decoder = Dense(conv_shape[1]*conv_shape[2]*conv_shape[3])(decoder_input)

decoder = Reshape((conv_shape[1], conv_shape[2], conv_shape[3]))(decoder)

"""
encoder = Conv2D(64, (5,5), activation='relu')(input_data)
encoder = MaxPooling2D((2,2))(encoder)

encoder = Conv2D(64, (3,3), activation='relu')(encoder)
encoder = MaxPooling2D((2,2))(encoder)

encoder = Conv2D(32, (3,3), activation='relu')(encoder)
encoder = MaxPooling2D((2,2))(encoder)
"""

decoder = UpSampling2D((2,2))(decoder)
decoder = Conv2DTranspose(64, (3,3), activation='relu')(decoder)

decoder = UpSampling2D((2,2))(decoder)
decoder = Conv2DTranspose(64, (3,3), activation='relu')(decoder)

decoder = Conv2DTranspose(32, (3,3), activation='relu')(decoder)
decoder = UpSampling2D((2,2))(decoder)

decoder_output = Conv2DTranspose(target_size[2], (5,5), activation='relu')(decoder)

In [13]:
decoder_model = Model(decoder_input, decoder_output)
decoder_model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 decoder_input (InputLayer)  [(None, 200)]             0         
                                                                 
 dense_1 (Dense)             (None, 518400)            104198400 
                                                                 
 reshape (Reshape)           (None, 90, 90, 64)        0         
                                                                 
 up_sampling2d (UpSampling2D  (None, 180, 180, 64)     0         
 )                                                               
                                                                 
 conv2d_transpose (Conv2DTra  (None, 182, 182, 64)     36928     
 nspose)                                                         
                                                                 
 up_sampling2d_1 (UpSampling  (None, 364, 364, 64)     0   

## Combining

In [14]:
encoded = encoder_model(input_data)
decoded = decoder_model(encoded)

In [15]:
autoencoder = Model(input_data, decoded)

## Loss Function (Reconstruction Loss + KL-loss)
https://towardsdatascience.com/variational-autoencoders-as-generative-models-with-keras-e0c79415a7eb

In [16]:
def get_loss(distribution_mean, distribution_variance):
    
    def get_reconstruction_loss(y_true, y_pred):
        reconstruction_loss = keras.losses.mse(y_true, y_pred)
        reconstruction_loss_batch = tf.reduce_mean(reconstruction_loss)
        return reconstruction_loss_batch#*target_size[0]*target_size[1]
    
    def get_kl_loss(distribution_mean, distribution_variance):
        kl_loss = 1 + distribution_variance - tf.square(distribution_mean) - tf.exp(distribution_variance)
        kl_loss_batch = tf.reduce_mean(kl_loss)
        #return kl_loss_batch*(-5e-4)
        return kl_loss_batch*(-.5)
    
    def total_loss(y_true, y_pred):
        reconstruction_loss_batch = get_reconstruction_loss(y_true, y_pred)
        kl_loss_batch = get_kl_loss(distribution_mean, distribution_variance)
        #return reconstruction_loss_batch + kl_loss_batch
        return K.mean(reconstruction_loss_batch + kl_loss_batch)
    
    return total_loss

## Compile the model

In [17]:
# Compile VAE
autoencoder.compile(loss=get_loss(distribution_mean, distribution_variance), optimizer='adam')
autoencoder.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_input (InputLayer)  [(None, 740, 740, 12)]    0         
                                                                 
 model (Functional)          (None, 200)               103825656 
                                                                 
 model_1 (Functional)        (None, 740, 740, 12)      104300332 
                                                                 
Total params: 208,125,988
Trainable params: 208,125,988
Non-trainable params: 0
_________________________________________________________________


# Model of 200 features

In [18]:
autoencoder.load_weights('Models/vae_v2.h5', by_name=True)

Metal device set to: Apple M1


2022-06-05 10:53:22.503499: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-06-05 10:53:22.503718: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2022-06-05 10:53:22.551432: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-06-05 10:53:22.555394: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-05 10:53:22.662819: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-05 10:53:23.349822: I tensorflow/core/grappler/o

# Generate embeddings

In [19]:
model = keras.Sequential()
for layer in autoencoder.layers[:-1]: # just exclude last layer from copying
    model.add(layer)
    
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model (Functional)          (None, 200)               103825656 
                                                                 
Total params: 103,825,656
Trainable params: 103,825,656
Non-trainable params: 0
_________________________________________________________________


In [20]:
def read_image(path):
    image_test = io.imread(path)
    image_test = resize(image_test, (target_size[0], target_size[1]),
                           anti_aliasing=True)
    image_test = np.expand_dims(image_test,axis=0)
    return image_test

In [21]:
def generate_embedding(image):
    embaedding = model.predict(image)
    embaedding = np.squeeze(embaedding, axis=0)
    return embaedding

In [22]:
def get_image_name(path):
    image_name = path[path.index('/image')+7:path.index('.tiff')]
    return image_name

In [23]:
embeddings = pd.DataFrame(columns=['Date', 'Embedding'])

for path in image_list:
    image = read_image(path=path)
    embedding = generate_embedding(image)
    date = get_image_name(path=path)
    embeddings = embeddings.append({'Date': date, 'Embedding': embedding}, ignore_index=True )

embeddings

2022-06-05 10:53:24.924620: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,Date,Embedding
0,2015-11-01,"[-1.1382818, -1.3830336, -0.0005828738, 1.0719..."
1,2015-11-08,"[-0.78371197, -0.87907755, 0.89000547, -1.4609..."
2,2015-11-15,"[-0.71112436, 0.7805053, 0.6321262, -0.2048684..."
3,2015-11-22,"[0.38520145, 0.014107712, 0.82281876, 0.324864..."
4,2015-11-29,"[-0.32229534, 0.38524646, -0.2019231, 0.216838..."
...,...,...
160,2018-11-25,"[-0.052257597, -0.66791594, -0.9568243, -0.367..."
161,2018-12-02,"[-0.14568828, -0.8766035, -2.023767, -1.774762..."
162,2018-12-09,"[-0.7028471, 0.57195914, 0.8823118, 0.98717546..."
163,2018-12-16,"[0.6703096, -1.3891406, -0.9479689, -0.9343066..."


In [24]:
def split_columns(df):
    df_aux = pd.DataFrame(df['Embedding'].tolist())
    df_aux = pd.concat( [df['Date'], df_aux], axis=1)
    return df_aux

In [25]:
# new df from the column of lists
embeddings_df = split_columns(embeddings)

embeddings_df.to_csv('Embeddings/embeddings_medellin_200features.csv',index=False)
# display the resulting df
embeddings_df

Unnamed: 0,Date,0,1,2,3,4,5,6,7,8,...,190,191,192,193,194,195,196,197,198,199
0,2015-11-01,-1.138282,-1.383034,-0.000583,1.071936,-0.359077,0.440569,-1.345535,-0.597920,-0.416173,...,-0.474824,-0.102958,0.874375,-0.633094,0.394676,-0.026532,1.617163,-0.549096,-1.465783,-0.298742
1,2015-11-08,-0.783712,-0.879078,0.890005,-1.460927,-0.009188,-1.029440,0.532122,-0.533219,0.888485,...,-0.330899,1.282552,0.930263,0.607250,0.775984,-0.530287,1.045188,-1.449551,-0.319409,-0.364504
2,2015-11-15,-0.711124,0.780505,0.632126,-0.204868,1.291417,-0.262743,0.276512,0.554111,0.619750,...,0.248257,-1.038282,0.387911,-0.306651,0.462835,0.191473,0.528304,0.136528,-0.337819,-1.640651
3,2015-11-22,0.385201,0.014108,0.822819,0.324864,-0.754227,-0.948322,0.411982,-0.068616,0.908859,...,0.783285,2.022241,-0.638509,-0.335895,0.242142,0.417413,-0.654682,1.334102,-0.062384,0.367632
4,2015-11-29,-0.322295,0.385246,-0.201923,0.216839,-1.210527,-0.358221,0.170528,1.724133,0.484462,...,0.293845,1.211233,-1.103105,0.358128,0.257894,-0.548598,0.288822,-0.375550,-1.440099,-0.490967
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,2018-11-25,-0.052258,-0.667916,-0.956824,-0.367436,0.448512,-0.789400,-0.207542,0.410352,0.352251,...,0.325295,-0.688529,-0.377220,-0.383202,0.036222,-0.198222,0.658092,-0.553342,-1.260451,0.073021
161,2018-12-02,-0.145688,-0.876603,-2.023767,-1.774763,0.994211,0.051375,-0.798159,0.571067,0.348390,...,0.097879,-0.876737,0.136812,-0.422020,-1.275541,-1.151508,-0.762427,0.169222,-1.052446,-0.145526
162,2018-12-09,-0.702847,0.571959,0.882312,0.987175,2.187126,0.237733,-1.960285,1.046651,-0.013636,...,0.488231,0.479391,1.622876,-1.240709,-1.143944,0.111055,1.009192,0.228006,-0.068509,0.137648
163,2018-12-16,0.670310,-1.389141,-0.947969,-0.934307,0.414450,2.052175,0.951190,0.656277,-0.016212,...,-0.130292,-1.692443,-0.561952,0.609132,-0.484486,0.623357,1.231843,0.448572,-0.006351,0.818285


In [26]:
pd.read_csv('Embeddings/embeddings_medellin_200features.csv')

Unnamed: 0,Date,0,1,2,3,4,5,6,7,8,...,190,191,192,193,194,195,196,197,198,199
0,2015-11-01,-1.138282,-1.383034,-0.000583,1.071936,-0.359077,0.440569,-1.345535,-0.597920,-0.416173,...,-0.474824,-0.102958,0.874375,-0.633094,0.394676,-0.026532,1.617163,-0.549096,-1.465783,-0.298742
1,2015-11-08,-0.783712,-0.879078,0.890005,-1.460927,-0.009188,-1.029440,0.532122,-0.533219,0.888485,...,-0.330899,1.282552,0.930263,0.607250,0.775984,-0.530287,1.045188,-1.449551,-0.319409,-0.364504
2,2015-11-15,-0.711124,0.780505,0.632126,-0.204868,1.291418,-0.262743,0.276512,0.554111,0.619750,...,0.248257,-1.038282,0.387911,-0.306651,0.462835,0.191473,0.528304,0.136528,-0.337819,-1.640651
3,2015-11-22,0.385201,0.014108,0.822819,0.324864,-0.754227,-0.948322,0.411982,-0.068616,0.908859,...,0.783285,2.022241,-0.638509,-0.335895,0.242142,0.417413,-0.654682,1.334102,-0.062384,0.367632
4,2015-11-29,-0.322295,0.385246,-0.201923,0.216839,-1.210527,-0.358221,0.170528,1.724133,0.484462,...,0.293845,1.211233,-1.103105,0.358128,0.257894,-0.548598,0.288822,-0.375550,-1.440099,-0.490967
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,2018-11-25,-0.052258,-0.667916,-0.956824,-0.367436,0.448512,-0.789400,-0.207542,0.410352,0.352251,...,0.325295,-0.688529,-0.377220,-0.383202,0.036222,-0.198221,0.658092,-0.553342,-1.260451,0.073021
161,2018-12-02,-0.145688,-0.876603,-2.023767,-1.774763,0.994211,0.051375,-0.798159,0.571067,0.348390,...,0.097879,-0.876737,0.136812,-0.422020,-1.275541,-1.151508,-0.762427,0.169222,-1.052446,-0.145526
162,2018-12-09,-0.702847,0.571959,0.882312,0.987175,2.187126,0.237733,-1.960285,1.046651,-0.013636,...,0.488231,0.479391,1.622876,-1.240709,-1.143944,0.111055,1.009192,0.228006,-0.068509,0.137648
163,2018-12-16,0.670310,-1.389141,-0.947969,-0.934307,0.414450,2.052175,0.951190,0.656277,-0.016212,...,-0.130292,-1.692443,-0.561952,0.609132,-0.484486,0.623357,1.231843,0.448572,-0.006351,0.818284
