In [2]:
import os
import pandas as pd
import datetime

import pathlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import importlib


from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model
from tensorflow import keras
from tensorflow.keras import callbacks  

# project specific
from utils import data_handler
from utils.models import cnn_encoder

# Register the TensorBoard notebook extension for this session.
%load_ext tensorboard
# Delete the log directory left by previous runs.
# NOTE(review): destructive, and it runs every time this cell is executed.
!rm -rf ../workfiles/logs/

In [45]:
# Re-import the data helper so that edits to the script are picked up without
# restarting the whole session.
importlib.reload(data_handler)

# Parameter grid forwarded to the loader as `sgdc_params`.
# Alternatives tried earlier (disabled):
#   'penalty': ["elasticnet", "l1", "l2"]
#   'l1_ratio': np.linspace(0.1, 1, 5)
sgdc_params = dict(
    penalty=["l1"],
    alpha=np.linspace(0.1, 0.5, 5),
)

# Loader options currently in use.
# Alternatives tried earlier (disabled):
#   feature_selection_proceedure = "LASSO"
#   retain_phases = None
#   subsample = 100
#   class_balancing = "match_smaller_sample"
loader_kwargs = dict(
    feature_selection_threshold=2,
    retain_phases="Both",
    return_id=True,
    sgdc_params=sgdc_params,
    class_balancing="balanced",
)

# The loader returns the training data, the matching sample names and the
# number of retained genes (used below to size the model input).
x_train, filenames, n_genes = data_handler.generate_timeseries_dataset(**loader_kwargs)


loading samples...
loaded 1585 samples
selecting genes based on median absolute deviation threshold:  2 ...
number of genes selected :  14864
normalizing data...
normalization done
number of seq to be analized : 1585
number of actual individual to be studied : 317
5


In [90]:
# Re-import the model definition so that edits to the script are picked up
# without restarting the whole session.
importlib.reload(cnn_encoder)

# Model dimensions: size of the latent code and number of time points per sequence.
latent_dim, sequence_length = 64, 5
t_shape = (sequence_length, n_genes)

# Build the autoencoder and train it on reconstruction error (MSE) with Adam.
autoencoder = cnn_encoder.generate_model(t_shape, latent_dim)
reconstruction_loss = tf.keras.losses.MeanSquaredError()
autoencoder.compile(optimizer='adam', loss=reconstruction_loss)

In [91]:
# Every callback below monitors the training loss ('loss').

# Checkpoint: keep only the weights of the best (lowest-loss) epoch.
checkpoint_filepath = '../workfiles/simple_ae/checkpoint'
checkpoint_options = dict(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='loss',
    mode='min',
    save_best_only=True,
)
model_checkpoint_callback = callbacks.ModelCheckpoint(**checkpoint_options)

# Learning-rate schedule: multiply the rate by 0.5 after 25 epochs without
# improvement, with a floor of 0.00001.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,
    patience=25,
    min_lr=0.00001,
)

# Early stopping: give up after 50 epochs without improvement.
early_stopping_callback = callbacks.EarlyStopping(monitor='loss', patience=50)

# TensorBoard: one timestamped run directory per execution of this cell.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "../workfiles/logs/fit/" + run_stamp
tensorboard_callback = callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Callback list handed to `fit`.
cb = [
    model_checkpoint_callback,
    reduce_lr,
    early_stopping_callback,
    tensorboard_callback,
]

In [93]:
# Optional model inspection, currently disabled: build the model with an
# explicit input shape, then print the encoder and decoder summaries.
#autoencoder.build(input_shape = (None, sequence_length, n_genes))
#autoencoder.encoder.summary()
#autoencoder.decoder.summary()

In [95]:
# Train for at most 2000 epochs; the EarlyStopping callback in `cb` can end the
# run sooner. The returned History object is kept for the loss plot.
# NOTE(review): no targets are passed, so x_train presumably yields
# (input, target) pairs itself — confirm in data_handler.
hist = autoencoder.fit(x_train, epochs=2000, callbacks=cb)

Epoch 1/2000
Epoch 2/2000

In [None]:
# Restore the weights that the ModelCheckpoint callback wrote to
# checkpoint_filepath (best training loss seen during fit).
autoencoder.load_weights(checkpoint_filepath)


In [None]:
# Training-loss history recorded by `fit`, one value per completed epoch:
# draw the curve, then dump the raw values.
loss_history = hist.history['loss']
plt.plot(loss_history)
print(loss_history)




In [6]:
# Take the first element produced by x_train (presumably one batch — TODO
# confirm against data_handler) and run it through the encoder.
# The built-in next() is used instead of the Python-2-style `.next()` method,
# which Python 3 iterators are not required to provide.
e = next(iter(x_train))
z = autoencoder.encoder(e)
# Bare expression: display the encoded tensor as the cell output.
z

<tf.Tensor: shape=(64, 6161), dtype=float32, numpy=
array([[  28.0786,   48.2889,   31.8768, ...,   21.601 ,   14.5302,
          43.2891],
       [  34.8483,   55.0274,   36.8456, ...,   28.7308,   33.4212,
          42.899 ],
       [  31.6444,   57.4141,   41.5785, ...,   37.2859,   22.7498,
          95.4406],
       ...,
       [  29.2446,   57.8746,   54.0729, ...,   83.0774,   39.6068,
         172.727 ],
       [  42.2485,   58.0206,   36.2277, ...,   65.3682,   19.7801,
         115.36  ],
       [1085.74  ,   54.7478,   37.7932, ...,   68.5695,   75.1809,
         106.025 ]], dtype=float32)>

In [7]:
# Decode the latent sample `z` and print the reconstruction followed by the
# original input `e`, for a side-by-side look.
reconstruction = autoencoder.decoder(z)
print(reconstruction, e, sep="\n")

<tf.Tensor: shape=(64, 64), dtype=float32, numpy=
array([[0.0000000e+00, 1.1011568e+00, 8.5003336e-24, ..., 5.6258583e-33,
        6.5438644e+01, 0.0000000e+00],
       [0.0000000e+00, 8.4733200e+01, 1.3865252e-31, ..., 0.0000000e+00,
        5.2268614e-14, 0.0000000e+00],
       [0.0000000e+00, 1.2151521e+01, 0.0000000e+00, ..., 0.0000000e+00,
        0.0000000e+00, 2.0064553e-29],
       ...,
       [5.5866787e-33, 5.3734863e+01, 4.0491797e-14, ..., 1.0285117e-18,
        5.3045254e+01, 3.3808381e-09],
       [1.8548418e-27, 6.9199547e+01, 6.2420300e-17, ..., 0.0000000e+00,
        9.0398363e-15, 0.0000000e+00],
       [0.0000000e+00, 1.0115930e+02, 3.0693047e-25, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00]], dtype=float32)>

In [8]:
# Persist only the encoder half of the model.
# NOTE(review): the directory name is spelled "autoencoer"; left unchanged
# because other code may load the model from this exact path.
autoencoder.encoder.save('../workfiles/cnn_autoencoer_model')






INFO:tensorflow:Assets written to: ../workfiles/cnn_autoencoer_model/assets


INFO:tensorflow:Assets written to: ../workfiles/cnn_autoencoer_model/assets


In [9]:
# Encode the whole training set with the encoder.
# NOTE(review): despite its name this is the raw output of `predict`, not a
# pandas DataFrame; it is wrapped in a DataFrame in the next cell.
compressed_dataframe = autoencoder.encoder.predict(x_train)



In [10]:
# Tabulate the encoded samples and attach each sample's name.
# NOTE(review): assumes `filenames` is in the same order as the rows produced
# by predict(x_train) — confirm the dataset is not shuffled.
df = pd.DataFrame(compressed_dataframe).assign(name=filenames)

In [11]:
# Write the encoded samples (plus the "name" column) to CSV, without the index.
df.to_csv("../workfiles/compressed_data_cnn_autoencoder_phase_2.csv", index = False)
