# Deep Autoencoder applied to the stroke dataset
---


In this notebook, an *autoencoder* is implemented using `Keras`. It consists of a single hidden `Dense` layer (the encoder) followed by a `Dense` output layer (the decoder), each with a nonlinear activation function (here `LeakyReLU`).

In [None]:
# Change the working directory to the sibling "Notebook utilities" folder.
# Presumably this is where Data_Preprocessing.py lives, so that the star
# import in the next cell can find it — TODO confirm.
# NOTE: both moves are relative, so running this cell a second time walks
# further up the tree; run it exactly once per kernel session.
%cd ..
%cd "Notebook utilities"

In [1]:
#Importing libraries
import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd 
import random 
import tqdm 
import gc
import copy
from tqdm import tqdm
from skimage.metrics import structural_similarity as ssim
import time

import os 
from os.path import dirname, join as pjoin
from Data_Preprocessing import *

from sklearn.preprocessing import StandardScaler
from keras.layers import Input, Dense
from keras.models import Model
from tensorflow import keras
from sklearn.preprocessing import StandardScaler

import os
import sys
import inspect

In [None]:
### Set the random seed for reproducible results
# The models in this notebook are built with Keras/TensorFlow, so it is the
# TensorFlow RNG (weight initialisation, batch shuffling) that has to be seeded.
# `torch` is not imported in any of the cells shown and does not influence Keras.
# keras.utils.set_random_seed seeds Python's `random`, NumPy and the backend
# in a single call.
SEED = 1234
keras.utils.set_random_seed(SEED)

In [5]:
#Create dataset
#get data
# Absolute Google Drive paths (Colab mount point): the notebook only runs
# unchanged where Drive is mounted under /content/drive with this folder layout.
mat_path = '/content/drive/MyDrive/INTERN THESIS/FC_Stroke/FCMatrixImage_131subj.mat'
lang_path = '/content/drive/MyDrive/INTERN THESIS/FC_Stroke/language_score.xlsx'
# Passed as the third positional argument of get_arrays; presumably switches
# normalisation of the matrices on — TODO confirm in Data_Preprocessing.
Normalize = True

# get_arrays / vectorize_data are not defined in this notebook; presumably they
# come from the `Data_Preprocessing` star import — verify.
# NOTE: `fc_3d` is rebuilt from the raw .mat file by a later cell, so from this
# call only `language_score` and `ID` survive unchanged.
fc_3d, language_score, ID = get_arrays(mat_path, lang_path, Normalize)

#vectorizing matrices
# NOTE: `vect_mat` is recomputed further down, after all-zero subjects are removed.
vect_mat = vectorize_data(fc_3d)


In [6]:
# `sio` is used below but is not imported explicitly in any cell shown, so
# import it here instead of relying on a star import to provide it.
import scipy.io as sio
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tqdm import tqdm

# Load the raw MATLAB file; FC['img'][0] is indexed once per subject and
# element [1] of each record is used as that subject's matrix.
FC = sio.loadmat(mat_path)
subject_records = FC['img'][0]
n_subj = len(subject_records)
# The matrix side length is taken from the first subject.
len_data = len(subject_records[0][1])

sc = StandardScaler()

# Subjects skipped in the loop keep an all-zero matrix here, which a later
# cell uses to detect and drop them.
fc_3d = np.zeros((n_subj, len_data, len_data))

for subject in tqdm(range(n_subj)):
    subject_fc = subject_records[subject][1]

    # Skip subjects with no matrix, or whose matrix contains an exact zero.
    # NaN entries compare as != 0, so they do not trigger the skip.
    if len(subject_fc) == 0 or not np.all(subject_fc != 0):
        continue

    #converting NA to 0 values - The one in the diagonal are NA
    df = pd.DataFrame(subject_fc).fillna(0)

    # Column-wise standardisation, refitted independently for every subject.
    fc_3d[subject] = sc.fit_transform(df)

100%|██████████| 131/131 [00:00<00:00, 186.51it/s]


In [7]:
# Flag every subject whose whole matrix is zero, then remove those subjects
# along the first axis.
is_empty_subject = np.all(fc_3d == 0, axis=(1, 2))
indexes = np.flatnonzero(is_empty_subject).tolist()
fc_3d = np.delete(fc_3d, indexes, axis=0)

In [8]:
#vectorizing matrices
# Recompute `vect_mat` from the filtered `fc_3d` (all-zero subjects removed),
# replacing the version built right after get_arrays.
# vectorize_data is defined outside this notebook; presumably it returns one
# row per subject (the training cell uses vect_mat.shape[1] as the input
# width) — TODO confirm.
vect_mat = vectorize_data(fc_3d)

In [11]:
### Set the random seed for reproducible results
# The autoencoders are Keras/TensorFlow models, so the TensorFlow RNG must be
# seeded; `torch` is not imported in the cells shown and has no effect on Keras.
# keras.utils.set_random_seed seeds Python's `random`, NumPy and the backend.
SEED = 1234
keras.utils.set_random_seed(SEED)

# Bottleneck sizes to sweep: 10, 15, ..., 95 (the stop value 100 is excluded).
components = np.arange(10, 100, 5)

In [None]:
import time
from tensorflow import keras

# Side length handed to from_vec_to_mat when turning a vector back into a
# matrix for display; the same number appears in the MSE scaling below.
MATRIX_SIDE = 324
# Number of subjects shown in the original-vs-reconstruction figure.
N_PREVIEW = 10
# Bottleneck sizes whose per-subject errors are kept for the distribution plot
# in the last cell (its row labels are '10', '50', '90').
HISTOGRAM_DIMS = (10, 50, 90)


def plot_losses(history, encoding_dim):
    """Plot, save and show the train/validation loss curves of one fit."""
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model train vs validation loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper right')
    plt.savefig('Non_Losses'+str(encoding_dim))
    plt.show()
    plt.close()


def plot_reconstructions(originals, reconstructions, encoding_dim, n=N_PREVIEW):
    """Show the first `n` inputs (top row) above their reconstructions (bottom row)."""
    # Never ask for more panels than there are samples.
    n = min(n, len(originals))
    plt.figure(figsize=(20, 4))
    panels = ((originals, "original"), (reconstructions, "reconstructed"))
    for col in range(n):
        for row, (vectors, title) in enumerate(panels):
            ax = plt.subplot(2, n, col + 1 + row * n)
            plt.imshow(from_vec_to_mat(vectors[col], MATRIX_SIDE), cmap = 'jet')
            plt.title(title)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
    plt.savefig('Non_Linear_Recostructed_Images_'+str(encoding_dim))
    plt.show()
    plt.close()


inp_shape = vect_mat.shape[1]
input_img = Input(shape = (inp_shape,))
mse = np.zeros(components.shape[0])   # scaled mean reconstruction error per bottleneck size
se = np.zeros(components.shape[0])    # standard error of the per-sample errors
embedded_space = {}                   # 'n<dim>' -> encoded representation of vect_mat
weigth = {}                           # 'n<dim>' -> autoencoder.weights
timing = []                           # seconds spent per bottleneck size
# NOTE(review): the last cell of the notebook reads `histogram_rer`, which was
# not defined in any cell; collecting the per-sample errors for the three
# labelled sizes here is the presumed intent — confirm.
histogram_rer = []

for i, encoding_dim in enumerate(tqdm(components)):
    name = 'n{}'.format(encoding_dim)

    start_time = time.time()
    encoded = Dense(encoding_dim, activation='LeakyReLU')(input_img)
    decoded = Dense(inp_shape, activation='LeakyReLU')(encoded)
    # this model maps an input to its reconstruction
    autoencoder = Model(input_img, decoded)
    # Encoder: input -> bottleneck
    encoder = Model(input_img, encoded)
    # placeholder for an already-encoded (encoding_dim-dimensional) input
    encoded_input = Input(shape=(encoding_dim,))
    # the last layer of the autoencoder is reused as a stand-alone decoder
    decoder_layer = autoencoder.layers[-1]
    decoder = Model(encoded_input, decoder_layer(encoded_input))

    # Pass the configured optimizer itself; previously it was created but the
    # string 'adam' was handed to compile(), so `opt` was silently ignored.
    opt = keras.optimizers.Adam(learning_rate=0.001)
    autoencoder.compile(optimizer=opt, loss='mse')

    history = autoencoder.fit(vect_mat, vect_mat,
                              epochs=50,
                              batch_size=16,
                              validation_split=0.1,
                              shuffle=True)

    # Encode, then decode; the extra autoencoder.predict() whose result was
    # never used has been dropped.
    encoded_imgs = encoder.predict(vect_mat)
    decoded_imgs = decoder.predict(encoded_imgs)
    embedded_space[name] = encoded_imgs

    # Per-sample mean squared reconstruction error (over all samples, including
    # those Keras held out via validation_split).
    rer = np.mean((vect_mat - decoded_imgs)**2, 1)
    # NOTE(review): the mean is divided by sqrt(324) while `se` below is not, so
    # the two saved quantities are on different scales — confirm this is intended.
    mse[i] = np.mean(rer)/np.sqrt(MATRIX_SIDE)
    se[i] = np.std(rer, ddof=1) / np.sqrt(len(rer))
    weigth[name] = autoencoder.weights
    if int(encoding_dim) in HISTOGRAM_DIMS:
        histogram_rer.append(rer)
    end_time = time.time()
    timing.append(end_time - start_time)
    print(mse)

    #plot  losses
    plot_losses(history, encoding_dim)

    # originals vs reconstructions for the first few subjects
    plot_reconstructions(vect_mat, decoded_imgs, encoding_dim)

In [13]:
# Write the per-bottleneck-size error summaries from the training loop to text
# files in the current working directory (one value per line).
np.savetxt('MSE_NONLINEARAE.txt', mse)
# NOTE: despite the 'SD' in the file name, `se` is computed in the training
# cell as std(ddof=1) / sqrt(n), i.e. a standard error, not a standard deviation.
np.savetxt('SD_NONLINEARAE.txt', se)

In [14]:
import pickle

# Persist the results of the sweep. Context managers guarantee the files are
# closed (and buffers flushed) even if pickle.dump raises part-way through.

# 'n<dim>' -> encoded representation of the data for that bottleneck size
with open("FEATURES_NONLINEARAE.pkl", "wb") as features_file:
    pickle.dump(embedded_space, features_file)

# 'n<dim>' -> weights of the corresponding trained autoencoder
with open("WEIGHTS_NONLINEARAE.pkl", "wb") as weights_file:
    pickle.dump(weigth, weights_file)

In [None]:
# Kernel-density plot of the values in `histogram_rer`, one curve per row.
# `histogram_rer` must already exist when this cell runs and must yield exactly
# three rows, because three index labels are assigned below; presumably these
# are the per-sample reconstruction errors for bottleneck sizes 10, 50 and 90
# — TODO confirm.
data = pd.DataFrame(histogram_rer)
data.index = ['10', '50', '90']
import seaborn as sns
sns.set(font_scale = 2)
plt.figure(figsize=(15,7))
# Transposed so that each labelled row becomes a column, which seaborn draws as
# its own density; common_norm=False normalises each curve independently.
sns.kdeplot(data=data.T, fill=True, common_norm=False, palette="Accent",
   alpha=.5, linewidth=2)
plt.savefig('Distribution_nonLinear')
# Also export the underlying values so the figure can be rebuilt without retraining.
data.to_csv('nonlinear_distribution.csv')