# Independent Component Analysis

In this notebook we use the `FastICA` algorithm provided by `sklearn` to extract the relevant features, varying the number of components between 10 and 100.


First, let's import...


In [1]:
# Change the working directory: go up one level, then into the "Notebook utilities" folder.
# Presumably this is where the local helper modules imported in the next cell live — TODO confirm.
%cd ..
%cd "Notebook utilities"

C:\Users\delir\Documents\Manifold_learning\Feature_Extraction
C:\Users\delir\Documents\Manifold_learning\Feature_Extraction\Notebook utilities


In [2]:
#Importing libraries
# Standard library
import copy
import inspect
import os
import pickle
import sys
from os.path import dirname, join as pjoin

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tqdm
from sklearn.metrics import mean_squared_error

# Local helper modules (Data_Preprocessing stays last so its names take precedence, as before)
from Standar import *
from Data_Preprocessing import *

In [None]:
import scipy.io as sio
import numpy as np
from sklearn.decomposition import FastICA
from skimage.metrics import structural_similarity as ssim

In [None]:
### Set the random seed for reproducible results
SEED = 1234
np.random.seed(SEED)

# torch is not imported anywhere in this notebook, so calling torch.manual_seed
# directly raises NameError on a fresh kernel. Seed it only when it is installed.
try:
    import torch
except ImportError:
    torch = None

if torch is not None:
    torch.manual_seed(SEED)


## 1.1  Loading Dataset
---
#### FC matrices and a behavioural score (language) of 131 stroke patients, contained in a .mat file.
The data is loaded from the .mat and .xlsx files and converted to a 3D array containing, for each patient, the corresponding FC matrix. Some patients have no values (their entries are empty); those patients are simply removed and not considered. Furthermore, NA values are converted to 0.

##### Vectorizing matrices
Following the paper, we exploit the symmetry of each matrix and convert it into a vector.

In [None]:
# Locate the project root relative to the working directory.
# inspect.getfile(inspect.currentframe()) is unreliable inside a notebook: on
# recent ipykernel versions the cell "file" lives in a temporary directory, so
# the derived parent directory would not contain the DATA folder. The current
# working directory is what the old expression resolved to when it worked.
currentdir = os.getcwd()
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

#get data
mat_path = parentdir+'/DATA/FC_Stroke/FCMatrixImage_131subj.mat'
lang_path = parentdir+'/DATA/FC_Stroke/language_score.xlsx'
Normalize = True

# get_arrays comes from the local helper modules; per the notebook text it
# returns the FC matrices, the language scores and the patient IDs.
fc_3d, language_score, ID = get_arrays(mat_path, lang_path, Normalize)

#vectorizing matrices
vect_mat = to_vector(fc_3d)

#standarize: z-score each patient's vector (zero mean, unit standard deviation)
for i in range(len(vect_mat)):
    vect_mat[i] = (vect_mat[i] - vect_mat[i].mean())/ vect_mat[i].std()

## 1.2 Extract features

----

Features are extracted using `FastICA`. The structural similarity index and the mean squared error are computed. The code is based on: https://github.com/fcalesella/ccn_project

In [None]:
# Sweep over the number of independent components. For every setting:
#   * fit FastICA and store the extracted sources and the mixing matrix,
#   * reconstruct the input and record the per-sample reconstruction error,
#   * compute the structural similarity (SSIM) between input and reconstruction,
#   * save a figure comparing originals and reconstructions.
inp = np.asarray(vect_mat)
components = np.arange(10, 100, 5)
N_EXAMPLES = 4  # number of samples shown in each comparison figure

ica = {}  # 'n<components>' -> sources returned by fit_transform
mix = {}  # 'n<components>' -> transformer.mixing_
mse = np.zeros(components.shape[0])
se = np.zeros(components.shape[0])

struc = np.zeros(components.shape[0])
struc_se = np.zeros(components.shape[0])
histogram_rer = []  # per-sample reconstruction errors, one array per setting

for i, comp in enumerate(components):
    transformer = FastICA(n_components=comp, max_iter=1000, random_state=1234)
    name = 'n{}'.format(comp)
    sica = transformer.fit_transform(inp)
    ica[name] = sica
    reco = transformer.inverse_transform(sica)
    rer = np.mean((reco - inp)**2, 1)

    histogram_rer.append(rer)

    ##SSIM index
    # The data is floating point, so data_range must be given explicitly:
    # recent scikit-image versions raise without it, and older ones silently
    # assume a range of 2, which does not match standardized data.
    s_ = [
        ssim(inp[j], reco[j], data_range=inp[j].max() - inp[j].min())
        for j in range(len(inp))
    ]
    struc[i] = np.mean(s_)
    # NOTE(review): unlike `se` below, this is the sample standard deviation and
    # is not divided by sqrt(n) — confirm which statistic is intended.
    struc_se[i] = np.std(s_, ddof=1)

    #reconstruction error (mean and standard error of the mean)
    mse[i] = np.mean(rer)
    se[i] = np.std(rer, ddof=1) / np.sqrt(len(rer))
    mix[name] = transformer.mixing_

    #plotting the samples with the lowest reconstruction error (argsort is ascending)
    idx = np.argsort(rer)
    n_shown = min(N_EXAMPLES, len(idx))
    fig, axes = plt.subplots(2, n_shown, figsize=(8, 4), squeeze=False)
    # A separate loop variable is used so the outer index `i` is not overwritten.
    for col in range(n_shown):
        sample = idx[col]
        panels = (
            (axes[0, col], inp[sample], "original"),
            (axes[1, col], reco[sample], "reconstructed"),
        )
        for ax, vector, title in panels:
            ax.imshow(to_matrix(vector), cmap='jet')
            ax.set_title(title)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

    fig.savefig('ICA_Recostructed_Images'+str(comp))
    # Show the figure in the notebook, then release it so the 18 figures of the
    # sweep do not accumulate in memory.
    plt.show()
    plt.close(fig)
    


## 1.3 Saving data
----

In [None]:
# Compare the distribution of per-sample reconstruction errors for three
# component counts, and export the same data to CSV.
#
# The rows are selected by component value rather than by hard-coded position:
# with components = arange(10, 100, 5), positions 0, 9 and 17 hold 10, 55 and
# 95 components, which did not match the '10', '50', '90' labels.
selected_components = [10, 50, 90]
position_of = {int(c): k for k, c in enumerate(components)}
data = pd.DataFrame(
    [histogram_rer[position_of[c]] for c in selected_components],
    index=[str(c) for c in selected_components],
)

sns.set(font_scale = 2)
plt.figure(figsize=(15,7))
# One density curve per selected component count (data.T has one column each).
sns.kdeplot(data=data.T, fill=True, common_norm=False, palette="Accent",
   alpha=.5, linewidth=2)
plt.savefig('Distribution_ICA')
data.to_csv('ica_distribution.csv')

In [26]:
# Write the per-setting metrics to text files, one value per line.
# The output folder is defined once; it defaults to the original location and
# can be redirected with the ICA_RESULTS_DIR environment variable so the
# notebook also runs on other machines.
RESULTS_DIR = os.environ.get(
    'ICA_RESULTS_DIR',
    'C:\\Users\\delir\\Documents\\Score_Prediction_FMRI\\Results\\Results_Extractors\\ICA_RESULTS',
)
os.makedirs(RESULTS_DIR, exist_ok=True)

metric_files = (
    ('MSE_ICA.txt', mse),             # mean reconstruction error
    ('SD_ICA.txt', se),               # standard error of the reconstruction error
    ('STRUC_MSE_ICA.txt', struc),     # mean SSIM
    ('STRUC_SD_ICA.txt', struc_se),   # standard deviation of SSIM
)
for filename, values in metric_files:
    np.savetxt(os.path.join(RESULTS_DIR, filename), values)

In [27]:
import pickle

# Persist the extracted ICA features and the mixing matrices, both keyed by
# 'n<components>'. The with-blocks close each file even if pickling fails.
with open("FEATURES_ICA.pkl", "wb") as features_file:
    pickle.dump(ica, features_file)

with open("WEIGHTS_ICA.pkl", "wb") as weights_file:
    pickle.dump(mix, weights_file)