# MSE

### Imports

In [1]:
import os 
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm
import tensorflow as tf
import matplotlib as mpl
from IPython import display
import time
from sklearn.metrics import silhouette_score
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
import seaborn as sns
from matplotlib import colors
from sklearn.decomposition import PCA

from make_models import get_MRI_VAE_3D,get_MRI_CVAE_3D
from rsa_funcs import fit_rsa,make_RDM,get_triu
import ants

### Load and Sort Data

In [2]:
# Open the compressed .npz archive that holds the simulated brain array.
sim_archive_path = '../../BC-sim/BC-sim-bigdata/synth-data-01/sim-brain-array.npz'
data_arr = np.load(sim_archive_path)

In [3]:
# Show which arrays the archive contains.
print(list(data_arr.keys()))

# Pull the three arrays out of the archive.
data = data_arr['data']          # brain volumes for every subject
controls = data_arr['controls']  # selector for the TD/control subjects along the first axis
patients = data_arr['patients']  # selector for the ADHD/patient subjects along the first axis
n = data.shape[0]                # total number of subjects (length of the first axis)

# Split the volumes by group; indexing only the first axis keeps the full volume per subject.
data_patients = data[patients]   # ADHD brain data
data_controls = data[controls]   # TD brain data

print(data.shape)           # (subjects, voxels x, voxels y, voxels z)
print(data_patients.shape)  # (patients, voxels x, voxels y, voxels z)

['data', 'controls', 'patients']
(1000, 64, 64, 64)
(500, 64, 64, 64)


### CVAE

In [4]:
# Hyperparameters handed to get_MRI_CVAE_3D. The notes on beta, disentangle and gamma
# are carried over from the original author's comments.
cvae_config = dict(
    input_shape=(64, 64, 64, 1),
    latent_dim=2,
    beta=1,             # controls how far latent features may drift from a normal distribution; stronger beta = more normally distributed features
    disentangle=False,  # activates the gamma-driven decorrelation; planned to be True next time
    gamma=1,            # total-correlation loss penalising correlated z and s features; can be increased to 100
    bias=True,
    batch_size=64,
    kernel_size=3,
    filters=32,
    intermediate_dim=128,
    opt=None,
)

# The builder returns the full model plus its two encoders and the decoder.
cvae, z_encoder, s_encoder, cvae_decoder = get_MRI_CVAE_3D(**cvae_config)

### Load Weights

In [5]:
# Restore previously saved weights into the CVAE; the call stays last so its status object is displayed.
weights_path = '/mmfs1/data/bergerar/BC-sim/BC-sim-bigdata/synth-data-01/sim_weights_7500_epochs'
cvae.load_weights(weights_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x15544012b340>

### Get Reconstructions

In [6]:
def get_reconstruction(data_in,adhd = True): 
    """Reconstruct brain volumes from the CVAE latent features.

    ``data_in`` is encoded with the notebook-level ``z_encoder`` (shared features)
    and ``s_encoder`` (specific features). Only the first element of each
    encoder's output is used (the original comment lists the outputs as
    "mu,std,sample"). The latent features are then decoded with ``cvae_decoder``.

    Parameters
    ----------
    data_in : array-like
        Batch of volumes accepted by both encoders' ``predict``.
    adhd : bool, default True
        If truthy, decode from shared and specific features together (ADHD
        reconstruction). Otherwise the specific features are replaced by
        zeros, so the reconstruction relies on the shared features only
        (TD reconstruction).

    Returns
    -------
    The decoder output with its last axis dropped (index 0 of that axis is kept).
    """
    z = z_encoder.predict(data_in)[0] # first output only (mu, per "mu,std,sample")
    s = s_encoder.predict(data_in)[0]

    if adhd:
        ll = np.hstack((z, s))
    else:
        # zeros_like keeps the encoder dtype; np.zeros(s.shape) would be float64
        # and silently upcast the whole latent matrix in this branch only.
        ll = np.hstack((z, np.zeros_like(s)))

    recon = cvae_decoder(ll)[:,:,:,:,0]
    return recon

In [None]:
%%time
recon_patients = get_reconstruction(data_patients,adhd = True) # reconstructs ADHD brains
recon_controls = get_reconstruction(data_controls,adhd = False) # reconstructs TD brains

In [None]:
# Sanity check: display the shape of the reconstructed patient data.
recon_patients.shape # shape of the reconstructed patient volumes

In [None]:
# Spot check on the first patient: original volume versus its reconstruction.
true_data = data_patients[0]
predict_data = recon_patients[0]
# Keras loss object with its default reduction ('auto' / 'sum_over_batch_size').
mse = tf.keras.losses.MeanSquaredError()
mse(y_true=true_data, y_pred=predict_data).numpy()

### Mean Squared Error

In [None]:
# Per-subject reconstruction error: mean squared voxel difference between each
# patient's original volume and its reconstruction.
n_patients = data_patients.shape[0]  # taken from the data rather than a hard-coded 500

mse_list = []  # one MSE value per patient, in subject order
for subj in range(n_patients):
    original = np.asarray(data_patients[subj])        # original brain
    reconstructed = np.asarray(recon_patients[subj])  # reconstructed brain
    # Use a dedicated name for the scalar so it does not rebind `mse`,
    # which an earlier cell assigns to the Keras loss object.
    subject_mse = ((original - reconstructed) ** 2).mean()
    mse_list.append(subject_mse)

mse_arr = np.array(mse_list)

In [None]:
# Histogram of the per-subject reconstruction MSE values.
label_size = 15
ax = plt.gca()  # the axes the pyplot calls would draw on
ax.hist(
    mse_arr,
    alpha=0.8,
    color='chartreuse',
    edgecolor='black',
    linewidth=0.4,
)
ax.set_title('Mean Squared Error for Reconstructions', fontsize=label_size)
ax.set_xlabel('MSE', fontsize=label_size)
ax.set_ylabel('Number of Subjects', fontsize=label_size)
plt.show()

### Deviation from the Mean Brain

In [None]:
# Mean reconstructed patient brain. Averaging over the subject axis only, with
# keepdims=True, leaves one volume with a leading length-1 axis instead of
# collapsing everything to a single number.
# NOTE(review): the previous line reshaped recon_patients to [-1,64,64,64], which
# averages nothing; an average over subjects is assumed from the variable name.
# Confirm that recon_patients (not data_patients) is the intended source.
mean = np.asarray(recon_patients).mean(axis=0, keepdims=True)
mean.shape

In [None]:
# Baseline error: how far each original patient brain is from the mean brain.
# The mean is taken over the subject axis only (keepdims=True), so the stack of
# subject volumes reduces to one volume with a leading length-1 axis rather than
# being squashed down to a single number.
# NOTE(review): the source left `mean` unassigned. The mean *reconstructed* patient
# brain is assumed here; switch to data_patients if the mean original brain is
# the intended baseline.
mean = np.asarray(recon_patients).mean(axis=0, keepdims=True)

mean_list = []  # one baseline MSE per patient, in subject order
for s in range(data_patients.shape[0]):  # every patient, not a hard-coded 500
    datas2 = np.array(data_patients[s,:,:,:])  # original brain of the current subject
    # Compare the current subject (datas2) with the mean; the leading length-1
    # axis of `mean` broadcasts against the single volume.
    mean_from_original = ((datas2 - mean) ** 2).mean()
    mean_list.append(mean_from_original)