## License
This file is part of the project megFingerprinting. All of the megFingerprinting code is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. megFingerprinting is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with megFingerprinting. If not, see https://www.gnu.org/licenses/.

In [2]:
import difflib
from fuzzywuzzy import fuzz
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
from os import listdir
from os.path import isfile, join
import pandas as pd
import re
import seaborn as sns
import scipy as sp
import scipy.io as sio
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression
from scipy import stats
from scipy.stats import pearsonr
sns.set(font_scale=2)
sns.set_style("whitegrid")
sns.set_palette(sns.color_palette("husl", 8))



In [3]:
def mean_confidence_interval(data, confidence=0.95):
    '''
    This function returns the mean of the data together with the bounds of its central
    percentile interval. The bounds are empirical percentiles of the values in data (the
    2.5th and 97.5th percentiles for confidence=0.95); they are NOT derived from the
    standard error of the mean
    Args:
        data (array-like): Numeric values; mean and percentiles are taken over all elements
        confidence (float): Fraction of the values lying between the two bounds
    Returns: 
        m (float): Mean of data
        l (float): Lower percentile bound
        h (float): Upper percentile bound
    '''
    a = 1.0 * np.array(data)  # Promote to float so integer input behaves like float input
    tail = (1-confidence)/2   # Probability mass left outside the interval on each side
    m = np.mean(a)
    l = np.percentile(a, tail*100)
    h = np.percentile(a, (1-tail)*100)
    return m, l, h



# I. Subject Identifiability 

In [80]:

def prune_subject_csv(filename, roi):
    '''
    This function takes in the subject's csv file from MATLAB, takes out the row of the 
    ROI that is being left out, and outputs a numpy array ready to be concatenated
    in the grand feature matrix
    Args:
        filename (string): Path of the csv matrix. Characters 19 to 22 of this string are
            parsed as the subject label, so the path has to keep the layout the caller uses
        roi (int): Index of the row to leave out (rows are compared against 0..67)
    Returns: 
        sub_feat (np.array): Subject's features followed by the subject label, 
            shape (1, n_feats + 1)
    '''

    # n_feats is a module-level parameter; it has to equal the number of values kept below
    sub_feat = np.zeros([1, (n_feats)+1]) # Flattened features + subject label
    psd_matrix = pd.read_csv(filename, header=None)
    # Plain ndarray instead of np.matrix, so that the selection flattens to a 1-D vector
    mat = np.asarray(psd_matrix)
    keep_rows = np.arange(68) != roi # True for every row except the one left out
    # Row-major flattening: the kept rows are laid out one after the other
    sub_feat[0, :-1] = mat[keep_rows, 0:451].ravel()
    # NOTE(review): fixed character positions, presumably the numeric subject ID - confirm
    # against the file naming scheme whenever the data directory changes
    sub_feat[0, -1] = int(filename[19:23])     
    return sub_feat

# Parameters
n_subs = 133 # Change here to get number of participants! 
n_rois = 68 # Rows in each subject csv; every iteration below leaves one of them out
n_feats = int((n_rois-1)*451) # Rows kept after leaving one out, times the 451 columns read per row
n_measurements = 2
self_id_roi = np.zeros((n_rois,n_subs)) # Self-identifiability: one row per left-out ROI
accuracy_roi=np.zeros((2,n_rois)) # Row 0: target from database, row 1: database from target

# The directory content does not depend on the left-out ROI, so list and sort it only once
data_dir = 'NEWspectraFUL/'
onlyfiles = [f for f in listdir(data_dir) if isfile(join(data_dir, f))]
subject_files = sorted(onlyfiles)[0:(n_subs*n_measurements)]

# Get n subjects: both training and testing datasets
for i in range(n_rois):
    sub_target = np.zeros([n_subs, (n_feats)+1])
    sub_database = np.zeros([n_subs, (n_feats)+1])
    iv = 0
    it = 0
    for iFile in subject_files: 
        sub = data_dir + iFile
        # Files with a 'v' at this position of the path fill the target set, all the others
        # fill the database set
        # NOTE(review): fixed character position - confirm against the file naming scheme
        if sub[28] == 'v':
            sub_target[iv, :] = prune_subject_csv(sub, i)
            iv += 1
        else:
            sub_database[it, :] = prune_subject_csv(sub, i)
            it += 1

    # Rows that were never filled stay at zero and z-score to NaN, which would silently
    # corrupt every accuracy below, so stop early if a session is incomplete
    if iv != n_subs or it != n_subs:
        raise ValueError('Expected ' + str(n_subs) + ' files per session but found ' 
                         + str(iv) + ' target and ' + str(it) + ' database files')

    # Correlations can be computed as the dot product between two z-scored vectors
    z_target = sp.stats.zscore(sub_target[:, :-1], axis = 1)
    z_database = sp.stats.zscore(sub_database[:,:-1], axis = 1)
    # The last column holds the subject label, so shape[1] - 1 is the number of features
    predictions = z_target.dot(z_database.transpose()) / (sub_database.shape[1] - 1) # target, database
    # Row k of both sets is treated as the same subject: a hit is a maximum on the diagonal
    target_from_database = accuracy_score(range(n_subs), predictions.argmax(axis = 1))
    database_from_target = accuracy_score(range(n_subs), predictions.argmax(axis = 0))

    print('When predicting the target from the database, we get a ' + str(target_from_database*100)[0:5] + '% accuracy')
    print('When predicting the database from the target, we get a ' + str(database_from_target*100)[0:5] + '% accuracy')
    
    accuracy_roi[0,i]=target_from_database*100
    accuracy_roi[1,i]=database_from_target*100
    # For the figure, we also get self-identifiability: the z-score of each subject's 
    # correlation with themselves relative to their correlations with the whole database
    self_id_roi[i,:]= np.diagonal(sp.stats.zscore(predictions, axis = 1))

print(self_id_roi)

When predicting the target from the database, we get a 77.44% accuracy
When predicting the database from the target, we get a 82.70% accuracy
When predicting the target from the database, we get a 77.44% accuracy
When predicting the database from the target, we get a 82.70% accuracy
When predicting the target from the database, we get a 77.44% accuracy
When predicting the database from the target, we get a 82.70% accuracy
When predicting the target from the database, we get a 77.44% accuracy
When predicting the database from the target, we get a 82.70% accuracy
When predicting the target from the database, we get a 77.44% accuracy
When predicting the database from the target, we get a 82.70% accuracy
When predicting the target from the database, we get a 77.44% accuracy
When predicting the database from the target, we get a 82.70% accuracy
When predicting the target from the database, we get a 77.44% accuracy
When predicting the database from the target, we get a 82.70% accuracy
When p

In [81]:
# Save the self-identifiability scores (rows: left-out ROI, columns: subjects) as csv
roi_scores_csv = "AIC_ROI_differentiability_fullband.csv"
df = pd.DataFrame(data=self_id_roi)
df.to_csv(roi_scores_csv)
