In [None]:
import numpy as np 
import scipy as sp
import pandas as pd
from datetime import datetime
from utils import *

import sys
import os
import json
from sklearn.preprocessing import PowerTransformer
import matplotlib.pyplot as plt

In [None]:
# Output directories: json_path is the folder of the generated config JSON,
# mat_path is where the .mat file and the pickled y-transformer are written.
json_path = '/path/to/CPM_json'
mat_path = '/path/to/CPM_mat'

# Settings

In [None]:
# Run settings. Every value below is embedded in the output JSON file name.
t = 'bsl'                # timepoint: 'bsl' or 'y2'
k = 10                   # number of folds
p_thresh = 0.05          # p threshold for significant edges
repeat = 10              # number of iterations for true data
num_iter = 0             # number of iterations for permutation tests
behav_pc = 'rsfmri_sal'  # column of the behaviour table used as the target
num_roi = 86
num_contrasts = 8
zscore = False           # whether to z-score training edge strengths
mode = 'linear'          # 'ridge' or 'linear'
y_norm = 'id'            # 'id', 'yj', or 'norm'

# Input locations.
beta_path = '/path/to/coactivation_matrices_{}_{}ROIs_rm_nBack'.format(t, num_roi)
behav_path = '/path/to/data'

# Path of the config JSON; the template is split over two literals for
# readability and concatenates to a single format string.
out_path = (
    '{}/{}fold_p_thresh_{}_repeat{}_iter{}_behav_{}_timepoint_{}'
    '_{}ROI_{}contrasts_rm_nBack_z{}_mode_{}_ynorm_{}.json'
).format(
    json_path, k, p_thresh, repeat, num_iter, behav_pc, t,
    num_roi, num_contrasts, int(zscore), mode, y_norm,
)

In [None]:
# Display the resolved path of the config JSON.
out_path

# Mat generation

## Read raw data

In [None]:
# Read the list of usable subject keys, one key per line of the text file.
with open('{}/lst_usable_{}.txt'.format(beta_path, t), 'r') as subj_file:
    lst_subjkey_img = subj_file.read().splitlines()

print(
    "#subj to include based on task fMRI data inclusion criteria and NaN situation: {}".format(
        len(lst_subjkey_img)
    )
)

In [None]:
# Peek at the first three subject keys.
lst_subjkey_img[:3]

In [None]:
# Load the behaviour table; the first CSV column is used as the index.
# NOTE(review): the file name is fixed to 'upps_bsl.csv' and does not follow
# the timepoint setting `t` -- confirm this is intended when t != 'bsl'.
df_behav = pd.read_csv("{}/upps_bsl.csv".format(behav_path), index_col=0)
df_behav

In [None]:
# Keep the subject id, event name and target column, drop rows with any
# missing value, renumber the rows, and copy the subject id into a
# 'subjectkey' column (the name the later cells look subjects up by).
df_behav = (
    df_behav[['src_subject_id', 'eventname', behav_pc]]
    .dropna(how='any')
    .reset_index(drop=True)
    .assign(subjectkey=lambda frame: frame['src_subject_id'])
)
df_behav

In [None]:
# Subject keys that still have a behaviour row after the NaN filtering.
lst_subjkey_behav = df_behav['subjectkey'].tolist()
print(
    "#subj have availability of behav data: {}".format(len(lst_subjkey_behav))
)

In [None]:
# Subjects present in both the imaging list and the behaviour table, kept in
# imaging-list order. Membership is tested against a set so the filter is
# linear in the number of subjects instead of scanning the list every time.
_behav_keys = set(lst_subjkey_behav)
lst_subjkey_overlap = [x for x in lst_subjkey_img if x in _behav_keys]
print("Final number of subjects: {}".format(len(lst_subjkey_overlap)))

In [None]:
# Build the list of per-subject matrix files for the overlapping subjects.
# generate_file_list is not defined in this notebook (presumably it comes from
# `from utils import *`); the printed length is the number of entries returned.
lst_mat = generate_file_list(beta_path, lst_subjkey_overlap, num_roi, num_contrasts, t)
print(len(lst_mat))

In [None]:
# Peek at the first three entries of the file list.
lst_mat[:3]

In [None]:
# Load the matrices named in lst_mat into a single array-like X.
# read_mats is not defined here (presumably from utils); the layout of X is
# whatever that helper returns -- check the printed shape.
X = read_mats(lst_mat)
print(X.shape)

In [None]:
# Target vector aligned with lst_subjkey_overlap: for each subject, take the
# value of the target column from that subject's first row in df_behav.
# A single indexed lookup replaces the per-subject boolean-mask scan.
key = behav_pc
y = (
    df_behav
    .drop_duplicates(subset='subjectkey', keep='first')  # first row per subject
    .set_index('subjectkey')
    .loc[lst_subjkey_overlap, key]
    .to_numpy()
)
print(y.shape)

In [None]:
# Display the target vector.
y

In [None]:
# Histogram of the target values, labelled so the figure stands on its own.
fig, ax = plt.subplots()
ax.hist(y, bins=20)
ax.set(
    xlabel=behav_pc,
    ylabel='Number of subjects',
    title='Distribution of {}'.format(behav_pc),
)
plt.show()

## Transform y

In [None]:
# Transform the target according to the y_norm setting ('id', 'yj', or 'norm').
# y_transform is not defined here (presumably from utils); it returns the
# transformed values and a transformer object that is pickled further down.
yn, transformer = y_transform(y, y_norm)

In [None]:
# Display the transformed target values.
yn

In [None]:
# Histogram of the transformed target, labelled with the transform in use.
fig, ax = plt.subplots()
ax.hist(yn, bins=20)
ax.set(
    xlabel='{} (y_norm={})'.format(behav_pc, y_norm),
    ylabel='Number of subjects',
    title='Distribution of transformed {}'.format(behav_pc),
)
plt.show()

In [None]:
import pickle

# Save the transformer object returned by y_transform next to the .mat file.
filename = '{}/ymodel_{}_{}roi_{}contrast_ynorm_{}.pkl'.format(mat_path, behav_pc, num_roi, num_contrasts, y_norm)
# The context manager closes (and flushes) the file even if pickling fails.
with open(filename, 'wb') as pkl_file:
    pickle.dump(transformer, pkl_file)

In [None]:
# Read the pickled transformer back as a round-trip check. The context manager
# ensures the file handle is closed. Only unpickle files you trust.
with open(filename, 'rb') as pkl_file:
    transformer = pickle.load(pkl_file)

In [None]:
# Display the transformer that was loaded back from the pickle file.
transformer

In [None]:
# Smallest transformed target value.
np.min(yn)

In [None]:
# Largest transformed target value.
np.max(yn)

## Save mat

In [None]:
# Hand X, the transformed targets and the subject keys to save_matlab_mat
# (not defined here; presumably from utils). The file name encodes the target
# column, ROI count, contrast count and y_norm, and matches jdict["mat_name"].
save_matlab_mat(mat_path, '{}_{}roi_{}contrast_ynorm_{}.mat'.format(behav_pc, num_roi, num_contrasts, y_norm), X,yn,lst_subjkey_overlap)

In [None]:
# Read the saved .mat file back as a round-trip check.
# NOTE(review): this rebinds `y`. The file was saved with `yn`, so `y` now
# holds whatever read_matlab_mat returns for the transformed values, not the
# raw ones; re-running the y_transform cell after this one would transform twice.
x, y, lst_subjectkey = read_matlab_mat(mat_path, '{}_{}roi_{}contrast_ynorm_{}.mat'.format(behav_pc, num_roi, num_contrasts, y_norm))

In [None]:
# Shapes of the arrays loaded back from the .mat file.
print(x.shape)
print(y.shape)

In [None]:
# Display y as loaded back from the .mat file (it was rebound by read_matlab_mat).
y

# JSON generation

In [None]:
# Configuration written to the JSON file: the run settings plus the location
# and name of the .mat file saved earlier in this notebook.
# NOTE(review): base_dir is a hard-coded, user-specific absolute path --
# confirm it is valid on the machine that consumes this JSON.
jdict = dict(
    t=t,
    k=k,
    p_thresh=p_thresh,
    repeat=repeat,
    num_iter=num_iter,
    mat_path=mat_path,
    mat_name='{}_{}roi_{}contrast_ynorm_{}.mat'.format(behav_pc, num_roi, num_contrasts, y_norm),
    zscore=zscore,
    mode=mode,
    y_norm=y_norm,
    base_dir="/Users/fengdanye/Documents/Yale_Research/ABCD/CPM_outputs",
)

In [None]:
# Display the path the config JSON is about to be written to.
out_path

In [None]:
# Serialise the configuration and write it to out_path (overwrites the file).
with open(out_path, "w") as config_file:
    config_file.write(json.dumps(jdict))

In [None]:
# Read the JSON file back to check what was written.
with open(out_path) as config_file:
    data = json.loads(config_file.read())

In [None]:
# Display the configuration as loaded from the JSON file.
data

In [None]:
# mat_name with its last four characters (the '.mat' suffix) removed.
data['mat_name'][:-4]

In [None]:
# Display the zscore flag as read back from the JSON file.
data['zscore']

---
---