[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/drarkadeep/dangerously-devilish-notebooks/blob/main/nma/fMRI/GLM-for-all.ipynb)

In [None]:
!pip install --quiet nilearn

In [None]:
import os
import tarfile

import numpy as np
import pandas as pd
import requests
import statsmodels.api as sm
from nilearn import datasets
from nilearn.glm.first_level import make_first_level_design_matrix
from scipy import stats
from statsmodels.sandbox.stats.multicomp import multipletests
%matplotlib inline

# --- Data acquisition --------------------------------------------------------
# Download the task archive unless a local copy is already present.
fname = "hcp_task.tgz"
url = "https://osf.io/2y3fw/download"
if not os.path.isfile(fname):
    try:
        r = requests.get(url)
    except requests.ConnectionError:
        print("!!! Failed to download data !!!")
    else:
        if r.status_code != requests.codes.ok:
            print("!!! Failed to download data !!!")
        else:
            with open(fname, "wb") as fid:
                fid.write(r.content)

# Unpack the archive once; an existing directory is taken as already extracted.
HCP_DIR = "./hcp_task"
if not os.path.exists(HCP_DIR):
    if not os.path.isfile(fname):
        # A failed download above only prints a message. Without this check
        # the failure would surface as an unexplained error from
        # tarfile.open() on a file that was never written.
        raise FileNotFoundError(
            f"{fname} is missing (download failed?); cannot create {HCP_DIR}"
        )
    with tarfile.open(fname) as tfile:
        tfile.extractall('.')

# Fetch the atlas file into the data directory (skipped when already present).
fname = f"{HCP_DIR}/atlas.npz"
url = "https://osf.io/j5kuc/download"
if not os.path.isfile(fname):
    r = requests.get(url)
    # Fail before writing: otherwise an HTTP error body would be saved as
    # atlas.npz and, because the file then exists, never be re-downloaded.
    r.raise_for_status()
    with open(fname, "wb") as fid:
        fid.write(r.content)
with np.load(fname) as dobj:
    # Copy the arrays out so they stay usable after the archive is closed.
    atlas = dict(**dobj)

# `datasets` must be nilearn's `datasets` module (imported at the top of the file).
fsaverage = datasets.fetch_surf_fsaverage()

In [None]:
# Run labels; the functions below take an integer `run` that indexes this list.
runs = ['LR', 'RL']

# Experiment name -> names of its condition (EV) files, without the ".txt" suffix.
conditions = {
    'MOTOR': {'cond': ['lf', 'rf', 'lh', 'rh', 't', 'cue']},
    'WM': {
        'cond': [
            '0bk_body', '0bk_faces', '0bk_places', '0bk_tools',
            '2bk_body', '2bk_faces', '2bk_places', '2bk_tools',
        ],
    },
    'EMOTION': {'cond': ['fear', 'neut']},
    'GAMBLING': {'cond': ['loss', 'win']},
    'LANGUAGE': {'cond': ['math', 'story']},
    'RELATIONAL': {'cond': ['match', 'relation']},
    'SOCIAL': {'cond': ['mental', 'rnd']},
}

# Region metadata: after the transpose, row 0 is used as the region name and
# row 1 as the network label. The hemisphere column hard-codes 180 'Right'
# entries followed by 180 'Left' entries, so 360 regions are expected.
regions = np.load(f"{HCP_DIR}/regions.npy").T
region_df = pd.DataFrame(
    {
        'region': regions[0],
        'network': regions[1],
        'hemi': ['Right'] * 180 + ['Left'] * 180,
    }
)

# Subject identifiers, read as strings.
subjects = np.loadtxt(os.path.join(HCP_DIR, 'subjects_list.txt'), dtype=str)


In [None]:
def load_single_timeseries(subject, experiment, run):
    """Load one run's time series and z-score each row of the stored array.

    Args:
        subject: Subject identifier used in the data path.
        experiment: Experiment name used in the data path (e.g. 'MOTOR').
        run: Integer index into the module-level ``runs`` list.

    Returns:
        The transpose of the normalised array. ``run_first_level_glm`` treats
        the rows of the result as scans and the columns as ROIs.
    """
    bold_run = runs[run]
    bold_path = f"{HCP_DIR}/subjects/{subject}/{experiment}/tfMRI_{experiment}_{bold_run}"
    bold_file = "data.npy"
    ts = np.load(f"{bold_path}/{bold_file}")
    ts -= ts.mean(axis=1, keepdims=True)
    scale = ts.std(axis=1, keepdims=True)
    # A constant row has zero standard deviation; dividing by it would fill the
    # row with NaN/inf and contaminate the GLM. Dividing by 1 instead leaves
    # such a (already mean-centred) row at zero.
    scale[scale == 0] = 1.0
    ts /= scale
    return ts.T


def get_events(subject, experiment, run):
    """Collect the event table of one run from its per-condition EV files.

    One text file is read for every condition listed under
    ``conditions[experiment]['cond']``. The first column of each file is used
    as the onset and the second as the duration; any further columns are
    ignored.

    Args:
        subject: Subject identifier used in the data path.
        experiment: Experiment name; also the key into ``conditions``.
        run: Integer index into the module-level ``runs`` list.

    Returns:
        DataFrame with columns 'onset', 'duration' and 'trial_type', sorted by
        onset. The index is not reset after sorting.
    """
    ev_dir = f"{HCP_DIR}/subjects/{subject}/{experiment}/tfMRI_{experiment}_{runs[run]}/EVs"

    def read_condition(name):
        # unpack=True transposes the table, so each row here is a file column.
        file_columns = np.loadtxt(f"{ev_dir}/{name}.txt", ndmin=2, unpack=True)
        return pd.DataFrame({
            'onset': file_columns[0],
            'duration': file_columns[1],
            'trial_type': name,
        })

    per_condition = [read_condition(name) for name in conditions[experiment]['cond']]
    return pd.concat(per_condition, ignore_index=True).sort_values('onset')
    

def create_design_matrix(subject, experiment, run, TR, n_scans):
    """Build the first-level design matrix for one subject, experiment and run.

    Args:
        subject: Subject identifier, forwarded to ``get_events``.
        experiment: Experiment name, forwarded to ``get_events``.
        run: Run index, forwarded to ``get_events``.
        TR: Spacing between consecutive frame times.
        n_scans: Number of frames in the run.

    Returns:
        The object returned by nilearn's ``make_first_level_design_matrix``.
    """
    run_events = get_events(subject, experiment, run)
    # Frame i is placed at time i * TR, starting from 0.
    scan_times = TR * np.arange(n_scans)
    model_options = dict(
        hrf_model='spm + derivative',
        drift_model='cosine',
        high_pass=0.01,
    )
    matrix = make_first_level_design_matrix(scan_times, run_events, **model_options)
    print(f"Did design_matrix for {experiment}")
    return matrix

def run_first_level_glm(subject, experiment, run, TR, correction):
    """Fit one OLS model per ROI for a single run and save the results to CSV.

    Each ROI time series is regressed on the run's design matrix. The p-values
    of that ROI's regressors are then corrected with ``multipletests`` using
    ``correction`` (i.e. the correction is across regressors within one ROI).

    Args:
        subject: Subject identifier.
        experiment: Experiment name.
        run: Integer run index.
        TR: Frame spacing passed on to ``create_design_matrix``.
        correction: ``method`` argument for ``multipletests``.

    Returns:
        Tuple ``(results_df, columns)``: a DataFrame with one row per ROI and
        the design-matrix column labels. The DataFrame is also written to
        ``1st_level_glm_result/<experiment>/<subject>_<run>.csv``.
    """
    bold_data = load_single_timeseries(subject, experiment, run)
    n_scans, n_rois = bold_data.shape
    design_matrix = create_design_matrix(subject, experiment, run, TR, n_scans)

    def fit_roi(roi):
        fit = sm.OLS(bold_data[:, roi], design_matrix).fit()
        raw_p = np.array(fit.pvalues)
        return {
            'ROI': roi,
            'coefficients': np.array(fit.params),
            'p_values': raw_p,
            # flatten() copies, so the stored raw p-values are passed untouched
            'corrected_p_values': multipletests(raw_p.flatten(), alpha=0.05, method=correction)[1],
            't_values': np.array(fit.tvalues),
            'rsquared': fit.rsquared,
        }

    results_df = pd.DataFrame([fit_roi(roi) for roi in range(n_rois)])

    # Attach region metadata by looking up each ROI number in region_df's index.
    for label in ("region", "network", "hemi"):
        results_df[label] = results_df["ROI"].map(region_df[label])

    path = f"1st_level_glm_result/{experiment}"
    os.makedirs(path, exist_ok=True)
    results_df.to_csv(f"{path}/{subject}_{run}.csv", index=False)
    print(f"Saved 1st level for {experiment} {run} {subject}")
    return results_df, design_matrix.columns

def prepare_second_level_data(all_subjects_results, regressor_index):
    """Stack one regressor's coefficient from every first-level result.

    Args:
        all_subjects_results: Iterable of first-level result DataFrames, each
            with a 'coefficients' column holding one coefficient vector per ROI.
        regressor_index: Position of the regressor inside each vector.

    Returns:
        Array with one row per entry of ``all_subjects_results`` and one
        column per ROI.
    """
    def pick_regressor(coefficient_vector):
        return coefficient_vector[regressor_index]

    return np.array([
        first_level['coefficients'].apply(pick_regressor)
        for first_level in all_subjects_results
    ])

def run_second_level_glm(group_data, second_level_correction):
    """Run a one-sample t-test against zero for every ROI column.

    Args:
        group_data: 2-D array; rows are observations, columns are ROIs.
        second_level_correction: ``method`` argument for ``multipletests``,
            applied across the ROI p-values.

    Returns:
        DataFrame with one row per ROI: 'ROI', 't_statistic', 'p_value',
        'corrected_p_value', plus 'region', 'network' and 'hemi' looked up in
        ``region_df``.
    """
    def roi_ttest(roi):
        t_stat, p_value = stats.ttest_1samp(group_data[:, roi], 0)
        return {'ROI': roi, 't_statistic': t_stat, 'p_value': p_value}

    results_df = pd.DataFrame([roi_ttest(roi) for roi in range(group_data.shape[1])])

    adjusted = multipletests(results_df['p_value'], alpha=0.05, method=second_level_correction)
    results_df['corrected_p_value'] = adjusted[1]

    # Attach region metadata by looking up each ROI number in region_df's index.
    for label in ("region", "network", "hemi"):
        results_df[label] = results_df["ROI"].map(region_df[label])
    return results_df

def glm_for_cohort(subject_cohort, experiment):
    """Run the first-level GLM for every subject and run, then group tests.

    Reads the module-level settings ``TR``, ``first_level_correction`` and
    ``second_level_correction`` at call time.

    Args:
        subject_cohort: Iterable of subject identifiers.
        experiment: Experiment name (a key of ``conditions``).

    Returns:
        Dict mapping each design-matrix column label to the DataFrame returned
        by ``run_second_level_glm`` for that regressor. An empty cohort yields
        an empty dict.
    """
    first_level_results = []
    second_level_results = {}
    columns = None

    for subject in subject_cohort:
        # Follow the `runs` list instead of a hard-coded count of 2.
        for run in range(len(runs)):
            subject_results, columns = run_first_level_glm(
                subject, experiment, run, TR, first_level_correction
            )
            first_level_results.append(subject_results)

    # Nothing was fitted: avoid indexing into an empty list below.
    if not first_level_results:
        return second_level_results

    # NOTE(review): every run is appended as its own entry, so the group-level
    # t-test receives len(runs) rows per subject and treats them as separate
    # observations. Confirm this is intended; averaging runs within a subject
    # first would give one observation per subject.
    # Column labels are taken from the last fitted run and assumed to match
    # the coefficient order of all runs.
    n_regressors = len(first_level_results[0]['coefficients'][0])
    for regressor_index in range(n_regressors):
        group_data = prepare_second_level_data(first_level_results, regressor_index)
        second_level_results[columns[regressor_index]] = run_second_level_glm(
            group_data, second_level_correction
        )

    return second_level_results

# Settings read by glm_for_cohort() when it is called.
TR = 0.72  # spacing between frame times used for the design matrix
first_level_correction = "fdr_bh"   # multipletests method, per-ROI regressor p-values
second_level_correction = "fdr_bh"  # multipletests method, across-ROI p-values

# Run the full pipeline for every experiment and write one CSV per regressor.
for condition in conditions:
    print(f"Starting {condition}")
    final_results = glm_for_cohort(subjects, condition)
    for key, df in final_results.items():
        # `key` is a design-matrix column label and becomes the file name.
        path = f"2nd_level_glm_result/{condition}"
        os.makedirs(path, exist_ok=True)
        df.to_csv(f'{path}/{key}.csv', index=False)
