# FAAH Genotype Analysis

In [None]:
# Set variables and paths

import pandas as pd
import numpy as np
from scipy.stats import zscore
import matplotlib.pyplot as plt
import seaborn as sns
# from statannot import add_stat_annotation
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.graphics.factorplots import interaction_plot
from scipy.stats import ttest_ind
from datetime import date
# ISO-formatted (YYYY-MM-DD) run date, used to stamp the output file names.
today = date.today().isoformat()
import bct
from glob import glob

# Read in Rest data
# `home` is the project root; the baseline master file is loaded from it and
# the final adjacency matrix is later written back into it.
home = '/home/lms233/ABCD_FAAH'
# NOTE(review): `baseline` is not referenced by any of the cells visible in
# this part of the notebook -- confirm it is still needed before removing it.
baseline = pd.read_csv(home + '/Baseline_Masterfile_n=3109_2021-02-24.csv')
# Set model variables
# Name of the outcome column (presumably a column of `baseline` -- TODO confirm).
yvar = 'cbcl_scr_dsm5_anxdisord_r'
# Set paths
# Both values are placeholders and must be replaced with real directories
# before running. `out` is the directory that is globbed for NBS adjacency
# matrices and fold results, and where the final mask is written.
all_mats_dir = '<path to data>'
out = '<path to models>/fitted_models'

### Read in Results

In [None]:
# Build one binary, symmetric adjacency matrix per NBS run and stack them
# along the third axis of `out_matrix`.
n_nodes = 368  # side length of every adjacency matrix (hard-coded for this analysis)
NBS_list = glob(out + '/NBS_AdjMat*')
print('{} runs'.format(len(NBS_list)))

# out_matrix[:, :, k] holds the binarised network read from NBS_list[k].
out_matrix = np.zeros((n_nodes, n_nodes, len(NBS_list)))
for i, nbs_path in enumerate(NBS_list):
    mat = pd.read_csv(nbs_path, index_col=0).to_numpy()
    # Keep only the entries that carry the highest value found in this run.
    rows, cols = np.nonzero(mat == mat.max())
    # Start from zeros and flag both (row, col) and (col, row) so the
    # binarised matrix is symmetric even if the file stores one triangle.
    replaced_mat = np.zeros((n_nodes, n_nodes))
    replaced_mat[rows, cols] = 1.0
    replaced_mat[cols, rows] = 1.0
    out_matrix[:, :, i] = replaced_mat

In [None]:
# Compute 75% cutoff for matrix: an edge is kept when it was flagged in at
# least `cutoff` of the stacked NBS runs.
num_mats = len(NBS_list)
if num_mats == 0:
    # With zero runs the cutoff is 0, every entry satisfies `>= cutoff`, and an
    # all-ones matrix would be written without any warning.
    raise ValueError('No NBS adjacency matrices were loaded; cannot compute a consensus matrix.')
cutoff = round(.75*num_mats)
# NOTE(review): round() can land below 75% of the runs (e.g. 6 runs -> 4,
# 3 runs -> 2). Confirm whether math.ceil(.75*num_mats) was intended.
summed_mat = np.sum(out_matrix, axis=2)

# Create new adjacency matrix for values >= cutoff value.
# `cutoff_mask` is an (n_pairs, 2) array of [row, col] indices.
cutoff_mask = np.argwhere(summed_mat >= cutoff)
final_mat = np.zeros([368, 368])
rows, cols = cutoff_mask[:, 0], cutoff_mask[:, 1]
# Flag both orientations of every pair so the result is symmetric.
final_mat[rows, cols] = 1.0
final_mat[cols, rows] = 1.0

# Write final matrix to CSV
pd.DataFrame(final_mat).to_csv(home + '/Final_Adjacency_Matrix_nRuns{}_{}.csv'.format(len(NBS_list), today))

In [None]:
def filter_mask(mask):
    """Remove mirrored index pairs from a collection of [row, col] pairs.

    Pairs are scanned in order. A pair ``[a, b]`` is dropped when its mirror
    ``[b, a]`` has already been kept; otherwise it is kept. Only mirrored
    pairs are removed: an exact repeat of an off-diagonal pair is kept, while
    a repeated diagonal pair ``[a, a]`` is dropped because it is its own
    mirror.

    Parameters
    ----------
    mask : array-like of shape (n_pairs, 2)
        Index pairs, e.g. the output of ``np.argwhere``. A numpy array or a
        plain list of pairs is accepted.

    Returns
    -------
    list of [v1, v2] lists
        The kept pairs, in their original order.

    Side effects
    ------------
    Prints the number of pairs before and after filtering.
    """
    pairs = np.asarray(mask).tolist()
    final_mask = []
    # Set of kept pairs for O(1) mirror lookups; scanning `final_mask` itself
    # would make the whole pass quadratic in the number of pairs.
    kept = set()
    for pair in pairs:
        v1, v2 = pair[0], pair[1]
        if (v2, v1) in kept:
            continue
        kept.add((v1, v2))
        final_mask.append([v1, v2])
    print("Original mask length: {}".format(len(pairs)))
    print("Length with duplicates removed: {}".format(len(final_mask)))
    return final_mask
    
# Keep one orientation of each index pair that passed the cutoff, then save
# the reduced list next to the fitted models.
network_mask = filter_mask(cutoff_mask)
mask_csv_path = out + '/Final_Mask_nRuns{}_{}.csv'.format(len(NBS_list), today)
pd.DataFrame(network_mask).to_csv(mask_csv_path)

In [None]:
# Report how many NBS adjacency files are in NBS_list.
n_runs_found = len(NBS_list)
print('{} runs'.format(n_runs_found))


In [None]:
# Read in results
# Glob template for the per-fold `.pkl` model outputs. The `{}` placeholder is
# filled in by get_avg_sig via str.format with the name passed as `intx_name`
# ('Pos' / 'Neg' in the calls below); the `*` wildcards are expanded by glob.
net_file_temp = out + '/Feb2021_Runs/Folds/Fold*_LMEOutput_GenotypeDiffs_{}Tail*.pkl'

def get_avg_sig(file_temp, intx, intx_valence, intx_name):
    """Collect genotype t- and p-values from every saved fold model.

    Each file matched by ``glob(file_temp.format(intx_name))`` is loaded with
    ``sm.load`` and one row of statistics is extracted from it. No averaging
    happens here; the rows are returned as-is.

    Parameters
    ----------
    file_temp : str
        Glob pattern containing one ``{}`` placeholder for ``intx_name``.
    intx : str
        ``'T'`` to also extract the network-by-genotype interaction term;
        any other value extracts the genotype term only.
    intx_valence : str
        Inserted into the interaction term name
        ``'net_amyg_{}_var:genotype[T.2]'``. Only used when ``intx == 'T'``.
    intx_name : str
        Value substituted into ``file_temp``.

    Returns
    -------
    pandas.DataFrame
        One row per matched file. Column 0 is the t-value and column 1 the
        p-value of ``'genotype[T.2]'``; when ``intx == 'T'``, columns 2 and 3
        are the t- and p-value of the interaction term. The frame is empty
        (no rows, no columns) when no file matches.
    """
    # assumes every file is a saved results object exposing label-indexed
    # `.tvalues` and `.pvalues` -- TODO confirm against the fitting script
    genotype_term = 'genotype[T.2]'
    with_interaction = intx == 'T'

    rows = []
    for path in glob(file_temp.format(intx_name)):
        fitted = sm.load(path)
        t_frame = pd.DataFrame(fitted.tvalues)
        p_frame = pd.DataFrame(fitted.pvalues)

        row = [
            t_frame.loc[genotype_term, :].values[0],
            p_frame.loc[genotype_term, :].values[0],
        ]
        if with_interaction:
            interaction_term = 'net_amyg_{}_var:genotype[T.2]'.format(intx_valence)
            row.append(t_frame.loc[interaction_term].values[0])
            row.append(p_frame.loc[interaction_term].values[0])
        rows.append(row)

    return pd.DataFrame(rows)

def reshape_means(dfs, labels):
    """Summarise per-fold statistics as one row of mean t- and p-value per model.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        One frame per model. The first column is read as t-values and the
        second as p-values, by position, so the column labels do not matter.
        Any further columns are ignored.
    labels : sequence of str
        Model names, matched to ``dfs`` by position. Must contain at least
        ``len(dfs)`` entries; extra entries are ignored.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Model'``, ``'t-value'``, ``'p-value'``; one row per frame,
        with the column means rounded to 3 decimals.

    Raises
    ------
    ValueError
        If a frame yields fewer than two column means (e.g. an empty frame
        because no result files were found for that model).
    IndexError
        If ``labels`` is shorter than ``dfs``.
    """
    output = []
    for idx, df in enumerate(dfs):
        label = labels[idx]
        # Compute the column means once and reuse them for both statistics.
        col_means = df.mean(axis=0)
        if len(col_means) < 2:
            raise ValueError(
                "Cannot summarise model '{}': expected at least 2 columns "
                "(t-value, p-value), got {}".format(label, len(col_means))
            )
        # Positional access: Series[0] would be a label lookup and breaks
        # whenever the columns are not labelled 0, 1, ...
        tval = round(col_means.iloc[0], 3)
        pval = round(col_means.iloc[1], 3)
        output.append([label, tval, pval])
    return pd.DataFrame(output, columns=['Model', 't-value', 'p-value'])
    
# Per-fold genotype statistics for the positive and negative networks.
# intx='F' means only the genotype term is extracted, not the interaction.
net_pos_gen = get_avg_sig(net_file_temp, 'F', 'pos', 'Pos')
net_neg_gen = get_avg_sig(net_file_temp, 'F', 'neg', 'Neg')

df_list = [net_pos_gen, net_neg_gen]
label_list = ['Network-Genotype Positive Network', 'Network-Genotype Negative Network']

# Fail early, for BOTH networks, when no fold files were found. An empty frame
# would otherwise only surface later as an obscure lookup error, and a bare
# `assert` is skipped entirely when Python runs with -O.
for fold_stats, tail_name in zip(df_list, ('Pos', 'Neg')):
    if len(fold_stats) == 0:
        raise FileNotFoundError(
            'No fold result files matched {}'.format(net_file_temp.format(tail_name))
        )

results = reshape_means(df_list, label_list)