In [1]:
import pandas as pd
import numpy as np
import os
from matplotlib import pyplot as plt
from scipy.signal import savgol_filter

In [2]:
# function to visualize the Jaccard index
def plot_Jaccard_figure(data,traits,t,my_type,output_file,times):
    """Plot one Jaccard-index curve per trait and save the figure to disk.

    Row ``j`` of ``data`` is drawn at sample size ``(j + 1) * 100`` on the x axis.

    Parameters
    ----------
    data : 2-D numpy array
        Column ``i`` holds the Jaccard-index series of ``traits[i]``.
    traits : sequence of str
        Label attached to each curve (one per column of ``data``).
    t : str
        Threshold tag used for the title: ``'fdr'``, ``'bonferroni'``, or a
        p-value written as a string (e.g. ``'0.05'``).
    my_type : str
        Imaging type, used as the title prefix.
    output_file : str
        Path passed to ``Figure.savefig``.
    times : sequence of int
        ``times[i]`` is the number of leading rows of column ``i`` to draw.
    """
    trait_num = data.shape[1]
    total_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#8c564b', '#9467bd', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

    fig, ax = plt.subplots(figsize=(16,8))
    try:
        for i in range(trait_num):
            subsampling_times = times[i]
            x = np.arange(0,subsampling_times*100,100) + 100
            # Only the ``color`` keyword is given: combining it with a 'k'
            # format string is redundant and makes matplotlib warn.
            # The palette is cycled so more than 10 traits do not raise.
            ax.plot(x,data[:subsampling_times,i],
                    color=total_colors[i % len(total_colors)],
                    label=traits[i],linewidth=4)

        if t == 'fdr':
            my_title = my_type + ": FDR"
        elif t == 'bonferroni':
            my_title = my_type + ": " + t
        else:
            my_title = my_type + ": p < " + t

        for side in ('top', 'bottom', 'left', 'right'):
            ax.spines[side].set_linewidth(3)

        # The right x-limit must cover the longest curve, not whichever trait
        # happened to be last in the loop (and must exist with zero traits).
        x_max = max((times[i] for i in range(trait_num)), default=0) * 100

        ax.set_title(my_title,fontsize=30)
        ax.set_ylabel('Jaccard index',fontsize=30)
        ax.set_ylim(-0.05,1.1)
        ax.set_xlim(-500,x_max)
        ax.set_xlabel('sample size',fontsize=30)
        ax.tick_params(direction='out', length=10, width=4,labelsize=30)
        fig.savefig(output_file,dpi=300,bbox_inches = 'tight')
    finally:
        # Close (not just clear) the figure so it is released and the notebook
        # does not display an empty figure for every call.
        plt.close(fig)

## conducting_plot_Jaccard: main function to plot Jaccard index
### The input parameters are as below:

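1. mypath is the folder that contains the Jaccard results (for example, 'Jaccard/'). It must hold one sub-folder per phenotype, each containing files named `<my_type>_Jaccard_index_<t>.csv`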

2. files are the phenotypes, for example, files = ['Age','BMI','IQ','NM','NS','alcohol','BM']

3. ts indicates the significance thresholds, given as strings: uncorrected p<0.05, uncorrected p<0.01, FDR-corrected p<0.05, and Bonferroni-corrected p<0.05. For example, ts = ['0.05', '0.01','fdr','bonferroni']

4. my_type indicates the imaging type: "CSA","CT", or "FC"

5. output_path is the folder to save the Jaccard figures

In [3]:
def conducting_plot_Jaccard(mypath,files,ts,my_type,output_path):
    """Load, smooth and plot the Jaccard-index curves for every threshold in ``ts``.

    For each threshold ``t`` and phenotype ``f`` the first column of
    ``<mypath>/<f>/<my_type>_Jaccard_index_<t>.csv`` is read, truncated to at
    most 160 rows, smoothed with a Savitzky-Golay filter, rounded to 3
    decimals and passed to ``plot_Jaccard_figure``. One figure per threshold is
    written to ``<output_path>/<my_type>_Jaccard_<t>.png``.

    Parameters
    ----------
    mypath : str
        Folder containing one sub-folder per phenotype.
    files : sequence of str
        Phenotype names (sub-folder names, also used as curve labels).
    ts : sequence of str
        Threshold tags, e.g. ``['0.05', '0.01', 'fdr', 'bonferroni']``.
    my_type : str
        Imaging type prefix of the CSV files, e.g. ``'CSA'``.
    output_path : str
        Folder in which the figures are saved; created if it is missing.
    """
    # The maximum sample size in x axis is 16,000
    subsampling_times = 160
    # Savitzky-Golay smoothing settings.
    window_length = 7
    polyorder = 2

    # savefig does not create missing folders, so make sure the target exists.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    for t in ts:
        Jaccard_data = np.zeros((subsampling_times,len(files)))
        real_subsampling_times = []
        for i, f in enumerate(files):
            # os.path.join works whether or not mypath ends with a separator.
            csv_file = os.path.join(mypath, f, my_type+'_Jaccard_index_'+t+'.csv')
            reliability = pd.read_csv(csv_file)
            n_rows = min(reliability.shape[0], subsampling_times)
            real_subsampling_times.append(n_rows)

            values = reliability.iloc[:n_rows,0].values
            # savgol_filter raises when the series is shorter than the window;
            # such short series are kept unsmoothed instead of aborting the run.
            if n_rows >= window_length:
                values = savgol_filter(values,window_length,polyorder)
            Jaccard_data[:n_rows,i] = np.round(values,3)

        output_file = os.path.join(output_path, my_type+'_Jaccard_'+t+'.png')
        plot_Jaccard_figure(Jaccard_data,files,t,my_type,output_file,real_subsampling_times)

In [4]:
# Run "conducting_plot_Jaccard" once per imaging type, using the same
# phenotypes and significance thresholds for each.
files = ['Age','BMI','IQ','NM','NS','alcohol','BM']
ts = ['0.05', '0.01','fdr','bonferroni']
for imaging_type in ('CSA', 'CT', 'FC'):
    conducting_plot_Jaccard('Jaccard/',files,ts,imaging_type,'figures/')

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

In [5]:
# function to visualize the ICC
def plot_ICC_figure(data,traits,t,my_type,output_file,times):
    """Plot one ICC curve per trait, mark threshold crossings, and save the figure.

    Row ``j`` of ``data`` is drawn at sample size ``(j + 1) * 100``. For each
    trait, the first point whose value exceeds 0.5 and the first point whose
    value exceeds 0.75 are marked with a black square. The corresponding
    sample sizes are printed, one list per threshold.

    Parameters
    ----------
    data : 2-D numpy array
        Column ``i`` holds the ICC series of ``traits[i]``.
    traits : sequence of str
        Label attached to each curve (one per column of ``data``).
    t : str
        Proportion written as a string (e.g. ``'0.25'``); shown in the title
        as a percentage.
    my_type : str
        Imaging type, used as the title prefix and in the printed lines.
    output_file : str
        Path passed to ``Figure.savefig``.
    times : sequence of int
        ``times[i]`` is the number of leading rows of column ``i`` to use.
    """
    trait_num = data.shape[1]
    total_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#8c564b', '#9467bd', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

    fig, ax = plt.subplots(figsize=(16,8))
    try:
        total_pos1 = []
        total_pos2 = []
        for i in range(trait_num):
            subsampling_times = times[i]
            x = np.arange(0,subsampling_times*100,100) + 100
            # Only the ``color`` keyword is given: combining it with a 'k'
            # format string is redundant and makes matplotlib warn.
            # The palette is cycled so more than 10 traits do not raise.
            ax.plot(x,data[:subsampling_times,i],
                    color=total_colors[i % len(total_colors)],
                    label=traits[i],linewidth=4)

            # Mark the first crossing of each reliability threshold.
            for threshold, positions in ((0.5, total_pos1), (0.75, total_pos2)):
                j = _first_index_above(data[:subsampling_times,i], threshold)
                if j is not None:
                    my_pos = (j + 1)*100
                    ax.scatter(my_pos, data[j,i], marker='s', color='black',s=80)
                    positions.append(my_pos)

        print(my_type,t,"pos above 0.5:",total_pos1)
        print(my_type,t,"pos above 0.75:",total_pos2)

        # The reference lines and right x-limit must span the longest curve,
        # not whichever trait happened to be last in the loop.
        x_max = max((times[i] for i in range(trait_num)), default=0) * 100
        ax.hlines(y=0.5,xmin=-500,xmax=x_max,linestyles='dashed',linewidth=4,color ='gray')
        ax.hlines(y=0.75,xmin=-500,xmax=x_max,linestyles='dashed',linewidth=4,color ='black')

        # 'g' formatting avoids the truncation int() gives on float error
        # (e.g. float('0.29') * 100 == 28.999999999999996 -> 28).
        my_title = my_type + ": the largest " + format(float(t) * 100, 'g') + "% measures"

        for side in ('top', 'bottom', 'left', 'right'):
            ax.spines[side].set_linewidth(3)

        ax.set_title(my_title,fontsize=30)
        ax.set_ylabel('ICC',fontsize=30)
        ax.set_ylim(-0.05,1.1)
        ax.set_xlim(-500,x_max)
        ax.set_xlabel('sample size',fontsize=30)
        ax.tick_params(direction='out', length=10, width=4,labelsize=30)
        fig.savefig(output_file,dpi=300,bbox_inches = 'tight')
    finally:
        # Close (not just clear) the figure so it is released and the notebook
        # does not display an empty figure for every call.
        plt.close(fig)


def _first_index_above(values, threshold):
    """Return the index of the first value strictly greater than ``threshold``, or None."""
    return next((j for j, value in enumerate(values) if value > threshold), None)

## conducting_plot_ICC: main function to plot ICC
### The input parameters are as below:

1. mypath is the folder which includes the ICC results

2. files are the phenotypes, for example, files = ['Age','BMI','IQ','NM','NS','alcohol','BM']

3. ts are the t values, passed as strings, which indicate the proportion of the largest correlations used to calculate the ICC. For example, ts = ['0.1', '0.15', '0.2', '0.25', '0.5', '1']

4. my_type indicates the imaging type: "CSA","CT", or "FC"

5. output_path is the folder to save the ICC figures

In [6]:
def conducting_plot_ICC(mypath,files,ts,my_type,output_path):
    """Load, smooth and plot the ICC curves for every proportion in ``ts``.

    For each value ``t`` and phenotype ``f`` the first column of
    ``<mypath>/<f>/<my_type>_ICC_<t>.csv`` is read, truncated to at most 160
    rows, smoothed with a Savitzky-Golay filter, rounded to 3 decimals and
    passed to ``plot_ICC_figure``. One figure per ``t`` is written to
    ``<output_path>/<my_type>_ICC_<t>.png``.

    Parameters
    ----------
    mypath : str
        Folder containing one sub-folder per phenotype.
    files : sequence of str
        Phenotype names (sub-folder names, also used as curve labels).
    ts : sequence of str
        Proportions written as strings, e.g. ``['0.1', '0.25', '1']``.
    my_type : str
        Imaging type prefix of the CSV files, e.g. ``'CSA'``.
    output_path : str
        Folder in which the figures are saved; created if it is missing.
    """
    # The maximum sample size in x axis is 16,000
    subsampling_times = 160
    # Savitzky-Golay smoothing settings.
    window_length = 7
    polyorder = 2

    # savefig does not create missing folders, so make sure the target exists.
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    for t in ts:
        ICC_data = np.zeros((subsampling_times,len(files)))
        real_subsampling_times = []
        for i, f in enumerate(files):
            # os.path.join works whether or not mypath ends with a separator.
            csv_file = os.path.join(mypath, f, my_type+'_ICC_'+t+'.csv')
            reliability = pd.read_csv(csv_file)
            n_rows = min(reliability.shape[0], subsampling_times)
            real_subsampling_times.append(n_rows)

            values = reliability.iloc[:n_rows,0].values
            # savgol_filter raises when the series is shorter than the window;
            # such short series are kept unsmoothed instead of aborting the run.
            if n_rows >= window_length:
                values = savgol_filter(values,window_length,polyorder)
            ICC_data[:n_rows,i] = np.round(values,3)

        output_file = os.path.join(output_path, my_type+'_ICC_'+t+'.png')
        plot_ICC_figure(ICC_data,files,t,my_type,output_file,real_subsampling_times)

In [8]:
# Run "conducting_plot_ICC" once per imaging type, using the same
# phenotypes and proportion values for each.
files = ['Age','BMI','IQ','NM','NS','alcohol','BM']
ts = ['0.1', '0.15', '0.2', '0.25','0.5','1']
for imaging_type in ('CSA', 'CT', 'FC'):
    conducting_plot_ICC('ICC/',files,ts,imaging_type,'figures/')

CSA 0.1 pos above 0.5: [300, 1300, 4500, 2300, 12100, 13300]
CSA 0.1 pos above 0.75: [700, 3200, 14200, 5700]
CSA 0.15 pos above 0.5: [300, 1200, 4300, 2200, 12100, 13300]
CSA 0.15 pos above 0.75: [700, 3200, 11200, 5500]
CSA 0.2 pos above 0.5: [300, 1200, 4200, 2200, 12200, 13500]
CSA 0.2 pos above 0.75: [700, 3200, 10500, 5500]
CSA 0.25 pos above 0.5: [300, 1200, 4400, 2200, 13700, 14500]
CSA 0.25 pos above 0.75: [700, 3400, 10500, 5500]
CSA 0.5 pos above 0.5: [300, 1500, 4700, 2500]
CSA 0.5 pos above 0.75: [800, 4200, 12500, 6800]
CSA 1 pos above 0.5: [500, 2300, 6900, 3700]
CSA 1 pos above 0.75: [1300, 6600, 10900]
CT 0.1 pos above 0.5: [100, 600, 5700, 10500, 7600, 2100]
CT 0.1 pos above 0.75: [300, 1500, 13500, 4800]
CT 0.15 pos above 0.5: [100, 600, 5700, 10500, 7600, 2200]
CT 0.15 pos above 0.75: [300, 1400, 13400, 5100]
CT 0.2 pos above 0.5: [100, 600, 5700, 10500, 8400, 2300]
CT 0.2 pos above 0.75: [300, 1400, 14200, 5600]
CT 0.25 pos above 0.5: [100, 600, 6700, 10500, 8600, 

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>

<Figure size 1152x576 with 0 Axes>