In [None]:
import pandas as pd
import numpy as np
import os
from matplotlib import pyplot as plt

In [None]:
def plot_Jaccard_figure(data,traits,t,my_type,output_file,times):
    """Draw one Jaccard-index curve per trait against sample size and save it.

    Parameters
    ----------
    data : 2-D array indexed as ``data[step, trait]``
        Jaccard values. Only the first ``times[i]`` rows of column ``i`` are
        drawn.
    traits : sequence of str
        Line labels, one per column of ``data``.
    t : str
        Threshold identifier used to build the title: ``'fdr'``,
        ``'bonferroni'``, or any other string, which is shown as ``p < t``.
    my_type : str
        Prefix of the figure title.
    output_file : str
        Path handed to ``Figure.savefig``.
    times : sequence of int
        Number of rows to draw for each trait.

    Returns
    -------
    None
        The figure is written to ``output_file``; it is not closed here.
    """
    # Ten-colour palette, indexed modulo its length so that an input with
    # more than ten traits reuses colours instead of raising IndexError.
    palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#8c564b',
               '#9467bd', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    trait_num = data.shape[1]

    fig = plt.figure(figsize=(16,8))
    for i in range(trait_num):
        n_steps = times[i]
        # Row j (0-based) is plotted at a sample size of (j + 1) * 100.
        x = np.arange(1, n_steps + 1) * 100
        # The colour is passed only as a keyword: also giving a 'k' format
        # string makes matplotlib warn that the colour is defined twice.
        plt.plot(x, data[:n_steps,i], color=palette[i % len(palette)],
                 label=traits[i], linewidth=4)

    if t == 'fdr':
        my_title = my_type + ": FDR"
    elif t == 'bonferroni':
        my_title = my_type + ": " + t
    else:
        my_title = my_type + ": p < " + t

    # Thicken the frame on all four sides of the axes.
    ax = plt.gca()
    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_linewidth(2)

    plt.title(my_title,fontsize=30)
    plt.ylabel('Jaccard index',fontsize=30)
    plt.xlabel('sample size',fontsize=30)
    # Fixed axis limits keep every saved figure on the same scale.
    plt.ylim(-0.05,1.1)
    plt.xlim(-500,15000)
    plt.tick_params(direction='out', length=10, width=4,labelsize=30)
    fig.savefig(output_file,dpi=300,bbox_inches = 'tight')

In [None]:
# Traits to plot; each one has its own sub-directory under new_results/.
files = ['Age','BMI','IQ','Numeric_memory','Neuroticism','Alcohol','Birth']
# Threshold identifiers; each is part of the CSV and PNG file names.
ts = ['0.05','0.01','fdr','bonferroni']
# Upper bound on the number of rows read from each CSV.
subsampling_times = 150

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('new_figures', exist_ok=True)

for t in ts:
    print(t)
    # One column per trait; rows that are not filled stay at zero and are
    # excluded from the plots through real_subsampling_times.
    Jaccard_CSA_data = np.zeros((subsampling_times,len(files)))
    Jaccard_CT_data = np.zeros((subsampling_times,len(files)))
    Jaccard_FC_data = np.zeros((subsampling_times,len(files)))
    real_subsampling_times = []
    for i, f in enumerate(files):
        print(f)
        CT_reliability = pd.read_csv('new_results/'+f+'/CT_Jaccard_index_'+t+'.csv')
        CSA_reliability = pd.read_csv('new_results/'+f+'/CSA_Jaccard_index_'+t+'.csv')
        FC_reliability = pd.read_csv('new_results/'+f+'/FC_Jaccard_index_'+t+'.csv')

        # Use the shortest of the three tables (capped at subsampling_times).
        # Taking the CT row count alone breaks the slice assignments below
        # with a shape mismatch whenever the CSA or FC table is shorter.
        tmp = min(subsampling_times,
                  CT_reliability.shape[0],
                  CSA_reliability.shape[0],
                  FC_reliability.shape[0])
        real_subsampling_times.append(tmp)

        # Only the first column of each table is used.
        Jaccard_CSA_data[:tmp,i] = CSA_reliability.iloc[:tmp,0].values
        Jaccard_CT_data[:tmp,i] = CT_reliability.iloc[:tmp,0].values
        Jaccard_FC_data[:tmp,i] = FC_reliability.iloc[:tmp,0].values

    plot_Jaccard_figure(Jaccard_CSA_data,files,t,'CSA','new_figures/CSA_Jaccard_'+t+'.png',real_subsampling_times)
    plot_Jaccard_figure(Jaccard_CT_data,files,t,'CT','new_figures/CT_Jaccard_'+t+'.png',real_subsampling_times)
    plot_Jaccard_figure(Jaccard_FC_data,files,t,'FC','new_figures/FC_Jaccard_'+t+'.png',real_subsampling_times)

In [None]:
def _first_crossing(values, threshold):
    """Return the 0-based index of the first value strictly above ``threshold``, or None."""
    for j, value in enumerate(values):
        if value > threshold:
            return j
    return None


def plot_ICC_figure(data,traits,t,my_type,output_file,times):
    """Draw one ICC curve per trait against sample size, mark threshold crossings, and save.

    For every trait the first point whose ICC exceeds 0.5 and the first point
    whose ICC exceeds 0.75 are marked with a black square, and the sample
    sizes of those points are printed.

    Parameters
    ----------
    data : 2-D array indexed as ``data[step, trait]``
        ICC values. Only the first ``times[i]`` rows of column ``i`` are used.
    traits : sequence of str
        Line labels, one per column of ``data``.
    t : str
        Fraction as a string (e.g. ``'0.25'``, ``'1'``); shown in the title
        as a percentage.
    my_type : str
        Prefix of the figure title and of the printed summary lines.
    output_file : str
        Path handed to ``Figure.savefig``.
    times : sequence of int
        Number of rows to use for each trait.

    Returns
    -------
    None
        The figure is written to ``output_file``; it is not closed here.
    """
    # Ten-colour palette, indexed modulo its length so that an input with
    # more than ten traits reuses colours instead of raising IndexError.
    palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#8c564b',
               '#9467bd', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    trait_num = data.shape[1]

    fig = plt.figure(figsize=(16,8))
    total_pos1 = []  # sample sizes of the first point above 0.5, per trait
    total_pos2 = []  # sample sizes of the first point above 0.75, per trait
    for i in range(trait_num):
        n_steps = times[i]
        curve = data[:n_steps,i]
        # Row j (0-based) is plotted at a sample size of (j + 1) * 100.
        x = np.arange(1, n_steps + 1) * 100
        # The colour is passed only as a keyword: also giving a 'k' format
        # string makes matplotlib warn that the colour is defined twice.
        plt.plot(x, curve, color=palette[i % len(palette)],
                 label=traits[i], linewidth=4)

        # Traits that never exceed a threshold contribute no marker and no
        # entry to the corresponding list.
        for threshold, positions in ((0.5, total_pos1), (0.75, total_pos2)):
            j = _first_crossing(curve, threshold)
            if j is not None:
                my_pos = (j + 1)*100
                plt.scatter(my_pos, curve[j], marker='s', color='black',s=80)
                positions.append(my_pos)

    print(my_type,t,"pos above 0.5:",total_pos1)
    print(my_type,t,"pos above 0.75:",total_pos2)
    # Reference lines at the two thresholds, spanning the fixed x-range.
    plt.hlines(y=0.5,xmin=-500,xmax=15000,linestyles='dashed',linewidth=3,color ='gray')
    plt.hlines(y=0.75,xmin=-500,xmax=15000,linestyles='dashed',linewidth=3,color ='black')

    # round() before int(): float(t) * 100 can land just below the intended
    # integer (0.29 * 100 == 28.999999999999996) and plain int() truncates.
    my_title = my_type + ": the largest " + str(int(round(float(t) * 100))) + "% measures"

    # Thicken the frame on all four sides of the axes.
    ax = plt.gca()
    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_linewidth(2)

    plt.title(my_title,fontsize=30)
    plt.ylabel('ICC',fontsize=30)
    plt.xlabel('sample size',fontsize=30)
    # Fixed axis limits keep every saved figure on the same scale.
    plt.ylim(-0.05,1.1)
    plt.xlim(-500,15000)
    plt.tick_params(direction='out', length=10, width=4,labelsize=30)
    fig.savefig(output_file,dpi=300,bbox_inches = 'tight')

In [None]:
# Traits to plot; each one has its own sub-directory under new_results/.
files = ['Age','BMI','IQ','Numeric_memory','Neuroticism','Alcohol','Birth']
# Fractions (as strings) that appear in the CSV and PNG file names;
# plot_ICC_figure renders them as a percentage in the title.
ts = ['0.1','0.2','0.25','0.5','1']
# Upper bound on the number of rows read from each CSV.
subsampling_times = 150

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('new_figures', exist_ok=True)

for t in ts:
    # One column per trait; rows that are not filled stay at zero and are
    # excluded from the plots through real_subsampling_times.
    ICC_CSA_data = np.zeros((subsampling_times,len(files)))
    ICC_CT_data = np.zeros((subsampling_times,len(files)))
    ICC_FC_data = np.zeros((subsampling_times,len(files)))
    real_subsampling_times = []
    for i, f in enumerate(files):
        CT_reliability = pd.read_csv('new_results/'+f+'/CT_ICC_'+t+'.csv')
        CSA_reliability = pd.read_csv('new_results/'+f+'/CSA_ICC_'+t+'.csv')
        FC_reliability = pd.read_csv('new_results/'+f+'/FC_ICC_'+t+'.csv')

        # Use the shortest of the three tables (capped at subsampling_times).
        # Taking the CT row count alone breaks the slice assignments below
        # with a shape mismatch whenever the CSA or FC table is shorter.
        tmp = min(subsampling_times,
                  CT_reliability.shape[0],
                  CSA_reliability.shape[0],
                  FC_reliability.shape[0])
        real_subsampling_times.append(tmp)

        # Only the first column of each table is used.
        ICC_CSA_data[:tmp,i] = CSA_reliability.iloc[:tmp,0].values
        ICC_CT_data[:tmp,i] = CT_reliability.iloc[:tmp,0].values
        ICC_FC_data[:tmp,i] = FC_reliability.iloc[:tmp,0].values

    # Each array is paired with its own label and output file. Previously the
    # CT array was drawn under the 'CSA' title/file name and vice versa.
    plot_ICC_figure(ICC_CSA_data,files,t,'CSA','new_figures/CSA_ICC_'+t+'.png',real_subsampling_times)
    plot_ICC_figure(ICC_CT_data,files,t,'CT','new_figures/CT_ICC_'+t+'.png',real_subsampling_times)
    plot_ICC_figure(ICC_FC_data,files,t,'FC','new_figures/FC_ICC_'+t+'.png',real_subsampling_times)