#### This is a notebook to analyze aggregated RDR results

In [12]:
import pandas as pd
import numpy as np

# Define custom MAD (Median Absolute Deviation) function
def mad(x):
    """Calculate the Median Absolute Deviation (MAD) of a set of values.

    The MAD is the median of the absolute deviations from the median:
    ``median(|x - median(x)|)``.

    Missing values (NaN) are dropped before the computation so that the
    result is consistent with the pandas ``'median'``, ``'mean'`` and
    ``'std'`` aggregations used next to it, all of which skip NaN.

    Parameters
    ----------
    x : array-like of numbers
        The values to summarize (e.g. a list, NumPy array, or pandas Series).

    Returns
    -------
    float
        The MAD of the non-missing values, or NaN when there are no
        non-missing values (empty or all-NaN input).
    """
    values = np.asarray(x, dtype=float)
    # Drop missing entries; np.median would otherwise propagate NaN
    values = values[~np.isnan(values)]
    if values.size == 0:
        # Nothing to summarize: return NaN without triggering NumPy warnings
        return np.nan
    center = np.median(values)
    return np.median(np.abs(values - center))

In [13]:
# Load the aggregated results table (tab-separated file)
results = pd.read_csv("aggregated_rdr_gcta_results_0622.tsv", sep="\t")

# List every available column, one name per line
print("Column names in the results DataFrame:")
for column_name in results.columns:
    print(column_name)

# Restrict the analysis to rows with n = 8000
is_n8000 = results['sample_size'] == 8000
results = results.loc[is_n8000]


Column names in the results DataFrame:
condition
replication
sample_size
trait
VG1_est
VG1_se
VG2_est
VG2_se
VG3_est
VG3_se
Ve_est
Ve_se
Vp_est
Vp_se
VG1_Vp_est
VG1_Vp_se
VG2_Vp_est
VG2_Vp_se
VG3_Vp_est
VG3_Vp_se
Sum_of_VG_Vp_est
Sum_of_VG_Vp_se
logL
n_from_file


In [14]:
# Descriptive statistics of VG1_est: overall, and within each grouping of
# condition, trait, and sample size
print("Summary statistics for VG1_est by condition, trait, and sample size:")
print("=" * 70)

# Statistics computed for every grouping; `mad` is the custom
# median-absolute-deviation function defined earlier in the notebook
vg1_stats = ['count', 'mean', 'std', 'min', 'max', 'median', mad]


def _summarize_vg1(group_keys):
    """Return the VG1_est statistics table for `group_keys`, rounded to 4 decimals."""
    grouped_vg1 = results.groupby(group_keys)['VG1_est']
    return grouped_vg1.agg(vg1_stats).round(4)


# 1. Whole-sample description (no grouping)
print("\n1. Overall VG1_est summary:")
print(results['VG1_est'].describe())

# 2. One row per condition
print("\n2. VG1_est summary by condition:")
condition_summary = _summarize_vg1('condition')
print(condition_summary)

# 3. One row per trait
print("\n3. VG1_est summary by trait:")
trait_summary = _summarize_vg1('trait')
print(trait_summary)

# 4. One row per sample size
print("\n4. VG1_est summary by sample size:")
sample_size_summary = _summarize_vg1('sample_size')
print(sample_size_summary)

# 5. One row per (condition, trait) pair
print("\n5. VG1_est summary by condition and trait:")
condition_trait_summary = _summarize_vg1(['condition', 'trait'])
print(condition_trait_summary)

# 6. One row per (condition, sample size) pair
print("\n6. VG1_est summary by condition and sample size:")
condition_size_summary = _summarize_vg1(['condition', 'sample_size'])
print(condition_size_summary)

# 7. One row per (trait, sample size) pair
print("\n7. VG1_est summary by trait and sample size:")
trait_size_summary = _summarize_vg1(['trait', 'sample_size'])
print(trait_size_summary)

# 8. One row per (condition, trait, sample size) combination
print("\n8. VG1_est summary by condition, trait, and sample size:")
three_way_summary = _summarize_vg1(['condition', 'trait', 'sample_size'])
print(three_way_summary)

# Persist the three-way table as a tab-separated file
three_way_summary.to_csv("three_way_summary_VG1_est.tsv", sep="\t")

Summary statistics for VG1_est by condition, trait, and sample size:

1. Overall VG1_est summary:
count    7103.000000
mean        0.799696
std         0.068878
min         0.000001
25%         0.752159
50%         0.797987
75%         0.844301
max         1.121146
Name: VG1_est, dtype: float64

2. VG1_est summary by condition:
                   count    mean     std     min     max  median     mad
condition                                                               
phenoVT_geneticAM   1740  0.8009  0.0696  0.6335  1.0645  0.8003  0.0474
phenoVT_phenoAM     1841  0.7999  0.0680  0.5677  1.0270  0.7992  0.0463
phenoVT_socialAM    1712  0.8001  0.0703  0.0000  1.0319  0.7962  0.0443
socialVT_phenoAM    1810  0.7980  0.0678  0.6041  1.1211  0.7963  0.0466

3. VG1_est summary by trait:
       count    mean     std     min     max  median     mad
trait                                                       
Y1      3586  0.7964  0.0711  0.5677  1.1211  0.7938  0.0485
Y2      3517  0.803

In [15]:
# summary statistics for VG2_est by condition, trait, and sample size

In [16]:
# Descriptive statistics of VG2_est: overall, and within each grouping of
# condition, trait, and sample size
print("Summary statistics for VG2_est by condition, trait, and sample size:")
print("=" * 70)

# Statistics computed for every grouping; `mad` is the custom
# median-absolute-deviation function defined earlier in the notebook
vg2_stats = ['count', 'mean', 'std', 'min', 'max', 'median', mad]


def _summarize_vg2(group_keys):
    """Return the VG2_est statistics table for `group_keys`, rounded to 4 decimals."""
    grouped_vg2 = results.groupby(group_keys)['VG2_est']
    return grouped_vg2.agg(vg2_stats).round(4)


# 1. Whole-sample description (no grouping)
print("\n1. Overall VG2_est summary:")
print(results['VG2_est'].describe())

# 2. One row per condition
print("\n2. VG2_est summary by condition:")
condition_summary_vg2 = _summarize_vg2('condition')
print(condition_summary_vg2)

# 3. One row per trait
print("\n3. VG2_est summary by trait:")
trait_summary_vg2 = _summarize_vg2('trait')
print(trait_summary_vg2)

# 4. One row per sample size
print("\n4. VG2_est summary by sample size:")
sample_size_summary_vg2 = _summarize_vg2('sample_size')
print(sample_size_summary_vg2)

# 5. One row per (condition, trait) pair
print("\n5. VG2_est summary by condition and trait:")
condition_trait_summary_vg2 = _summarize_vg2(['condition', 'trait'])
print(condition_trait_summary_vg2)

# 6. One row per (condition, sample size) pair
print("\n6. VG2_est summary by condition and sample size:")
condition_size_summary_vg2 = _summarize_vg2(['condition', 'sample_size'])
print(condition_size_summary_vg2)

# 7. One row per (trait, sample size) pair
print("\n7. VG2_est summary by trait and sample size:")
trait_size_summary_vg2 = _summarize_vg2(['trait', 'sample_size'])
print(trait_size_summary_vg2)

# 8. One row per (condition, trait, sample size) combination
print("\n8. VG2_est summary by condition, trait, and sample size:")
three_way_summary_vg2 = _summarize_vg2(['condition', 'trait', 'sample_size'])
print(three_way_summary_vg2)

# Persist the three-way table for VG2_est as a tab-separated file
three_way_summary_vg2.to_csv("three_way_summary_VG2_est.tsv", sep="\t")

Summary statistics for VG2_est by condition, trait, and sample size:

1. Overall VG2_est summary:
count    7103.000000
mean        0.083305
std         0.046289
min         0.000001
25%         0.022994
50%         0.091798
75%         0.121922
max         0.182191
Name: VG2_est, dtype: float64

2. VG2_est summary by condition:
                   count    mean     std     min     max  median     mad
condition                                                               
phenoVT_geneticAM   1740  0.1105  0.0227  0.0671  0.1716  0.1086  0.0201
phenoVT_phenoAM     1841  0.1130  0.0231  0.0681  0.1822  0.1118  0.0210
phenoVT_socialAM    1712  0.0994  0.0196  0.0000  0.1545  0.0998  0.0174
socialVT_phenoAM    1810  0.0117  0.0051  0.0000  0.0334  0.0114  0.0035

3. VG2_est summary by trait:
       count    mean     std  min     max  median     mad
trait                                                    
Y1      3586  0.0972  0.0527  0.0  0.1822  0.1217  0.0142
Y2      3517  0.0691  0.0331

In [17]:
# Descriptive statistics of VG3_est: overall, and within each grouping of
# condition, trait, and sample size
print("Summary statistics for VG3_est by condition, trait, and sample size:")
print("=" * 70)

# Statistics computed for every grouping; `mad` is the custom
# median-absolute-deviation function defined earlier in the notebook
vg3_stats = ['count', 'mean', 'std', 'min', 'max', 'median', mad]


def _summarize_vg3(group_keys):
    """Return the VG3_est statistics table for `group_keys`, rounded to 4 decimals."""
    grouped_vg3 = results.groupby(group_keys)['VG3_est']
    return grouped_vg3.agg(vg3_stats).round(4)


# 1. Whole-sample description (no grouping)
print("\n1. Overall VG3_est summary:")
print(results['VG3_est'].describe())

# 2. One row per condition
print("\n2. VG3_est summary by condition:")
condition_summary_vg3 = _summarize_vg3('condition')
print(condition_summary_vg3)

# 3. One row per trait
print("\n3. VG3_est summary by trait:")
trait_summary_vg3 = _summarize_vg3('trait')
print(trait_summary_vg3)

# 4. One row per sample size
print("\n4. VG3_est summary by sample size:")
sample_size_summary_vg3 = _summarize_vg3('sample_size')
print(sample_size_summary_vg3)

# 5. One row per (condition, trait) pair
print("\n5. VG3_est summary by condition and trait:")
condition_trait_summary_vg3 = _summarize_vg3(['condition', 'trait'])
print(condition_trait_summary_vg3)

# 6. One row per (condition, sample size) pair
print("\n6. VG3_est summary by condition and sample size:")
condition_size_summary_vg3 = _summarize_vg3(['condition', 'sample_size'])
print(condition_size_summary_vg3)

# 7. One row per (trait, sample size) pair
print("\n7. VG3_est summary by trait and sample size:")
trait_size_summary_vg3 = _summarize_vg3(['trait', 'sample_size'])
print(trait_size_summary_vg3)

# 8. One row per (condition, trait, sample size) combination
print("\n8. VG3_est summary by condition, trait, and sample size:")
three_way_summary_vg3 = _summarize_vg3(['condition', 'trait', 'sample_size'])
print(three_way_summary_vg3)

# Persist the three-way table for VG3_est as a tab-separated file
three_way_summary_vg3.to_csv("three_way_summary_VG3_est.tsv", sep="\t")

Summary statistics for VG3_est by condition, trait, and sample size:

1. Overall VG3_est summary:
count    7103.000000
mean        0.249564
std         0.102724
min         0.047692
25%         0.137697
50%         0.262224
75%         0.338618
max         0.561293
Name: VG3_est, dtype: float64

2. VG3_est summary by condition:
                   count    mean     std     min     max  median     mad
condition                                                               
phenoVT_geneticAM   1740  0.3050  0.0579  0.1778  0.4712  0.3034  0.0499
phenoVT_phenoAM     1841  0.3058  0.0596  0.1708  0.4662  0.3065  0.0520
phenoVT_socialAM    1712  0.2934  0.0568  0.1421  0.5613  0.2950  0.0485
socialVT_phenoAM    1810  0.0975  0.0202  0.0477  0.1576  0.0968  0.0161

3. VG3_est summary by trait:
       count    mean     std     min     max  median     mad
trait                                                       
Y1      3586  0.2087  0.0787  0.0477  0.3613  0.2390  0.0306
Y2      3517  0.291

In [18]:
# Descriptive statistics of Ve_est for every (condition, trait, sample size) cell
print("Summary statistics for Ve_est by condition, trait, and sample size:")
print("=" * 70)

# Grouping keys and the statistics computed within each group; `mad` is the
# custom median-absolute-deviation function defined earlier in the notebook
ve_group_keys = ['condition', 'trait', 'sample_size']
ve_stats = ['count', 'mean', 'std', 'min', 'max', 'median', mad]

ve_by_group = results.groupby(ve_group_keys)['Ve_est']
three_way_summary_ve = ve_by_group.agg(ve_stats).round(4)
print(three_way_summary_ve)

# Persist the three-way table for Ve_est as a tab-separated file
three_way_summary_ve.to_csv("three_way_summary_Ve_est.tsv", sep="\t")

Summary statistics for Ve_est by condition, trait, and sample size:
                                     count    mean     std     min     max  \
condition         trait sample_size                                          
phenoVT_geneticAM Y1    8000           870  0.2250  0.0038  0.2139  0.2351   
                  Y2    8000           870  0.2169  0.0039  0.2041  0.2285   
phenoVT_phenoAM   Y1    8000           921  0.2262  0.0038  0.2144  0.2385   
                  Y2    8000           920  0.2175  0.0038  0.2044  0.2290   
phenoVT_socialAM  Y1    8000           889  0.2286  0.0040  0.2164  0.2428   
                  Y2    8000           823  0.2201  0.0038  0.2106  0.2315   
socialVT_phenoAM  Y1    8000           906  0.3177  0.0055  0.3020  0.3341   
                  Y2    8000           904  0.3890  0.0073  0.3674  0.4118   

                                     median     mad  
condition         trait sample_size                  
phenoVT_geneticAM Y1    8000         0.2249

In [19]:
# Descriptive statistics of Vp_est for every (condition, trait, sample size) cell
print("Summary statistics for Vp_est by condition, trait, and sample size:")
print("=" * 70)

# Grouping keys and the statistics computed within each group; `mad` is the
# custom median-absolute-deviation function defined earlier in the notebook
vp_group_keys = ['condition', 'trait', 'sample_size']
vp_stats = ['count', 'mean', 'std', 'min', 'max', 'median', mad]

vp_by_group = results.groupby(vp_group_keys)['Vp_est']
three_way_summary_vp = vp_by_group.agg(vp_stats).round(4)
print(three_way_summary_vp)

# Persist the three-way table for Vp_est as a tab-separated file
three_way_summary_vp.to_csv("three_way_summary_Vp_est.tsv", sep="\t")

Summary statistics for Vp_est by condition, trait, and sample size:
                                     count    mean     std     min     max  \
condition         trait sample_size                                          
phenoVT_geneticAM Y1    8000           870  1.4074  0.0962  1.2183  1.6919   
                  Y2    8000           870  1.4675  0.1045  1.2209  1.8598   
phenoVT_phenoAM   Y1    8000           921  1.4131  0.0970  1.1087  1.7340   
                  Y2    8000           920  1.4681  0.0963  1.1731  1.8136   
phenoVT_socialAM  Y1    8000           889  1.3849  0.1013  1.1318  1.7104   
                  Y2    8000           823  1.4523  0.0925  0.7812  1.7271   
socialVT_phenoAM  Y1    8000           906  1.2039  0.0678  1.0352  1.4984   
                  Y2    8000           904  1.3173  0.0651  1.1233  1.5338   

                                     median     mad  
condition         trait sample_size                  
phenoVT_geneticAM Y1    8000         1.4023