#### This is a notebook to analyze aggregated RDR results

In [4]:
import pandas as pd
import numpy as np

# Define custom MAD (Median Absolute Deviation) function
def mad(x):
    """Calculate the Median Absolute Deviation (MAD) of ``x``.

    The MAD is ``median(|x_i - median(x)|)``. No scaling constant is applied,
    so the value is the raw median of the absolute deviations.

    Missing values (NaN) are dropped before the computation so that one
    missing entry does not turn the whole statistic into NaN. This keeps the
    function consistent with the NaN-skipping pandas aggregations it is
    combined with in ``groupby(...).agg([...])``.

    Parameters
    ----------
    x : array-like of numbers
        For example a list, a NumPy array, or the pandas Series that
        ``groupby(...).agg`` passes in for each group.

    Returns
    -------
    float
        MAD of the non-missing values, or NaN when there are none.
    """
    values = np.asarray(x, dtype=float)
    # Boolean masking also flattens the input, mirroring np.median's default
    # behaviour of reducing over all elements.
    values = values[~np.isnan(values)]
    if values.size == 0:
        # Nothing to summarise: return NaN explicitly instead of letting
        # np.median emit a "mean of empty slice" RuntimeWarning.
        return np.nan
    center = np.median(values)
    return np.median(np.abs(values - center))

In [5]:
# Load the aggregated results table (tab-separated text).
results = pd.read_csv("aggregated_rdr_gcta_results_final.tsv", sep="\t")

# List every column, one per line, so the available fields are visible.
print("Column names in the results DataFrame:")
for column_name in results.columns:
    print(column_name)

# Keep only the rows whose sample_size equals 8000; every later summary
# in this notebook works on this subset.
results = results.loc[results['sample_size'].eq(8000)]


Column names in the results DataFrame:
condition
replication
sample_size
trait
VG1_est
VG1_se
VG2_est
VG2_se
VG3_est
VG3_se
Ve_est
Ve_se
Vp_est
Vp_se
VG1_Vp_est
VG1_Vp_se
VG2_Vp_est
VG2_Vp_se
VG3_Vp_est
VG3_Vp_se
Sum_of_VG_Vp_est
Sum_of_VG_Vp_se
logL
n_from_file


In [6]:
# Descriptive statistics for VG1_est: overall first, then within every
# grouping of condition, trait, and sample size.
vg1_stats = ['count', 'mean', 'std', 'min', 'max', 'median', mad]


def _vg1_summary(keys):
    """Aggregate VG1_est within the groups given by ``keys``, rounded to 4 decimals."""
    return results.groupby(keys)['VG1_est'].agg(vg1_stats).round(4)


print("Summary statistics for VG1_est by condition, trait, and sample size:")
print("=" * 70)

# Ungrouped distribution
print("\n1. Overall VG1_est summary:")
print(results['VG1_est'].describe())

# Single grouping factors
print("\n2. VG1_est summary by condition:")
condition_summary = _vg1_summary('condition')
print(condition_summary)

print("\n3. VG1_est summary by trait:")
trait_summary = _vg1_summary('trait')
print(trait_summary)

print("\n4. VG1_est summary by sample size:")
sample_size_summary = _vg1_summary('sample_size')
print(sample_size_summary)

# Pairs of grouping factors
print("\n5. VG1_est summary by condition and trait:")
condition_trait_summary = _vg1_summary(['condition', 'trait'])
print(condition_trait_summary)

print("\n6. VG1_est summary by condition and sample size:")
condition_size_summary = _vg1_summary(['condition', 'sample_size'])
print(condition_size_summary)

print("\n7. VG1_est summary by trait and sample size:")
trait_size_summary = _vg1_summary(['trait', 'sample_size'])
print(trait_size_summary)

# All three grouping factors together
print("\n8. VG1_est summary by condition, trait, and sample size:")
three_way_summary = _vg1_summary(['condition', 'trait', 'sample_size'])
print(three_way_summary)

# Save the three-way table as tab-separated text.
three_way_summary.to_csv("three_way_summary_VG1_est.tsv", sep="\t")

Summary statistics for VG1_est by condition, trait, and sample size:

1. Overall VG1_est summary:
count    7630.000000
mean        0.800385
std         0.068998
min         0.567684
25%         0.752639
50%         0.799342
75%         0.844423
max         1.079065
Name: VG1_est, dtype: float64

2. VG1_est summary by condition:
                   count    mean     std     min     max  median     mad
condition                                                               
phenoVT_geneticAM   1768  0.8016  0.0695  0.6225  1.0645  0.8014  0.0461
phenoVT_phenoAM     2227  0.8012  0.0686  0.5677  1.0270  0.8009  0.0461
phenoVT_socialAM    1651  0.8017  0.0681  0.5878  1.0319  0.7981  0.0459
socialVT_phenoAM    1984  0.7973  0.0697  0.5874  1.0791  0.7974  0.0454

3. VG1_est summary by trait:
       count    mean     std     min     max  median     mad
trait                                                       
Y1      3952  0.7979  0.0706  0.5677  1.0319  0.7971  0.0479
Y2      3678  0.803

In [7]:
# summary statistics for VG2_est by condition, trait, and sample size

In [8]:
# Descriptive statistics for VG2_est: overall first, then within every
# grouping of condition, trait, and sample size.
vg2_stats = ['count', 'mean', 'std', 'min', 'max', 'median', mad]


def _vg2_summary(keys):
    """Aggregate VG2_est within the groups given by ``keys``, rounded to 4 decimals."""
    return results.groupby(keys)['VG2_est'].agg(vg2_stats).round(4)


print("Summary statistics for VG2_est by condition, trait, and sample size:")
print("=" * 70)

# 1. Ungrouped distribution
print("\n1. Overall VG2_est summary:")
print(results['VG2_est'].describe())

# 2-4. Single grouping factors
print("\n2. VG2_est summary by condition:")
condition_summary_vg2 = _vg2_summary('condition')
print(condition_summary_vg2)

print("\n3. VG2_est summary by trait:")
trait_summary_vg2 = _vg2_summary('trait')
print(trait_summary_vg2)

print("\n4. VG2_est summary by sample size:")
sample_size_summary_vg2 = _vg2_summary('sample_size')
print(sample_size_summary_vg2)

# 5-7. Pairs of grouping factors
print("\n5. VG2_est summary by condition and trait:")
condition_trait_summary_vg2 = _vg2_summary(['condition', 'trait'])
print(condition_trait_summary_vg2)

print("\n6. VG2_est summary by condition and sample size:")
condition_size_summary_vg2 = _vg2_summary(['condition', 'sample_size'])
print(condition_size_summary_vg2)

print("\n7. VG2_est summary by trait and sample size:")
trait_size_summary_vg2 = _vg2_summary(['trait', 'sample_size'])
print(trait_size_summary_vg2)

# 8. All three grouping factors together
print("\n8. VG2_est summary by condition, trait, and sample size:")
three_way_summary_vg2 = _vg2_summary(['condition', 'trait', 'sample_size'])
print(three_way_summary_vg2)

# Save the three-way table as tab-separated text.
three_way_summary_vg2.to_csv("three_way_summary_VG2_est.tsv", sep="\t")

Summary statistics for VG2_est by condition, trait, and sample size:

1. Overall VG2_est summary:
count    7630.000000
mean        0.081157
std         0.051454
min         0.000001
25%         0.004376
50%         0.092569
75%         0.124074
max         0.182191
Name: VG2_est, dtype: float64

2. VG2_est summary by condition:
                   count    mean     std     min     max  median     mad
condition                                                               
phenoVT_geneticAM   1768  0.1106  0.0228  0.0646  0.1716  0.1092  0.0203
phenoVT_phenoAM     2227  0.1155  0.0232  0.0681  0.1822  0.1214  0.0213
phenoVT_socialAM    1651  0.0995  0.0196  0.0576  0.1545  0.0993  0.0176
socialVT_phenoAM    1984  0.0011  0.0014  0.0000  0.0074  0.0004  0.0004

3. VG2_est summary by trait:
       count    mean     std  min     max  median     mad
trait                                                    
Y1      3952  0.0964  0.0563  0.0  0.1822  0.1231  0.0138
Y2      3678  0.0647  0.0395

In [9]:
# Descriptive statistics for VG3_est: overall first, then within every
# grouping of condition, trait, and sample size.
vg3_stats = ['count', 'mean', 'std', 'min', 'max', 'median', mad]


def _vg3_summary(keys):
    """Aggregate VG3_est within the groups given by ``keys``, rounded to 4 decimals."""
    return results.groupby(keys)['VG3_est'].agg(vg3_stats).round(4)


print("Summary statistics for VG3_est by condition, trait, and sample size:")
print("=" * 70)

# 1. Ungrouped distribution
print("\n1. Overall VG3_est summary:")
print(results['VG3_est'].describe())

# 2-4. Single grouping factors
print("\n2. VG3_est summary by condition:")
condition_summary_vg3 = _vg3_summary('condition')
print(condition_summary_vg3)

print("\n3. VG3_est summary by trait:")
trait_summary_vg3 = _vg3_summary('trait')
print(trait_summary_vg3)

print("\n4. VG3_est summary by sample size:")
sample_size_summary_vg3 = _vg3_summary('sample_size')
print(sample_size_summary_vg3)

# 5-7. Pairs of grouping factors
print("\n5. VG3_est summary by condition and trait:")
condition_trait_summary_vg3 = _vg3_summary(['condition', 'trait'])
print(condition_trait_summary_vg3)

print("\n6. VG3_est summary by condition and sample size:")
condition_size_summary_vg3 = _vg3_summary(['condition', 'sample_size'])
print(condition_size_summary_vg3)

print("\n7. VG3_est summary by trait and sample size:")
trait_size_summary_vg3 = _vg3_summary(['trait', 'sample_size'])
print(trait_size_summary_vg3)

# 8. All three grouping factors together
print("\n8. VG3_est summary by condition, trait, and sample size:")
three_way_summary_vg3 = _vg3_summary(['condition', 'trait', 'sample_size'])
print(three_way_summary_vg3)

# Save the three-way table as tab-separated text.
three_way_summary_vg3.to_csv("three_way_summary_VG3_est.tsv", sep="\t")

Summary statistics for VG3_est by condition, trait, and sample size:

1. Overall VG3_est summary:
count    7630.000000
mean        0.227784
std         0.132854
min         0.000001
25%         0.036187
50%         0.260805
75%         0.336504
max         0.471240
Name: VG3_est, dtype: float64

2. VG3_est summary by condition:
                   count    mean     std     min     max  median     mad
condition                                                               
phenoVT_geneticAM   1768  0.3056  0.0576  0.1778  0.4712  0.3051  0.0497
phenoVT_phenoAM     2227  0.3015  0.0596  0.1752  0.4662  0.2901  0.0509
phenoVT_socialAM    1651  0.2942  0.0569  0.1421  0.4323  0.2953  0.0492
socialVT_phenoAM    1984  0.0205  0.0092  0.0000  0.0502  0.0209  0.0063

3. VG3_est summary by trait:
       count    mean     std  min     max  median     mad
trait                                                    
Y1      3952  0.1943  0.1043  0.0  0.3613  0.2402  0.0305
Y2      3678  0.2638  0.1498

In [10]:
print("Summary statistics for Ve_est by condition, trait, and sample size:")
print("=" * 70)

# Group Ve_est by all three factors, then compute the descriptive
# statistics (including the custom MAD) rounded to four decimals.
ve_stats = ['count', 'mean', 'std', 'min', 'max', 'median', mad]
ve_by_group = results.groupby(['condition', 'trait', 'sample_size'])['Ve_est']
three_way_summary_ve = ve_by_group.agg(ve_stats).round(4)
print(three_way_summary_ve)

# Save the three-way table as tab-separated text.
three_way_summary_ve.to_csv("three_way_summary_Ve_est.tsv", sep="\t")

Summary statistics for Ve_est by condition, trait, and sample size:
                                     count    mean     std     min     max  \
condition         trait sample_size                                          
phenoVT_geneticAM Y1    8000           884  0.2249  0.0038  0.2139  0.2361   
                  Y2    8000           884  0.2169  0.0039  0.2041  0.2295   
phenoVT_phenoAM   Y1    8000          1226  0.2262  0.0038  0.2144  0.2385   
                  Y2    8000          1001  0.2176  0.0039  0.2044  0.2290   
phenoVT_socialAM  Y1    8000           850  0.2288  0.0040  0.2164  0.2428   
                  Y2    8000           801  0.2202  0.0037  0.2106  0.2315   
socialVT_phenoAM  Y1    8000           992  0.2184  0.0037  0.2066  0.2337   
                  Y2    8000           992  0.2130  0.0035  0.2021  0.2246   

                                     median     mad  
condition         trait sample_size                  
phenoVT_geneticAM Y1    8000         0.2247

In [11]:
print("Summary statistics for Vp_est by condition, trait, and sample size:")
print("=" * 70)

# Group Vp_est by all three factors, then compute the descriptive
# statistics (including the custom MAD) rounded to four decimals.
vp_stats = ['count', 'mean', 'std', 'min', 'max', 'median', mad]
vp_by_group = results.groupby(['condition', 'trait', 'sample_size'])['Vp_est']
three_way_summary_vp = vp_by_group.agg(vp_stats).round(4)
print(three_way_summary_vp)

# Save the three-way table as tab-separated text.
three_way_summary_vp.to_csv("three_way_summary_Vp_est.tsv", sep="\t")

Summary statistics for Vp_est by condition, trait, and sample size:
                                     count    mean     std     min     max  \
condition         trait sample_size                                          
phenoVT_geneticAM Y1    8000           884  1.4100  0.0940  1.2183  1.6826   
                  Y2    8000           884  1.4673  0.1067  1.2065  1.8598   
phenoVT_phenoAM   Y1    8000          1226  1.4158  0.0965  1.1087  1.7340   
                  Y2    8000          1001  1.4709  0.0993  1.1731  1.8136   
phenoVT_socialAM  Y1    8000           850  1.3852  0.1009  1.1318  1.6844   
                  Y2    8000           801  1.4570  0.0903  1.1769  1.7271   
socialVT_phenoAM  Y1    8000           992  1.0382  0.0689  0.8585  1.2563   
                  Y2    8000           992  1.0309  0.0696  0.8328  1.3077   

                                     median     mad  
condition         trait sample_size                  
phenoVT_geneticAM Y1    8000         1.4036