#### This is a notebook to analyze aggregated RDR results

In [7]:
import pandas as pd
import numpy as np

In [8]:
# Read the aggregated results (tab-separated). The unfiltered frame is kept
# under its own name so the filter below can be changed and re-applied without
# losing the original rows.
results_raw = pd.read_csv("aggregated_rdr_gcta_results_final.tsv", sep="\t")

# Print the column names, one per line.
print("Column names in the results DataFrame:")
print("\n".join(map(str, results_raw.columns)))

# Only use results at this sample size. Named constant instead of a bare
# literal so the analysis subset is visible and easy to change.
TARGET_SAMPLE_SIZE = 8000

# .copy() makes `results` an independent frame, so any later column
# assignment on it does not act on a slice of `results_raw`.
results = results_raw.loc[results_raw['sample_size'] == TARGET_SAMPLE_SIZE].copy()

# An empty selection would make every downstream summary silently empty;
# surface it here without aborting the notebook.
if results.empty:
    print(f"Warning: no rows with sample_size == {TARGET_SAMPLE_SIZE}")


Column names in the results DataFrame:
condition
replication
sample_size
trait
VG1_est
VG1_se
VG2_est
VG2_se
VG3_est
VG3_se
Ve_est
Ve_se
Vp_est
Vp_se
VG1_Vp_est
VG1_Vp_se
VG2_Vp_est
VG2_Vp_se
VG3_Vp_est
VG3_Vp_se
Sum_of_VG_Vp_est
Sum_of_VG_Vp_se
logL
n_from_file


In [9]:
# Summary statistics for VG1_est, overall and grouped by condition, trait,
# and sample size (alone and in combination).


def summarize_estimate(frame, value_col, by):
    """Summarize one column per group.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table containing ``value_col`` and the grouping column(s).
    value_col : str
        Name of the column to summarize.
    by : str or list of str
        Column name(s) handed to ``DataFrame.groupby``.

    Returns
    -------
    pandas.DataFrame
        One row per group with the columns count, mean, std, min, max and
        median, rounded to 4 decimal places.
    """
    stats = ['count', 'mean', 'std', 'min', 'max', 'median']
    return frame.groupby(by)[value_col].agg(stats).round(4)


print("Summary statistics for VG1_est by condition, trait, and sample size:")
print("=" * 70)

# Overall summary statistics for VG1_est
print("\n1. Overall VG1_est summary:")
print(results['VG1_est'].describe())

# Each section below computes its table right before printing it, so a failure
# in one grouping still leaves the earlier sections printed.

# Summary by condition
print("\n2. VG1_est summary by condition:")
condition_summary = summarize_estimate(results, 'VG1_est', 'condition')
print(condition_summary)

# Summary by trait
print("\n3. VG1_est summary by trait:")
trait_summary = summarize_estimate(results, 'VG1_est', 'trait')
print(trait_summary)

# Summary by sample size
print("\n4. VG1_est summary by sample size:")
sample_size_summary = summarize_estimate(results, 'VG1_est', 'sample_size')
print(sample_size_summary)

# Summary by condition and trait
print("\n5. VG1_est summary by condition and trait:")
condition_trait_summary = summarize_estimate(results, 'VG1_est', ['condition', 'trait'])
print(condition_trait_summary)

# Summary by condition and sample size
print("\n6. VG1_est summary by condition and sample size:")
condition_size_summary = summarize_estimate(results, 'VG1_est', ['condition', 'sample_size'])
print(condition_size_summary)

# Summary by trait and sample size
print("\n7. VG1_est summary by trait and sample size:")
trait_size_summary = summarize_estimate(results, 'VG1_est', ['trait', 'sample_size'])
print(trait_size_summary)

# Three-way summary (condition, trait, sample size)
print("\n8. VG1_est summary by condition, trait, and sample size:")
three_way_summary = summarize_estimate(
    results, 'VG1_est', ['condition', 'trait', 'sample_size']
)
print(three_way_summary)

Summary statistics for VG1_est by condition, trait, and sample size:

1. Overall VG1_est summary:
count    7630.000000
mean        0.800385
std         0.068998
min         0.567684
25%         0.752639
50%         0.799342
75%         0.844423
max         1.079065
Name: VG1_est, dtype: float64

2. VG1_est summary by condition:
                   count    mean     std     min     max  median
condition                                                       
phenoVT_geneticAM   1768  0.8016  0.0695  0.6225  1.0645  0.8014
phenoVT_phenoAM     2227  0.8012  0.0686  0.5677  1.0270  0.8009
phenoVT_socialAM    1651  0.8017  0.0681  0.5878  1.0319  0.7981
socialVT_phenoAM    1984  0.7973  0.0697  0.5874  1.0791  0.7974

3. VG1_est summary by trait:
       count    mean     std     min     max  median
trait                                               
Y1      3952  0.7979  0.0706  0.5677  1.0319  0.7971
Y2      3678  0.8030  0.0672  0.5874  1.0791  0.8017

4. VG1_est summary by sample size:
  

In [10]:
# summary statistics for VG2_est by condition, trait, and sample size

In [11]:
# VG2_est report: the overall distribution first, then the same six statistics
# for every grouping of condition, trait and sample size.
vg2_stats = ['count', 'mean', 'std', 'min', 'max', 'median']

print("Summary statistics for VG2_est by condition, trait, and sample size:")
print("=" * 70)

# 1. Overall distribution of VG2_est
print("\n1. Overall VG2_est summary:")
print(results['VG2_est'].describe())

# 2. Grouped by condition
print("\n2. VG2_est summary by condition:")
condition_summary_vg2 = (
    results
    .groupby('condition')['VG2_est']
    .agg(vg2_stats)
    .round(4)
)
print(condition_summary_vg2)

# 3. Grouped by trait
print("\n3. VG2_est summary by trait:")
trait_summary_vg2 = (
    results
    .groupby('trait')['VG2_est']
    .agg(vg2_stats)
    .round(4)
)
print(trait_summary_vg2)

# 4. Grouped by sample size
print("\n4. VG2_est summary by sample size:")
sample_size_summary_vg2 = (
    results
    .groupby('sample_size')['VG2_est']
    .agg(vg2_stats)
    .round(4)
)
print(sample_size_summary_vg2)

# 5. Grouped by condition x trait
print("\n5. VG2_est summary by condition and trait:")
condition_trait_summary_vg2 = (
    results
    .groupby(['condition', 'trait'])['VG2_est']
    .agg(vg2_stats)
    .round(4)
)
print(condition_trait_summary_vg2)

# 6. Grouped by condition x sample size
print("\n6. VG2_est summary by condition and sample size:")
condition_size_summary_vg2 = (
    results
    .groupby(['condition', 'sample_size'])['VG2_est']
    .agg(vg2_stats)
    .round(4)
)
print(condition_size_summary_vg2)

# 7. Grouped by trait x sample size
print("\n7. VG2_est summary by trait and sample size:")
trait_size_summary_vg2 = (
    results
    .groupby(['trait', 'sample_size'])['VG2_est']
    .agg(vg2_stats)
    .round(4)
)
print(trait_size_summary_vg2)

# 8. Grouped by condition x trait x sample size
print("\n8. VG2_est summary by condition, trait, and sample size:")
three_way_summary_vg2 = (
    results
    .groupby(['condition', 'trait', 'sample_size'])['VG2_est']
    .agg(vg2_stats)
    .round(4)
)
print(three_way_summary_vg2)

Summary statistics for VG2_est by condition, trait, and sample size:

1. Overall VG2_est summary:
count    7630.000000
mean        0.081157
std         0.051454
min         0.000001
25%         0.004376
50%         0.092569
75%         0.124074
max         0.182191
Name: VG2_est, dtype: float64

2. VG2_est summary by condition:
                   count    mean     std     min     max  median
condition                                                       
phenoVT_geneticAM   1768  0.1106  0.0228  0.0646  0.1716  0.1092
phenoVT_phenoAM     2227  0.1155  0.0232  0.0681  0.1822  0.1214
phenoVT_socialAM    1651  0.0995  0.0196  0.0576  0.1545  0.0993
socialVT_phenoAM    1984  0.0011  0.0014  0.0000  0.0074  0.0004

3. VG2_est summary by trait:
       count    mean     std  min     max  median
trait                                            
Y1      3952  0.0964  0.0563  0.0  0.1822  0.1231
Y2      3678  0.0647  0.0395  0.0  0.1208  0.0837

4. VG2_est summary by sample size:
             c