#### This notebook analyzes aggregated RDR results

In [1]:
import pandas as pd
import numpy as np

# Define custom MAD (Median Absolute Deviation) function
def mad(x):
    """Calculate the Median Absolute Deviation (MAD) of ``x``.

    The statistic is ``median(|x_i - median(x)|)`` over all elements of
    ``x``; no scaling constant is applied.

    Missing values (NaN) are removed before the medians are taken. The
    pandas reducers this helper is aggregated alongside ('count', 'mean',
    'std', 'median') skip NaN, so without this step a single missing value
    would yield a NaN MAD next to otherwise valid statistics.

    Parameters
    ----------
    x : array-like of numbers
        For example one group's pandas Series, a list, or an ndarray.

    Returns
    -------
    float
        MAD of the non-missing values, or NaN if there are none.
    """
    # Flatten to a float array so NaN can be detected regardless of input type.
    values = np.asarray(x, dtype=float).ravel()
    values = values[~np.isnan(values)]
    if values.size == 0:
        # Nothing left to summarise; return NaN directly rather than letting
        # np.median emit an empty-slice RuntimeWarning.
        return np.nan
    center = np.median(values)
    return np.median(np.abs(values - center))

In [2]:
# Load the aggregated results table (tab-separated file)
results = pd.read_csv(
    "Results/aggregated_rdr_gcta_results_v2.tsv",
    sep="\t",
)

# List every available column, one per line
print("Column names in the results DataFrame:")
for col in results.columns:
    print(col)

# Restrict the analysis to rows whose sample_size equals 8000
results = results.loc[results['sample_size'].eq(8000)]


Column names in the results DataFrame:
condition
replication
sample_size
trait
VG1_est
VG1_se
VG2_est
VG2_se
VG3_est
VG3_se
Ve_est
Ve_se
Vp_est
Vp_se
VG1_Vp_est
VG1_Vp_se
VG2_Vp_est
VG2_Vp_se
VG3_Vp_est
VG3_Vp_se
Sum_of_VG_Vp_est
Sum_of_VG_Vp_se
logL
n_from_file


In [3]:
# Descriptive statistics for VG1_Vp_est: overall first, then within every
# one-, two-, and three-way grouping of condition, trait, and sample size.
print("Summary statistics for VG1_Vp_est by condition, trait, and sample size:")
print("=" * 70)

# Aggregations applied to each grouping below; `mad` is the notebook's
# median-absolute-deviation helper.
vg1_stat_funcs = ['count', 'mean', 'std', 'min', 'max', 'median', mad]

# 1. Distribution over all rows
print("\n1. Overall VG1_Vp_est summary:")
print(results['VG1_Vp_est'].describe())

# 2. Grouped by condition
print("\n2. VG1_Vp_est summary by condition:")
condition_summary = (
    results
    .groupby('condition')['VG1_Vp_est']
    .agg(vg1_stat_funcs)
    .round(4)
)
print(condition_summary)

# 3. Grouped by trait
print("\n3. VG1_Vp_est summary by trait:")
trait_summary = (
    results
    .groupby('trait')['VG1_Vp_est']
    .agg(vg1_stat_funcs)
    .round(4)
)
print(trait_summary)

# 4. Grouped by sample size
print("\n4. VG1_Vp_est summary by sample size:")
sample_size_summary = (
    results
    .groupby('sample_size')['VG1_Vp_est']
    .agg(vg1_stat_funcs)
    .round(4)
)
print(sample_size_summary)

# 5. Grouped by condition x trait
print("\n5. VG1_Vp_est summary by condition and trait:")
condition_trait_summary = (
    results
    .groupby(['condition', 'trait'])['VG1_Vp_est']
    .agg(vg1_stat_funcs)
    .round(4)
)
print(condition_trait_summary)

# 6. Grouped by condition x sample size
print("\n6. VG1_Vp_est summary by condition and sample size:")
condition_size_summary = (
    results
    .groupby(['condition', 'sample_size'])['VG1_Vp_est']
    .agg(vg1_stat_funcs)
    .round(4)
)
print(condition_size_summary)

# 7. Grouped by trait x sample size
print("\n7. VG1_Vp_est summary by trait and sample size:")
trait_size_summary = (
    results
    .groupby(['trait', 'sample_size'])['VG1_Vp_est']
    .agg(vg1_stat_funcs)
    .round(4)
)
print(trait_size_summary)

# 8. Grouped by condition x trait x sample size
print("\n8. VG1_Vp_est summary by condition, trait, and sample size:")
three_way_summary = (
    results
    .groupby(['condition', 'trait', 'sample_size'])['VG1_Vp_est']
    .agg(vg1_stat_funcs)
    .round(4)
)
print(three_way_summary)

# Save the three-way table as a tab-separated file
three_way_summary.to_csv("three_way_summary_VG1_Vp_est.tsv", sep="\t")

Summary statistics for VG1_Vp_est by condition, trait, and sample size:

1. Overall VG1_Vp_est summary:
count    219.000000
mean       0.565301
std        0.017266
min        0.502539
25%        0.554383
50%        0.564347
75%        0.576948
max        0.614521
Name: VG1_Vp_est, dtype: float64

2. VG1_Vp_est summary by condition:
                 count    mean     std     min     max  median     mad
condition                                                             
phenoVT_phenoAM    219  0.5653  0.0173  0.5025  0.6145  0.5643  0.0109

3. VG1_Vp_est summary by trait:
       count    mean     std     min     max  median     mad
trait                                                       
Y1       218  0.5654  0.0172  0.5025  0.6145  0.5646  0.0109
Y2         1  0.5422     NaN  0.5422  0.5422  0.5422  0.0000

4. VG1_Vp_est summary by sample size:
             count    mean     std     min     max  median     mad
sample_size                                                       
800

In [4]:
# summary statistics for VG2_Vp_est by condition, trait, and sample size

In [5]:
# Descriptive statistics for VG2_Vp_est: overall first, then within every
# one-, two-, and three-way grouping of condition, trait, and sample size.
print("Summary statistics for VG2_Vp_est by condition, trait, and sample size:")
print("=" * 70)

# Aggregations applied to each grouping below; `mad` is the notebook's
# median-absolute-deviation helper.
vg2_stat_funcs = ['count', 'mean', 'std', 'min', 'max', 'median', mad]

# 1. Distribution over all rows
print("\n1. Overall VG2_Vp_est summary:")
print(results['VG2_Vp_est'].describe())

# 2. Grouped by condition
print("\n2. VG2_Vp_est summary by condition:")
condition_summary_vg2 = (
    results
    .groupby('condition')['VG2_Vp_est']
    .agg(vg2_stat_funcs)
    .round(4)
)
print(condition_summary_vg2)

# 3. Grouped by trait
print("\n3. VG2_Vp_est summary by trait:")
trait_summary_vg2 = (
    results
    .groupby('trait')['VG2_Vp_est']
    .agg(vg2_stat_funcs)
    .round(4)
)
print(trait_summary_vg2)

# 4. Grouped by sample size
print("\n4. VG2_Vp_est summary by sample size:")
sample_size_summary_vg2 = (
    results
    .groupby('sample_size')['VG2_Vp_est']
    .agg(vg2_stat_funcs)
    .round(4)
)
print(sample_size_summary_vg2)

# 5. Grouped by condition x trait
print("\n5. VG2_Vp_est summary by condition and trait:")
condition_trait_summary_vg2 = (
    results
    .groupby(['condition', 'trait'])['VG2_Vp_est']
    .agg(vg2_stat_funcs)
    .round(4)
)
print(condition_trait_summary_vg2)

# 6. Grouped by condition x sample size
print("\n6. VG2_Vp_est summary by condition and sample size:")
condition_size_summary_vg2 = (
    results
    .groupby(['condition', 'sample_size'])['VG2_Vp_est']
    .agg(vg2_stat_funcs)
    .round(4)
)
print(condition_size_summary_vg2)

# 7. Grouped by trait x sample size
print("\n7. VG2_Vp_est summary by trait and sample size:")
trait_size_summary_vg2 = (
    results
    .groupby(['trait', 'sample_size'])['VG2_Vp_est']
    .agg(vg2_stat_funcs)
    .round(4)
)
print(trait_size_summary_vg2)

# 8. Grouped by condition x trait x sample size
print("\n8. VG2_Vp_est summary by condition, trait, and sample size:")
three_way_summary_vg2 = (
    results
    .groupby(['condition', 'trait', 'sample_size'])['VG2_Vp_est']
    .agg(vg2_stat_funcs)
    .round(4)
)
print(three_way_summary_vg2)

# Save the three-way table as a tab-separated file
three_way_summary_vg2.to_csv("three_way_summary_VG2_Vp_est.tsv", sep="\t")

Summary statistics for VG2_Vp_est by condition, trait, and sample size:

1. Overall VG2_Vp_est summary:
count    219.000000
mean       0.094949
std        0.007676
min        0.062573
25%        0.090102
50%        0.094919
75%        0.099704
max        0.119325
Name: VG2_Vp_est, dtype: float64

2. VG2_Vp_est summary by condition:
                 count    mean     std     min     max  median     mad
condition                                                             
phenoVT_phenoAM    219  0.0949  0.0077  0.0626  0.1193  0.0949  0.0049

3. VG2_Vp_est summary by trait:
       count    mean     std     min     max  median     mad
trait                                                       
Y1       218  0.0951  0.0074  0.0750  0.1193  0.0949  0.0048
Y2         1  0.0626     NaN  0.0626  0.0626  0.0626  0.0000

4. VG2_Vp_est summary by sample size:
             count    mean     std     min     max  median     mad
sample_size                                                       
800

In [6]:
# Descriptive statistics for VG3_Vp_est: overall first, then within every
# one-, two-, and three-way grouping of condition, trait, and sample size.
print("Summary statistics for VG3_Vp_est by condition, trait, and sample size:")
print("=" * 70)

# Aggregations applied to each grouping below; `mad` is the notebook's
# median-absolute-deviation helper.
vg3_stat_funcs = ['count', 'mean', 'std', 'min', 'max', 'median', mad]

# 1. Distribution over all rows
print("\n1. Overall VG3_Vp_est summary:")
print(results['VG3_Vp_est'].describe())

# 2. Grouped by condition
print("\n2. VG3_Vp_est summary by condition:")
condition_summary_vg3 = (
    results
    .groupby('condition')['VG3_Vp_est']
    .agg(vg3_stat_funcs)
    .round(4)
)
print(condition_summary_vg3)

# 3. Grouped by trait
print("\n3. VG3_Vp_est summary by trait:")
trait_summary_vg3 = (
    results
    .groupby('trait')['VG3_Vp_est']
    .agg(vg3_stat_funcs)
    .round(4)
)
print(trait_summary_vg3)

# 4. Grouped by sample size
print("\n4. VG3_Vp_est summary by sample size:")
sample_size_summary_vg3 = (
    results
    .groupby('sample_size')['VG3_Vp_est']
    .agg(vg3_stat_funcs)
    .round(4)
)
print(sample_size_summary_vg3)

# 5. Grouped by condition x trait
print("\n5. VG3_Vp_est summary by condition and trait:")
condition_trait_summary_vg3 = (
    results
    .groupby(['condition', 'trait'])['VG3_Vp_est']
    .agg(vg3_stat_funcs)
    .round(4)
)
print(condition_trait_summary_vg3)

# 6. Grouped by condition x sample size
print("\n6. VG3_Vp_est summary by condition and sample size:")
condition_size_summary_vg3 = (
    results
    .groupby(['condition', 'sample_size'])['VG3_Vp_est']
    .agg(vg3_stat_funcs)
    .round(4)
)
print(condition_size_summary_vg3)

# 7. Grouped by trait x sample size
print("\n7. VG3_Vp_est summary by trait and sample size:")
trait_size_summary_vg3 = (
    results
    .groupby(['trait', 'sample_size'])['VG3_Vp_est']
    .agg(vg3_stat_funcs)
    .round(4)
)
print(trait_size_summary_vg3)

# 8. Grouped by condition x trait x sample size
print("\n8. VG3_Vp_est summary by condition, trait, and sample size:")
three_way_summary_vg3 = (
    results
    .groupby(['condition', 'trait', 'sample_size'])['VG3_Vp_est']
    .agg(vg3_stat_funcs)
    .round(4)
)
print(three_way_summary_vg3)

# Save the three-way table as a tab-separated file
three_way_summary_vg3.to_csv("three_way_summary_VG3_Vp_est.tsv", sep="\t")

Summary statistics for VG3_Vp_est by condition, trait, and sample size:

1. Overall VG3_Vp_est summary:
count    219.000000
mean       0.179863
std        0.013195
min        0.145646
25%        0.170673
50%        0.179907
75%        0.188181
max        0.250005
Name: VG3_Vp_est, dtype: float64

2. VG3_Vp_est summary by condition:
                 count    mean     std     min   max  median     mad
condition                                                           
phenoVT_phenoAM    219  0.1799  0.0132  0.1456  0.25  0.1799  0.0083

3. VG3_Vp_est summary by trait:
       count    mean     std     min     max  median     mad
trait                                                       
Y1       218  0.1795  0.0123  0.1456  0.2141  0.1799  0.0083
Y2         1  0.2500     NaN  0.2500  0.2500  0.2500  0.0000

4. VG3_Vp_est summary by sample size:
             count    mean     std     min   max  median     mad
sample_size                                                     
8000         