In [8]:
import os
import pandas as pd

# Running ABCtoolbox
Use PLS to estimate parameters

working in `/vol_c/ABC_AJmodels_genome/PLS/dropIBDq`  
working with files:  
`input_ABC_HPC_OSG_CHTC_2.txt`  
which are a combination of simulations with instantaneous growth from HPC and OSG, and have 
101,334 simulations 

In [1]:
%%bash
# Sanity check: enter the analysis directory and print it.
# Each %%bash cell runs in its own shell, so this `cd` does not carry over
# to later cells or to the Python kernel.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
pwd

/vol_c/src/ABC_results_AJ/genome_AJmodels


## Clean data

### A. Remove IBD quantile stats and rows with missing data
Use the script `drop_columns.py`, which takes 3 arguments:
- a tab delimited input file with column names
- string in columns to be removed
- output file name

In [9]:
%%bash
# Run drop_columns.py on the simulated table and on the observed table.
# Arguments per call: input table, the substring "q" identifying the columns
# to remove, and the output file name (written into the analysis directory).
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
py=/opt/anaconda3/bin/python
drop_script=/vol_c/src/ABC_results_AJ/genome_AJmodels/drop_columns.py
data_dir=/vol_c/ABC_AJmodels_genome
for name in input_ABC_HPC_OSG_CHTC_2 results_real_output_M23; do
    "${py}" "${drop_script}" "${data_dir}/${name}.txt" q "${name}_dropIBDq.txt"
done

### B. Prune summary stats out of range

In [61]:
# The tables below are addressed by relative name, so point the kernel's
# working directory at the analysis directory first. A `cd` inside a %%bash
# cell only affects that cell's shell, not the Python kernel.
path = '/vol_c/ABC_AJmodels_genome/PLS/dropIBDq'
os.chdir(path)

# Simulated parameters + summary statistics (one row per simulation).
df_sim = pd.read_csv('input_ABC_HPC_OSG_CHTC_2_dropIBDq.txt', sep = '\t', low_memory = False)
# Summary statistics computed from the observed data.
df_real = pd.read_csv('results_real_output_M23_dropIBDq.txt', sep = '\t')

In [63]:
# Display the observed summary statistics.
df_real

Unnamed: 0,SegS_Af_CGI_sum,Sing_Af_CGI_sum,Dupl_Af_CGI_sum,TajD_Af_CGI_stand_sum,SegS_Eu_CGI_sum,Sing_Eu_CGI_sum,Dupl_Eu_CGI_sum,TajD_Eu_CGI_stand_sum,SegS_As_CGI_sum,Sing_As_CGI_sum,...,IBD30_var_eAwA,IBD30_var_eAE,IBD30_var_wAE,IBD30_var_eAJ,IBD30_var_wAJ,IBD30_var_eAM,IBD30_var_wAM,IBD30_var_JM,IBD30_var_JE,IBD30_var_ME
0,10476737,3763064,1810086,-0.303759,6929034,1905172,1010348,0.264549,4996408,1934129,...,21.043722,12.076764,16.202144,22.024543,22.69301,18.992957,23.838793,20.203593,11.34993,16.529731


In [130]:
# Preview the first simulations (parameter columns followed by summary statistics).
df_sim.head()

Unnamed: 0,sim,Asc_NAF,Asc_NEU,Asc_NCHB,daf,Log10_NAF,Log10_NANC,Log10_NCEU,Log10_NCHB,Log10_NWA,...,IBD30_var_eAwA,IBD30_var_eAE,IBD30_var_wAE,IBD30_var_eAJ,IBD30_var_wAJ,IBD30_var_eAM,IBD30_var_wAM,IBD30_var_JM,IBD30_var_JE,IBD30_var_ME
0,1027427_1_147,18,15,17,0.063976,4.078094,3.801952,3.351023,4.518461,5.006697,...,38.67202,10.275838,11.485956,43.988784,31.857727,39.367097,36.226953,36.53203,11.40294,11.563498
1,1027427_1_139,16,20,8,0.074399,4.752148,4.023088,4.960666,4.28513,5.051558,...,21.024094,18.119387,18.236795,22.283646,25.04182,22.357914,21.652645,19.978077,20.185323,16.518754
2,1027427_1_99,11,13,8,0.060091,4.527913,4.3934,4.619646,4.701732,5.030033,...,96.845743,85.809536,79.20363,86.872717,91.223661,83.1485,79.796308,79.569965,82.549061,75.200992
3,1027427_2_69,13,7,5,0.077346,4.039969,4.012289,3.681241,3.173478,3.554368,...,93.215753,55.206047,69.97925,75.50434,84.63738,72.568502,77.368275,78.132178,53.553195,57.781601
4,1027427_1_40,13,17,9,0.077928,4.540104,4.524811,3.128399,3.364363,5.450157,...,12.412014,5.206968,3.912816,8.825323,11.958975,11.046655,9.871257,9.40318,4.024615,3.86853


In [128]:
# Number of leading df_sim columns (simulation id + model parameters) that are
# always carried over to the filtered table.
n_params = 24

# Keep a summary statistic only when the observed value lies strictly inside
# the range spanned by the simulations.
param_cols = list(df_sim.columns[:n_params])
kept_stats = []
for stat in df_real:
    sim_min = float(df_sim[stat].min())
    sim_max = float(df_sim[stat].max())
    # Take the observed scalar explicitly: calling float() on a one-element
    # Series is deprecated in recent pandas.
    real = float(df_real[stat].iloc[0])

    if sim_min < real < sim_max:
        kept_stats.append(stat)

# Select all columns in one step and take explicit copies. This avoids writing
# into a slice of df_sim (SettingWithCopyWarning) and avoids inserting columns
# one at a time (DataFrame fragmentation).
new_cols = [stat for stat in kept_stats if stat not in param_cols]
df_sim_filter = df_sim[param_cols + new_cols].copy()
df_real_filter = df_real[kept_stats].copy()

df_sim_filter.to_csv('input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt', sep = '\t', index = False)
df_real_filter.to_csv('results_real_output_M23_dropIBDq_filterstats.txt', sep = '\t', index = False)
df_sim_filter.head()

Unnamed: 0,sim,Asc_NAF,Asc_NEU,Asc_NCHB,daf,Log10_NAF,Log10_NANC,Log10_NCEU,Log10_NCHB,Log10_NWA,...,IBD30_var_eAwA,IBD30_var_eAE,IBD30_var_wAE,IBD30_var_eAJ,IBD30_var_wAJ,IBD30_var_eAM,IBD30_var_wAM,IBD30_var_JM,IBD30_var_JE,IBD30_var_ME
0,1027427_1_147,18,15,17,0.063976,4.078094,3.801952,3.351023,4.518461,5.006697,...,38.67202,10.275838,11.485956,43.988784,31.857727,39.367097,36.226953,36.53203,11.40294,11.563498
1,1027427_1_139,16,20,8,0.074399,4.752148,4.023088,4.960666,4.28513,5.051558,...,21.024094,18.119387,18.236795,22.283646,25.04182,22.357914,21.652645,19.978077,20.185323,16.518754
2,1027427_1_99,11,13,8,0.060091,4.527913,4.3934,4.619646,4.701732,5.030033,...,96.845743,85.809536,79.20363,86.872717,91.223661,83.1485,79.796308,79.569965,82.549061,75.200992
3,1027427_2_69,13,7,5,0.077346,4.039969,4.012289,3.681241,3.173478,3.554368,...,93.215753,55.206047,69.97925,75.50434,84.63738,72.568502,77.368275,78.132178,53.553195,57.781601
4,1027427_1_40,13,17,9,0.077928,4.540104,4.524811,3.128399,3.364363,5.450157,...,12.412014,5.206968,3.912816,8.825323,11.958975,11.046655,9.871257,9.40318,4.024615,3.86853


## 1. Find PLS components
Needed to install R package "pls"
```
R
>install.packages("pls")
```
Use the R script `findPLS.r`, provided by Consuleo (from an old version of ABCtoolbox)

In [136]:
%%bash
# Run findPLS.r on the filtered simulation table.
# Arguments: working directory, input file, first/last statistic column,
# first/last parameter column (1-based positions in the table header),
# and the number of PLS components to compute.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
first_stat=25
last_stat=192
first_param=2
last_param=24
n_comp=$(( last_stat - first_stat ))

time Rscript /vol_c/src/abctoolbox-public/findPLS.r /vol_c/ABC_AJmodels_genome/PLS/dropIBDq/ input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt ${first_stat} ${last_stat} ${first_param} ${last_param} ${n_comp}

[1] "/vol_c/ABC_AJmodels_genome/PLS/dropIBDq/"
[1] "input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt"
[1] 167
  [1] "sim"                      "Asc_NAF"                 
  [3] "Asc_NEU"                  "Asc_NCHB"                
  [5] "daf"                      "Log10_NAF"               
  [7] "Log10_NANC"               "Log10_NCEU"              
  [9] "Log10_NCHB"               "Log10_NWA"               
 [11] "Log10_NEA"                "Log10_NAg"               
 [13] "Log10_NJ"                 "Log10_NM"                
 [15] "m"                        "Tgrowth_Af"              
 [17] "TAF"                      "TEM"                     
 [19] "Teu_as"                   "TA"                      
 [21] "TMJ"                      "TAEW"                    
 [23] "Tm"                       "TAg"                     
 [25] "SegS_Af_CGI_sum"          "Sing_Af_CGI_sum"         
 [27] "Dupl_Af_CGI_sum"          "TajD_Af_CGI_stand_sum"   
 [29] "SegS_Eu_CGI_sum"          "Sing_Eu_CGI_sum


Attaching package: ‘pls’

The following object is masked from ‘package:stats’:

    loadings


real	13m22.132s
user	41m54.228s
sys	0m19.892s


This creates the output files  
`Routput_input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt`  
`RMSE_input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt.pdf`

In [138]:
%%bash
# List the PLS definition files (Routput_*) and the RMSE plots (RMSE_*)
# present in the analysis directory.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
ls Routput_*
ls RMSE_*

Routput_input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt
Routput_input_ABC_HPC_OSG_CHTC_2_dropIBDq.txt
RMSE_input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt.pdf
RMSE_input_ABC_HPC_OSG_CHTC_2_dropIBDq.txt.pdf


In [139]:
# Make the analysis directory the kernel's working directory so that the
# relative file names used from here on resolve.
path = '/vol_c/ABC_AJmodels_genome/PLS/dropIBDq'
os.chdir(path)
# Load the Routput_* file for the filtered statistics; it has no header row,
# so pandas numbers the columns.
df = pd.read_csv('Routput_input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt', sep = '\t', header = None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,164,165,166,167,168,169,170,171,172,173
0,SegS_Af_CGI_sum,9.378378e+07,171321.000000,-9.292929,1.130608,0.210749,0.096625,-0.062847,-0.056954,0.235218,...,-0.011023,-0.034934,0.045350,-0.042518,0.009846,0.003709,0.041699,-0.020662,0.021932,0.010569
1,Sing_Af_CGI_sum,2.898179e+07,73467.000000,-6.666667,1.151198,0.230677,0.102026,-0.059390,-0.058380,0.231309,...,0.008459,0.018497,-0.023519,0.020002,-0.006366,-0.002653,-0.020320,0.009981,-0.009657,-0.004034
2,Dupl_Af_CGI_sum,1.578134e+07,27316.000000,-9.292929,1.124199,0.193769,0.090062,-0.061020,-0.057649,0.238211,...,0.006872,0.014192,-0.015714,0.015036,-0.004029,-0.000865,-0.013468,0.008405,-0.007985,-0.004829
3,TajD_Af_CGI_stand_sum,4.428776e-02,-1.354249,3.838384,1.651739,0.446607,0.243354,-0.049456,-0.026355,0.163489,...,0.001864,0.003770,-0.004053,0.002647,-0.001338,-0.000595,-0.002548,0.001776,-0.001062,-0.000492
4,SegS_Eu_CGI_sum,8.787518e+07,123493.000000,-9.292929,1.118393,0.184606,0.079728,-0.049606,-0.107809,0.214289,...,0.011700,0.000151,0.002482,-0.008131,-0.005001,0.005433,0.005437,-0.003177,0.006147,-0.000174
5,Sing_Eu_CGI_sum,2.645216e+07,41424.000000,-6.666667,1.133193,0.195124,0.086150,-0.023165,-0.137501,0.150052,...,-0.009601,0.002973,-0.008957,0.011536,0.000469,-0.005469,-0.008808,0.001423,-0.006683,0.000498
6,Dupl_Eu_CGI_sum,1.461340e+07,20971.000000,-9.292929,1.108325,0.158700,0.073576,-0.049196,-0.105928,0.214091,...,-0.002921,-0.000213,0.000641,0.004655,0.007015,-0.003067,-0.001352,0.003708,-0.002069,-0.000611
7,TajD_Eu_CGI_stand_sum,1.281832e+00,-2.279983,1.212121,1.546671,0.539208,0.193995,-0.059313,0.051259,0.147942,...,-0.002711,0.003361,-0.005985,0.005264,-0.000364,-0.002046,-0.004702,0.001766,-0.003963,-0.001003
8,SegS_As_CGI_sum,6.772608e+07,129714.000000,-10.606061,1.107702,0.167893,0.075353,-0.057320,-0.099957,0.201685,...,-0.021825,0.053548,-0.076953,0.060134,-0.038421,-0.009269,-0.082785,0.035367,-0.057949,-0.016051
9,Sing_As_CGI_sum,2.959994e+07,70959.000000,-9.292929,1.106257,0.158036,0.068820,-0.046865,-0.122726,0.168405,...,0.012786,-0.035588,0.046760,-0.039990,0.020225,0.007704,0.051625,-0.022569,0.033013,0.009922


Same number of rows and columns as `/vol_c/ABC_AJmodels_instant/PLS/Routput_input_ABCtoolbox_M2_HPC_OSG_2.txt`

## 2. Transform stats

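ABCtoolbox input file for transforming the observed (real) statistics. Note that the cell below saves it as `test_ABC_transform_sim.txt`; the `_sim`/`_real` config file names are swapped relative to their contents.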

In [140]:
%%bash
# Write the ABCtoolbox `transform` config for the OBSERVED statistics
# (input: results_real_output_M23_dropIBDq_filterstats.txt), using the PLS
# definitions in the Routput_* file as the linear combinations.
# NOTE(review): this config is saved as test_ABC_transform_sim.txt although it
# describes the observed data; the simulation config is saved as
# test_ABC_transform_real.txt. The two names look swapped — confirm before
# reusing either file.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
echo "task transform
input results_real_output_M23_dropIBDq_filterstats.txt
output results_real_output_M23_dropIBDq_filterstats_transformed.txt
linearComb Routput_input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt
boxcox 1
logFile results_real_output_M23_dropIBDq_filterstats_transformed.log
verbose
" >test_ABC_transform_sim.txt

run ABCtoolbox to transform stats

In [141]:
%%bash
# Run ABCtoolbox with the config written to test_ABC_transform_sim.txt.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
ABCtoolbox test_ABC_transform_sim.txt


 ABCtoolbox 2.0 
****************
- Reading inputfile 'test_ABC_transform_sim.txt' ... done!
- Writing log to 'results_real_output_M23_dropIBDq_filterstats_transformed.log'
- Initializing random generator ... done with seed 122406651!
- Transforming Statistics
- Reading header of input file 'results_real_output_M23_dropIBDq_filterstats.txt'... done!
   -> Read a total of 168 columns.
- Reading linear combination file 'Routput_input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt'... done!
   -> Read a total of 167 columns.
   -> The first 0 columns of the input file are parameters and will not be transformed.
- Opening output file 'results_real_output_M23_dropIBDq_filterstats_transformed.txt'... done!
- Parsing input file and transforming statistics ... done!
   -> Sucessfully parsed 1 lines.
- Program terminated in 0 min!




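ABCtoolbox input file for transforming the simulated statistics. Note that the cell below saves it as `test_ABC_transform_real.txt`; the `_sim`/`_real` config file names are swapped relative to their contents.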

In [142]:
%%bash
# Write the ABCtoolbox `transform` config for the SIMULATED statistics
# (input: input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt), using the same
# Routput_* linear-combination file as for the observed data.
# NOTE(review): this config is saved as test_ABC_transform_real.txt although it
# describes the simulations; the observed-data config is saved as
# test_ABC_transform_sim.txt. The two names look swapped — confirm before
# reusing either file.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
echo "task transform
input input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt
output input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed.txt
linearComb Routput_input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt
boxcox 1
logFile input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed.log
verbose
" >test_ABC_transform_real.txt

run ABCtoolbox to transform stats

In [143]:
%%bash
# Run ABCtoolbox with the config written to test_ABC_transform_real.txt.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
ABCtoolbox test_ABC_transform_real.txt


 ABCtoolbox 2.0 
****************
- Reading inputfile 'test_ABC_transform_real.txt' ... done!
- Writing log to 'input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed.log'
- Initializing random generator ... done with seed 122431343!
- Transforming Statistics
- Reading header of input file 'input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt'... done!
   -> Read a total of 192 columns.
- Reading linear combination file 'Routput_input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats.txt'... done!
   -> Read a total of 167 columns.
   -> The first 24 columns of the input file are parameters and will not be transformed.
- Opening output file 'input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed.txt'... done!
- Parsing input file and transforming statistics ... done!
   -> Sucessfully parsed 96333 lines.
- Program terminated in 0.733333 min!




This will output the files  
`results_real_output_M23_dropIBDq_filterstats_transformed.txt`  
`results_real_output_M23_dropIBDq_filterstats_transformed.log`  
`input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed.txt`  
`input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed.log`

In [144]:
%%bash
# List the transformed tables present in the analysis directory.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
ls *transformed.txt

input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed.txt
input_ABC_HPC_OSG_CHTC_2_dropIBDq_transformed.txt
results_real_output_M23_dropIBDq_filterstats_transformed.txt
results_real_output_M23_dropIBDq_transformed.txt


In [145]:
# Inspect the transformed observed statistics (LinearCombination_* columns).
pd.read_csv('results_real_output_M23_dropIBDq_filterstats_transformed.txt', sep = '\t')

Unnamed: 0.1,Unnamed: 0,LinearCombination_0,LinearCombination_1,LinearCombination_2,LinearCombination_3,LinearCombination_4,LinearCombination_5,LinearCombination_6,LinearCombination_7,LinearCombination_8,...,LinearCombination_157,LinearCombination_158,LinearCombination_159,LinearCombination_160,LinearCombination_161,LinearCombination_162,LinearCombination_163,LinearCombination_164,LinearCombination_165,LinearCombination_166
0,,-9.1432,0.285309,-2.57652,1.6103,3.22299,-4.32117,2.87833,-0.224268,0.423893,...,0.001424,0.015675,-0.000933,-0.010175,-0.01502,0.002765,-0.004173,0.00314,0.007942,-0.001217


In [146]:
# Preview the transformed simulations: parameter columns followed by the
# LinearCombination_* columns. Only the first rows are shown, as in the other
# preview cells, rather than displaying the whole simulation table.
pd.read_csv('input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed.txt', sep = '\t').head(10)

Unnamed: 0,sim,Asc_NAF,Asc_NEU,Asc_NCHB,daf,Log10_NAF,Log10_NANC,Log10_NCEU,Log10_NCHB,Log10_NWA,...,LinearCombination_157,LinearCombination_158,LinearCombination_159,LinearCombination_160,LinearCombination_161,LinearCombination_162,LinearCombination_163,LinearCombination_164,LinearCombination_165,LinearCombination_166
0,1027430.0,18,15,17,0.063976,4.07809,3.80195,3.35102,4.51846,5.00670,...,-0.017843,0.003691,0.032805,-0.019743,-0.007165,0.000383,-0.016799,-0.005309,-0.005333,0.000632
1,1027430.0,16,20,8,0.074399,4.75215,4.02309,4.96067,4.28513,5.05156,...,-0.003953,-0.004682,0.005522,-0.019595,-0.025474,0.009572,0.002488,-0.000652,-0.006865,-0.003674
2,1027430.0,11,13,8,0.060090,4.52791,4.39340,4.61965,4.70173,5.03003,...,-0.018984,-0.009914,0.010817,-0.007277,0.002067,0.011239,0.005832,0.001259,-0.003717,-0.010363
3,1027430.0,13,7,5,0.077346,4.03997,4.01229,3.68124,3.17348,3.55437,...,0.006789,-0.027589,0.050889,-0.030995,-0.003838,0.001054,0.015308,-0.017589,0.011886,0.003899
4,1027430.0,13,17,9,0.077928,4.54010,4.52481,3.12840,3.36436,5.45016,...,0.024091,0.007226,-0.026888,0.047626,0.024663,-0.018248,-0.020629,0.000899,-0.001324,0.011535
5,1027430.0,15,18,4,0.099961,4.86455,4.51951,3.97644,3.41996,3.31429,...,-0.009366,0.008814,0.008945,-0.003762,0.011732,-0.011721,0.001348,-0.000758,0.001841,0.001867
6,1027430.0,6,7,18,0.089439,4.81435,4.33642,3.35334,3.89862,5.47052,...,-0.014021,-0.017405,0.019201,-0.017250,-0.009457,0.025895,-0.001018,-0.008705,-0.005128,-0.013763
7,1027430.0,15,12,19,0.098262,4.62506,4.29290,4.95864,4.48417,4.24276,...,-0.017775,-0.004220,0.012445,-0.008980,-0.005002,-0.003985,-0.004759,-0.004621,0.002167,0.001373
8,1027430.0,4,19,14,0.065248,4.01313,3.18921,3.80202,3.68690,5.82570,...,0.025959,-0.000377,-0.043420,0.060342,0.054367,-0.062484,0.021856,-0.007749,0.018557,0.014439
9,1027430.0,8,2,9,0.079326,4.84957,4.29030,3.17926,3.29973,3.27600,...,-0.021466,0.001298,-0.014451,0.012008,0.006422,-0.003130,-0.010759,0.016519,-0.016834,-0.010534


## 3. Estimate parameters

### 3.a. Reduce number of components to use for estimation

In [147]:
%%bash
# Keep the 24 leading parameter columns plus the first n_pls components of the
# transformed simulations, and the matching components of the observed data.
# Field 1 of the observed file is skipped; it loads as an empty unnamed column.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
n_pls=10
sim=input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed
obs=results_real_output_M23_dropIBDq_filterstats_transformed
cut -f1-$(( 24 + n_pls )) "${sim}.txt" >"${sim}_${n_pls}pls.txt"
cut -f2-$(( 1 + n_pls )) "${obs}.txt" >"${obs}_${n_pls}pls.txt"

In [148]:
# Check the 10-component simulation table: parameters followed by LinearCombination_0..9.
pd.read_csv('input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed_10pls.txt', sep = '\t').head()

Unnamed: 0,sim,Asc_NAF,Asc_NEU,Asc_NCHB,daf,Log10_NAF,Log10_NANC,Log10_NCEU,Log10_NCHB,Log10_NWA,...,LinearCombination_0,LinearCombination_1,LinearCombination_2,LinearCombination_3,LinearCombination_4,LinearCombination_5,LinearCombination_6,LinearCombination_7,LinearCombination_8,LinearCombination_9
0,1027430.0,18,15,17,0.063976,4.07809,3.80195,3.35102,4.51846,5.0067,...,-2.09836,3.019,-1.27016,2.80133,1.5901,-0.143153,-1.06448,1.69941,-0.242392,0.198788
1,1027430.0,16,20,8,0.074399,4.75215,4.02309,4.96067,4.28513,5.05156,...,-8.10609,-5.89532,-3.34733,2.10706,0.526365,0.13413,1.64016,-0.122922,2.11545,-0.080133
2,1027430.0,11,13,8,0.06009,4.52791,4.3934,4.61965,4.70173,5.03003,...,8.81422,-11.2179,3.69544,1.52933,-2.93833,1.84132,-1.9084,1.91361,-2.89898,-0.626042
3,1027430.0,13,7,5,0.077346,4.03997,4.01229,3.68124,3.17348,3.55437,...,8.04915,3.83988,4.16892,2.27862,2.84874,-1.78499,2.85833,1.46217,-2.84911,-0.249709
4,1027430.0,13,17,9,0.077928,4.5401,4.52481,3.1284,3.36436,5.45016,...,-14.2209,4.13788,-0.139831,3.17374,1.94198,-2.32658,-0.620539,-2.57065,-0.706955,0.991149


In [149]:
# Check the 10-component observed table (LinearCombination_0..9 only).
pd.read_csv('results_real_output_M23_dropIBDq_filterstats_transformed_10pls.txt', sep = '\t')

Unnamed: 0,LinearCombination_0,LinearCombination_1,LinearCombination_2,LinearCombination_3,LinearCombination_4,LinearCombination_5,LinearCombination_6,LinearCombination_7,LinearCombination_8,LinearCombination_9
0,-9.1432,0.285309,-2.57652,1.6103,3.22299,-4.32117,2.87833,-0.224268,0.423893,-1.13255


In [150]:
%%bash
# Keep the 24 leading parameter columns plus the first n_pls components of the
# transformed simulations, and the matching components of the observed data.
# Field 1 of the observed file is skipped; it loads as an empty unnamed column.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
n_pls=20
sim=input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed
obs=results_real_output_M23_dropIBDq_filterstats_transformed
cut -f1-$(( 24 + n_pls )) "${sim}.txt" >"${sim}_${n_pls}pls.txt"
cut -f2-$(( 1 + n_pls )) "${obs}.txt" >"${obs}_${n_pls}pls.txt"

In [151]:
# Check the 20-component simulation table: parameters followed by LinearCombination_0..19.
pd.read_csv('input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed_20pls.txt', sep = '\t').head()

Unnamed: 0,sim,Asc_NAF,Asc_NEU,Asc_NCHB,daf,Log10_NAF,Log10_NANC,Log10_NCEU,Log10_NCHB,Log10_NWA,...,LinearCombination_10,LinearCombination_11,LinearCombination_12,LinearCombination_13,LinearCombination_14,LinearCombination_15,LinearCombination_16,LinearCombination_17,LinearCombination_18,LinearCombination_19
0,1027430.0,18,15,17,0.063976,4.07809,3.80195,3.35102,4.51846,5.0067,...,0.201343,-2.034,1.92227,0.257733,-1.02002,-0.644888,0.259086,1.4293,-0.16155,-0.356775
1,1027430.0,16,20,8,0.074399,4.75215,4.02309,4.96067,4.28513,5.05156,...,0.152884,-0.74315,1.05684,0.834673,-1.09579,1.0726,0.187285,0.547183,-0.685217,0.613078
2,1027430.0,11,13,8,0.06009,4.52791,4.3934,4.61965,4.70173,5.03003,...,-0.090384,2.10855,-2.03312,0.657699,1.67832,0.002409,-0.830205,-0.260904,0.459873,0.295263
3,1027430.0,13,7,5,0.077346,4.03997,4.01229,3.68124,3.17348,3.55437,...,-0.764072,-0.28017,0.166089,0.967757,-0.321073,-0.330425,-0.593167,1.38155,0.969106,-0.129462
4,1027430.0,13,17,9,0.077928,4.5401,4.52481,3.1284,3.36436,5.45016,...,-0.289257,0.962914,-1.22814,0.614856,1.16861,-0.830473,-0.160946,0.088304,0.855561,1.0108


In [153]:
# Check the 20-component observed table (LinearCombination_0..19 only).
pd.read_csv('results_real_output_M23_dropIBDq_filterstats_transformed_20pls.txt', sep = '\t')

Unnamed: 0,LinearCombination_0,LinearCombination_1,LinearCombination_2,LinearCombination_3,LinearCombination_4,LinearCombination_5,LinearCombination_6,LinearCombination_7,LinearCombination_8,LinearCombination_9,LinearCombination_10,LinearCombination_11,LinearCombination_12,LinearCombination_13,LinearCombination_14,LinearCombination_15,LinearCombination_16,LinearCombination_17,LinearCombination_18,LinearCombination_19
0,-9.1432,0.285309,-2.57652,1.6103,3.22299,-4.32117,2.87833,-0.224268,0.423893,-1.13255,0.551444,-0.284604,1.5877,0.832218,-2.13774,2.60144,-2.14944,0.727222,-0.737901,1.84496


In [154]:
%%bash
# Keep the 24 leading parameter columns plus the first n_pls components of the
# transformed simulations, and the matching components of the observed data.
# Field 1 of the observed file is skipped; it loads as an empty unnamed column.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
n_pls=50
sim=input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed
obs=results_real_output_M23_dropIBDq_filterstats_transformed
cut -f1-$(( 24 + n_pls )) "${sim}.txt" >"${sim}_${n_pls}pls.txt"
cut -f2-$(( 1 + n_pls )) "${obs}.txt" >"${obs}_${n_pls}pls.txt"

In [155]:
# Check the 50-component simulation table: parameters followed by LinearCombination_0..49.
pd.read_csv('input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed_50pls.txt', sep = '\t').head()

Unnamed: 0,sim,Asc_NAF,Asc_NEU,Asc_NCHB,daf,Log10_NAF,Log10_NANC,Log10_NCEU,Log10_NCHB,Log10_NWA,...,LinearCombination_40,LinearCombination_41,LinearCombination_42,LinearCombination_43,LinearCombination_44,LinearCombination_45,LinearCombination_46,LinearCombination_47,LinearCombination_48,LinearCombination_49
0,1027430.0,18,15,17,0.063976,4.07809,3.80195,3.35102,4.51846,5.0067,...,0.140191,0.319313,0.166487,-0.214223,0.09449,-0.255397,0.005393,0.174822,0.107697,-0.005607
1,1027430.0,16,20,8,0.074399,4.75215,4.02309,4.96067,4.28513,5.05156,...,0.071876,0.042338,0.361938,0.086246,-0.029912,-0.025375,-0.088881,-0.016676,0.071615,-0.078006
2,1027430.0,11,13,8,0.06009,4.52791,4.3934,4.61965,4.70173,5.03003,...,0.164436,-0.06595,-0.014714,-0.125566,-0.086829,0.195775,0.088206,0.139138,0.000326,0.158891
3,1027430.0,13,7,5,0.077346,4.03997,4.01229,3.68124,3.17348,3.55437,...,0.060331,-0.294564,0.512652,0.064778,0.331467,-0.051478,-0.444448,0.168999,0.616167,-0.349357
4,1027430.0,13,17,9,0.077928,4.5401,4.52481,3.1284,3.36436,5.45016,...,0.337214,-0.33493,-0.551448,-0.25179,0.159858,-0.441722,-0.62095,0.198,-0.239091,0.086324


In [156]:
# Check the 50-component observed table (LinearCombination_0..49 only).
pd.read_csv('results_real_output_M23_dropIBDq_filterstats_transformed_50pls.txt', sep = '\t')

Unnamed: 0,LinearCombination_0,LinearCombination_1,LinearCombination_2,LinearCombination_3,LinearCombination_4,LinearCombination_5,LinearCombination_6,LinearCombination_7,LinearCombination_8,LinearCombination_9,...,LinearCombination_40,LinearCombination_41,LinearCombination_42,LinearCombination_43,LinearCombination_44,LinearCombination_45,LinearCombination_46,LinearCombination_47,LinearCombination_48,LinearCombination_49
0,-9.1432,0.285309,-2.57652,1.6103,3.22299,-4.32117,2.87833,-0.224268,0.423893,-1.13255,...,0.601873,0.05767,0.347752,0.37372,0.695435,-0.426921,-1.31086,1.43328,0.179963,0.325787


### 3.b Estimate with ABCtoolbox

In [163]:
%%bash
# Write the ABCtoolbox `estimate` config to test_ABC_estimate_PLS.txt.
# PLS selects which reduced tables are used: it is substituted into the
# *_${PLS}pls.txt input names and into the output prefix / log file name, so it
# must match one of the component counts for which reduced tables exist.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
PLS=20
echo "task estimate
simName input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed_${PLS}pls.txt
obsName results_real_output_M23_dropIBDq_filterstats_transformed_${PLS}pls.txt
params 2-6,8-15,17-23
numRetained 1000
maxReadSims 96334
diracPeakWidth 0.01
posteriorDensityPoints 100
jointPosteriors Log10_NWA,Log10_NEA
jointPosteriorDensityPoints 100
writeRetained 0
outputPrefix ABC_M2_genome_estimate_96334_${PLS}pls_1000ret_
logFile ABC_M2_genome_estimate_96334_${PLS}pls_1000ret.log
verbose" >test_ABC_estimate_PLS.txt

In [164]:
%%bash
# Run the parameter estimation described in test_ABC_estimate_PLS.txt.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
ABCtoolbox test_ABC_estimate_PLS.txt


 ABCtoolbox 2.0 
****************
- Reading inputfile 'test_ABC_estimate_PLS.txt' ... done!
- Writing log to 'ABC_M2_genome_estimate_96334_20pls_1000ret.log'
- Initializing random generator ... done with seed 123103656!
- Reading observed data file 'results_real_output_M23_dropIBDq_filterstats_transformed_20pls.txt' ... done!
   -> 1 data sets with 20 statistics each.
- Reading files with simulations:
   - Reading up to 96334 simulations from file 'input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed_20pls.txt' ...   - Reading up to 96334 simulations from file 'input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed_20pls.txt' ... (1%)   - Reading up to 96334 simulations from file 'input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed_20pls.txt' ... (2%)   - Reading up to 96334 simulations from file 'input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_transformed_20pls.txt' ... (3%)   - Reading up to 96334 simulations from file 'input_ABC_HPC_OSG_CHTC_2_dropIBDq_filterstats_t

In [165]:
%%bash
# List the files produced by the 20-component estimation run.
cd /vol_c/ABC_AJmodels_genome/PLS/dropIBDq
ls ABC_M2_genome_estimate_96334_20pls*

ABC_M2_genome_estimate_96334_20pls_1000ret.log
ABC_M2_genome_estimate_96334_20pls_1000ret_model0_BestSimsParamStats_Obs0.txt
ABC_M2_genome_estimate_96334_20pls_1000ret_model0_jointPosterior_8_9_Obs0.txt
ABC_M2_genome_estimate_96334_20pls_1000ret_model0_MarginalPosteriorCharacteristics.txt
ABC_M2_genome_estimate_96334_20pls_1000ret_model0_MarginalPosteriorDensities_Obs0.txt
ABC_M2_genome_estimate_96334_20pls_1000ret_modelFit.txt


In [166]:
# Load the marginal posterior summary (mode, mean, median and interval bounds
# per parameter) written by the 20-component estimation run.
pd.read_csv('ABC_M2_genome_estimate_96334_20pls_1000ret_model0_MarginalPosteriorCharacteristics.txt', sep = '\t')

Unnamed: 0,dataSet,Asc_NAF_mode,Asc_NAF_mean,Asc_NAF_median,Asc_NAF_q50_lower,Asc_NAF_q50_upper,Asc_NAF_q90_lower,Asc_NAF_q90_upper,Asc_NAF_q95_lower,Asc_NAF_q95_upper,...,Tm_q99_lower,Tm_q99_upper,Tm_HDI50_lower,Tm_HDI50_upper,Tm_HDI90_lower,Tm_HDI90_upper,Tm_HDI95_lower,Tm_HDI95_upper,Tm_HDI99_lower,Tm_HDI99_upper
0,0,13.4545,13.5906,13.6546,11.8433,15.6044,7.84245,18.551,6.7729,19.1301,...,9.95286,34.5286,23.202,30.0248,16.8694,33.5455,14.798,34.4138,11.18,35
