In [1]:
import pandas as pd
import os
import shutil
import numpy as np
import glob

from QC.utils import shell_do
from QC.imputation import impute_data_prep
import QC.config as config

In [2]:
idat_path = '/data/CARD/PD/GP2/raw_genotypes/GP2_idats'
raw_plink_path = '/data/CARD/PD/GP2/raw_genotypes/GP2_plink'
gp2_qc_path = '/data/CARD/PD/GP2/raw_genotypes/GP2_QC'

# now set up path for round1 QC
geno_path = f'{gp2_qc_path}/round1/GP2_QC_round1'

ilmn_files_path = '/data/CARD/PD/GP2/ilmn_files'
bpm = f'{ilmn_files_path}/NeuroBooster_20042459_A1.bpm'
egt = f'{ilmn_files_path}/NBSCluster_file_n1393_011921.egt'
iaap = f'{ilmn_files_path}/iaap-cli/iaap-cli'

key_dir = '/data/CARD/PD/GP2/key_files'
clin_dir = '/data/CARD/PD/GP2/clinical'
key_file = f'{key_dir}/master_key.csv'
clin_file = f'{clin_dir}/GP2_clinical.csv'

swarm_scripts_dir = f'/data/CARD/PD/GP2/swarm_scripts'
# swarm_dir = f'/data/CARD/PD/GP2/swarm_scripts/swarm'

# Create Plink style phenotypes and update ids

In [14]:
pheno = pd.read_csv(clin_file)
key = pd.read_csv(key_file)


pheno['Original_clinicalID'] = pheno['Original_clinicalID'].astype(str)
key['Sample_ID'] = key['Sample_ID'].astype(str)
key['filename'] = key['SentrixBarcode_A'].astype(str) + '_' + key['SentrixPosition_A']
pheno_out = key.merge(pheno, how='inner', left_on='Sample_ID', right_on='Original_clinicalID')
pheno_out['IID'] = pheno_out.SentrixBarcode_A.astype(str) + '_' + pheno_out.SentrixPosition_A.astype(str)
pheno_out['FID'] = 0
pheno_out['FID_new'] = 0
# pheno_out['pheno'] = 0


# update codes to plink format
pheno_out.loc[pheno_out.Phenotype == 'PD', 'pheno'] = 2
pheno_out.loc[pheno_out.Phenotype == 'Control', 'pheno'] = 1
pheno_out.loc[pheno_out.Phenotype == 'Not Reported', 'pheno'] = 0

pheno_out.loc[pheno_out.Sex == 'Male', 'sex'] = 1
pheno_out.loc[pheno_out.Sex == 'Female', 'sex'] = 2
pheno_out.loc[pheno_out.Sex == 'Not Reported', 'sex'] = 0

pheno_out.loc[:,'pheno'] = pheno_out.loc[:,'pheno'].astype(int).astype(str)
pheno_out.loc[:,'sex'] = pheno_out.loc[:,'sex'].astype(int).astype(str)


pheno_out[['FID','IID', 'FID_new', 'GP2SampleID']].to_csv(f'{clin_dir}/update_ids.txt', sep='\t', header=False, index=False)
pheno_out[['FID_new', 'GP2SampleID', 'pheno']].to_csv(f'{clin_dir}/update_pheno.txt', sep='\t', header=False, index=False)
pheno_out[['FID_new', 'GP2SampleID', 'sex']].to_csv(f'{clin_dir}/update_sex.txt', sep='\t', header=False, index=False)


In [4]:
pheno_out.sex.value_counts()

1    3054
2    2259
0       3
Name: sex, dtype: int64

# Convert idats to ped 

In [27]:
with open(f'{swarm_scripts_dir}/idat_to_ped.swarm', 'w') as f:
    
    for code in key.SentrixBarcode_A.unique():
        
        idat_to_ped_cmd = f'\
{iaap} gencall \
{bpm} \
{egt} \
{raw_plink_path}/ \
-f {idat_path}/{code} \
-p \
-t 8'
        
        f.write(f'{idat_to_ped_cmd}\n')
f.close()

In [29]:
!swarm -f {swarm_scripts_dir}/idat_to_ped.swarm -g 32 -t 16 --time=10:00:00 --logdir {swarm_scripts_dir}/logs --gres=lscratch:20 --partition=norm

20940619


In [32]:
# copy map file to match name of each ped
map_file = f'{raw_plink_path}/NeuroBooster_20042459_A1.map'
for filename in key.filename:
    ped = f'{raw_plink_path}/{filename}.ped'
    out_map = f'{raw_plink_path}/{filename}.map'
    if os.path.isfile(ped):
        shutil.copyfile(src=map_file, dst=out_map)
    else:
        print(f'{ped} does not exist!')
        print(f'{out_map} creation cancelled')

/data/CARD/PD/GP2/raw_genotypes/GP2_plink/205275450156_R02C01.ped does not exist!
/data/CARD/PD/GP2/raw_genotypes/GP2_plink/205275450156_R02C01.map creation cancelled


In [33]:
with open(f'{swarm_scripts_dir}/make_bed.swarm', 'w') as f:
    for filename in key.filename:
        ped = f'{raw_plink_path}/{filename}'
        make_bed_cmd = f'\
plink \
--file {ped} \
--make-bed \
--out {raw_plink_path}/{filename}'

        f.write(f'{make_bed_cmd}\n')
f.close()

In [34]:
!swarm -f {swarm_scripts_dir}/make_bed.swarm -g 64 -t 32 --time=10:00:00 --logdir swarm --gres=lscratch:20 --partition=norm

20951177


In [10]:
# write plink merge command
with open(f"{raw_plink_path}/merge_bed.list", 'w') as f:
    for filename in key.filename:
        bed = f'{raw_plink_path}/{filename}'
        if os.path.isfile(f'{bed}.bed'):
            f.write(f'{bed}\n')
        else:
            print(f'{bed} does not exist!!!')
f.close()

with open(f"{swarm_scripts_dir}/merge.swarm", 'w') as f:

    plink_merge_cmd = f'\
plink \
--merge-list {raw_plink_path}/merge_bed.list \
--update-ids {clin_dir}/update_ids.txt \
--make-bed \
--out {raw_plink_path}/GP2_merge'
    f.write(f"{plink_merge_cmd}")
f.close()

/data/CARD/PD/GP2/raw_genotypes/GP2_plink/205275450156_R02C01 does not exist!!!


In [53]:
!swarm -f {swarm_scripts_dir}/merge.swarm -g 64 -t 32 --time=10:00:00 --logdir swarm --gres=lscratch:20 --partition=norm

20969678


In [18]:
!plink --bfile {raw_plink_path}/GP2_merge --pheno {clin_dir}/update_pheno.txt --update-sex {clin_dir}/update_sex.txt --make-bed --out {geno_path}

PLINK v1.90b4.4 64-bit (21 May 2017)           www.cog-genomics.org/plink/1.9/
(C) 2005-2017 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1.log.
Options in effect:
  --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_plink/GP2_merge
  --make-bed
  --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1
  --pheno /data/CARD/PD/GP2/clinical/update_pheno.txt
  --update-sex /data/CARD/PD/GP2/clinical/update_sex.txt

1547809 MB RAM detected; reserving 773904 MB for main workspace.
2004347 variants loaded from .bim file.
5320 people (2988 males, 2180 females, 152 ambiguous) loaded from .fam.
Ambiguous sex IDs written to
/data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1.nosex .
5312 phenotype values present after --pheno.
--update-sex: 5315 people updated, 1 ID not present.
phenotypes to be ignored, use the --allow-no-sex flag.
Using 1 thread (no multithreaded calculations invoked).
Before ma

In [3]:
# run QC pipeline
geno_path = f'{gp2_qc_path}/round1/GP2_QC_round1'
out_dir = f'/data/CARD/PD/GP2/genotypes/GP2/round1/clean'
out_path = f'{out_dir}/GP2_round1'
os.makedirs(f'{out_dir}', exist_ok=True)
ref_dir_path = '/data/LNG/vitaled2/1kgenomes'
ref_panel = f'{ref_dir_path}/1kg_ashkj_ref_panel_gp2_pruned'
ref_labels = f'{ref_dir_path}/ref_panel_ancestry.txt'


with open(f'{swarm_scripts_dir}/run_qc_pipeline.swarm','w') as f:
    run_pipeline = f'python3 ../run_qc_pipeline.py --geno {geno_path} --ref {ref_panel} --ref_labels {ref_labels} --out {out_path}'
    f.write(f'{run_pipeline}\n')
f.close()
!cat {swarm_scripts_dir}/run_qc_pipeline.swarm

python3 ../run_qc_pipeline.py --geno /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1 --ref /data/LNG/vitaled2/1kgenomes/1kg_ashkj_ref_panel_gp2_pruned --ref_labels /data/LNG/vitaled2/1kgenomes/ref_panel_ancestry.txt --out /data/CARD/PD/GP2/genotypes/GP2/round1/clean/GP2_round1


In [6]:
!swarm -f {swarm_scripts_dir}/run_qc_pipeline.swarm -g 64 -t 32 --time=10:00:00 --logdir swarm --gres=lscratch:20 --partition=norm

21866792


In [6]:
QC_metrics_path = f'{out_path}.QC.metrics.h5'

metrics_df = pd.read_hdf(QC_metrics_path, key='QC')
pruned_samples = pd.read_hdf(QC_metrics_path, key='pruned_samples')
ancestry_counts_df = pd.read_hdf(QC_metrics_path, key='ancestry_counts')
pred_ancestry_labels = pd.read_hdf(QC_metrics_path, key='ancestry_labels')
conf_mat_df = pd.read_hdf(QC_metrics_path, key='confusion_matrix', index=True)
ref_pcs = pd.read_hdf(QC_metrics_path, key='ref_pcs')
projected_pcs = pd.read_hdf(QC_metrics_path, key='projected_pcs')
total_umap = pd.read_hdf(QC_metrics_path, key='total_umap')
ref_umap = pd.read_hdf(QC_metrics_path, key='ref_umap')
new_samples_umap = pd.read_hdf(QC_metrics_path, key='new_samples_umap')

In [17]:
# pruned_samples[pruned_samples.step=='callrate_prune']
pruned_samples_pheno = pruned_samples.merge(pheno, left_on='IID', right_on='GP2SampleID', how='left')

In [23]:
for cohort in pruned_samples_pheno.Cohort_name.unique():
    print(cohort)
    print(pruned_samples_pheno[pruned_samples_pheno.Cohort_name == cohort].step.value_counts())
    print()

BCM
callrate_prune    16
sex_prune         12
related_prune      5
Name: step, dtype: int64

UMD
callrate_prune    20
sex_prune         16
related_prune      8
Name: step, dtype: int64

CORIELL
callrate_prune    230
related_prune      91
sex_prune          30
Name: step, dtype: int64



In [27]:
pred_ancestry_labels_pheno = pred_ancestry_labels.merge(pheno, left_on='IID', right_on='GP2SampleID', how='left')

In [29]:
for cohort in pred_ancestry_labels_pheno.Cohort_name.unique():
    print(cohort)
    print(pred_ancestry_labels_pheno[pred_ancestry_labels_pheno.Cohort_name == cohort].label.value_counts())
    print()

BCM
EUR    625
AMR     80
AJ      28
AAC     24
SAS     17
EAS      7
Name: label, dtype: int64

UMD
EUR    355
AJ      55
AAC     14
AMR      9
SAS      6
EAS      4
Name: label, dtype: int64

CORIELL
EUR    3015
AJ      438
AMR     127
AAC     123
EAS      37
SAS      24
FIN       3
Name: label, dtype: int64

nan
Series([], Name: label, dtype: int64)



In [9]:
# param grid for debugging
param_grid = {
        "umap__n_neighbors": [5],
        "umap__n_components": [15],
        "umap__a":[1.0],
        "umap__b": [0.5],
        "svc__C": [0.1],
    }


In [10]:
import pandas as pd
import argparse
import shutil
import os

# local imports
from QC.qc import callrate_prune, het_prune, sex_prune, related_prune, variant_prune, avg_miss_rates
from Ancestry.ancestry import run_ancestry, split_cohort_ancestry
from QC.utils import shell_do


# debug qc issue
# sample-level pruning and metrics
missing_path = f'{geno_path}_missing'
avg_miss = avg_miss_rates(geno_path, missing_path)
# avg_miss

callrate_out = f'{geno_path}_callrate'
callrate = callrate_prune(geno_path, callrate_out)

sex_out = f'{callrate_out}_sex'
sex = sex_prune(callrate_out, sex_out)


# run ancestry methods
ancestry_out = f'{sex_out}_ancestry'
ancestry = run_ancestry(geno_path=sex_out, out_path=ancestry_out, ref_panel=ref_panel, ref_labels=ref_labels, train_param_grid=param_grid)

# get ancestry counts to add to output .h5 later
ancestry_counts_df = pd.DataFrame(ancestry['metrics']['predicted_counts']).reset_index()
ancestry_counts_df.columns = ['label', 'count']


# split cohort into individual ancestry groups
pred_labels_path = ancestry['output']['predicted_labels']['labels_outpath']
cohort_split = split_cohort_ancestry(geno_path=sex_out, labels_path=pred_labels_path, out_path=ancestry_out)

# ancestry-specific pruning steps
het_dict = dict()
related_dict = dict()
variant_dict = dict()

for geno, label in zip(cohort_split['paths'], cohort_split['labels']):

    # related
    related_out = f'{geno}_related'
    related = related_prune(geno, related_out)
    related_dict[label] = related
    
    # het
    het_out = f'{related_out}_het'
    het = het_prune(related_out, het_out)
    het_dict[label] = het
    
    # variant
    variant_out = f'{het_out}_variant'
    if het['pass']:
        variant = variant_prune(het_out, variant_out)
        variant_dict[label] = variant
    else:
        variant = variant_prune(related_out, variant_out)
        variant_dict[label] = variant

# move output to out_path
for label, data in variant_dict.items():
    if data['pass']:
        for suffix in ['bed','bim','fam','hh','log']:
            plink_file = f"{data['output']['plink_out']}.{suffix}"
            plink_outfile = f'{out_path}_{label}.{suffix}'
            shutil.copyfile(src=plink_file, dst=plink_outfile)



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1 --missing --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_missing


Average Missing Call Rate (lmiss): 0.012545972144127636
Average Missing Genotyping Rate (imiss): 0.01253674154135344

RUNNING: callrate_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1 --mind 0.02 --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate



RUNNING: sex_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate --check-sex 0.25 0.75 --maf 0.05 --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate --chr 23 --from-bp 2699520 --to-bp 154931043 --maf 0.05 --geno 0.05 --hwe 1E-5 --check-sex 0.25 0.75 --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate --remove /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex.outliers --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex



RUNNING: predict_ancestry


RUNNING: calculate_pcs



Executing: plink --bfile /data/LNG/vitaled2/1kgenomes/1kg_ashkj_ref_panel_gp2_pruned --extract /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.common_snps --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps
Executing: flashpca --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps -d 50 --outpc /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.pcs --outvec /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.vec --outval /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.val --outpve /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.pve --outload /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.loadings --outmeansd /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.meansd




Labeled Reference Ancestry Counts:
AFR    504
EAS    504
SAS    489
AJ     471
EUR    404
AMR    347
AAC    157
FIN     99
Name: label, dtype: int64




Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex --extract /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.common_snps --reference-allele /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.ref_allele --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/pcs_common_snps


/data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/pcs_common_snps /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.meansd /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.loadings /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/pcs_common_snps.projections


Executing: flashpca --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/pcs_common_snps --project --inmeansd /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.meansd --inload /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/ref_common_snps.loadings --outproj /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/pcs_common_snps.projections -v



RUNNING: munge_pca_loadings


RUNNING: train_umap_classifier

Training Balanced Accuracy: 0.9445725167468225
Best Parameters: {'svc__C': 0.1, 'umap__a': 1.0, 'umap__b': 0.5, 'umap__n_components': 15, 'umap__n_neighbors': 5}
Balanced Accuracy on Test Set: 0.9747899159663865

predicted:
 EUR    3988
AJ      492
AMR     248
AAC     164
EAS      48
SAS      47
FIN       9
Name: label, dtype: int64



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex --keep /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR.samples --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex --keep /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR.samples --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex --keep /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS.samples --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_


RUNNING: related_prune



Executing: gcta --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR --autosome --maf 0.05 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_total_grm_tmp
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_total_grm_tmp --grm-cutoff 0.125 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_unrelated_grm_tmp
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR --keep /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_unrelated_grm_tmp.grm.id --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_t


RUNNING: het_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related --geno 0.01 --maf 0.05 --indep-pairwise 50 5 0.5 --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het_tmp
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related --extract /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het_tmp.prune.in --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het_tmp2 --het --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het_tmp3
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related --


RUNNING: variant_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het --geno 0.05 --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het_variant_geno_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het_variant_geno_tmp1 --test-missing --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het_variant_mis_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het_variant_geno_tmp1 --exclude /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het_variant_mis_tmp1.exclude --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EUR_related_het_variant_mis_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_ge


RUNNING: related_prune



Executing: gcta --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR --autosome --maf 0.05 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_total_grm_tmp
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_total_grm_tmp --grm-cutoff 0.125 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_unrelated_grm_tmp
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR --keep /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_unrelated_grm_tmp.grm.id --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_t


RUNNING: het_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related --extract /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het_tmp.prune.in --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het_tmp2 --het --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het_tmp3
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related --remove /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het.outliers --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het



RUNNING: variant_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het --geno 0.05 --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het_variant_geno_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het_variant_geno_tmp1 --test-missing --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het_variant_mis_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het_variant_geno_tmp1 --exclude /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het_variant_mis_tmp1.exclude --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AMR_related_het_variant_mis_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_ge


RUNNING: related_prune



Executing: gcta --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS --autosome --maf 0.05 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_total_grm_tmp
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_total_grm_tmp --grm-cutoff 0.125 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_unrelated_grm_tmp
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS --keep /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_unrelated_grm_tmp.grm.id --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_t


RUNNING: het_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related --extract /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het_tmp.prune.in --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het_tmp2 --het --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het_tmp3
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related --remove /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het.outliers --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het



RUNNING: variant_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het --geno 0.05 --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het_variant_geno_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het_variant_geno_tmp1 --test-missing --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het_variant_mis_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het_variant_geno_tmp1 --exclude /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het_variant_mis_tmp1.exclude --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_SAS_related_het_variant_mis_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_ge


RUNNING: related_prune



Executing: gcta --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ --autosome --maf 0.05 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_total_grm_tmp
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_total_grm_tmp --grm-cutoff 0.125 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_unrelated_grm_tmp
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ --keep /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_unrelated_grm_tmp.grm.id --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_total_grm


RUNNING: het_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related --extract /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het_tmp.prune.in --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het_tmp2 --het --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het_tmp3
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related --remove /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het.outliers --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het



RUNNING: variant_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het --geno 0.05 --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het_variant_geno_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het_variant_geno_tmp1 --test-missing --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het_variant_mis_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het_variant_geno_tmp1 --exclude /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het_variant_mis_tmp1.exclude --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AJ_related_het_variant_mis_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes


RUNNING: related_prune



Executing: gcta --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC --autosome --maf 0.05 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_total_grm_tmp
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_total_grm_tmp --grm-cutoff 0.125 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_unrelated_grm_tmp
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC --keep /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_unrelated_grm_tmp.grm.id --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_t


RUNNING: het_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related --extract /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het_tmp.prune.in --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het_tmp2 --het --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het_tmp3
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related --remove /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het.outliers --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het



RUNNING: variant_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het --geno 0.05 --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het_variant_geno_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het_variant_geno_tmp1 --test-missing --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het_variant_mis_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het_variant_geno_tmp1 --exclude /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het_variant_mis_tmp1.exclude --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_AAC_related_het_variant_mis_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_ge


RUNNING: related_prune



Executing: gcta --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS --autosome --maf 0.05 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_total_grm_tmp
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_total_grm_tmp --grm-cutoff 0.125 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_unrelated_grm_tmp
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS --keep /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_unrelated_grm_tmp.grm.id --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_t


RUNNING: het_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related --extract /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het_tmp.prune.in --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het_tmp2 --het --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het_tmp3
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related --remove /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het.outliers --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het



RUNNING: variant_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het --geno 0.05 --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het_variant_geno_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het_variant_geno_tmp1 --test-missing --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het_variant_mis_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het_variant_geno_tmp1 --exclude /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het_variant_mis_tmp1.exclude --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_EAS_related_het_variant_mis_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_ge


RUNNING: related_prune



Executing: gcta --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN --autosome --maf 0.05 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_total_grm_tmp
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_total_grm_tmp --grm-cutoff 0.125 --make-grm --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_unrelated_grm_tmp
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN --keep /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_unrelated_grm_tmp.grm.id --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related
Executing: gcta --grm /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_t


RUNNING: het_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related --extract /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het_tmp.prune.in --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het_tmp2 --het --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het_tmp3
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related --remove /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het.outliers --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het



RUNNING: variant_prune



Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het --geno 0.05 --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het_variant_geno_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het_variant_geno_tmp1 --test-missing --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het_variant_mis_tmp1
Executing: plink --bfile /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het_variant_geno_tmp1 --exclude /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het_variant_mis_tmp1.exclude --make-bed --out /data/CARD/PD/GP2/raw_genotypes/GP2_QC/round1/GP2_QC_round1_callrate_sex_ancestry_FIN_related_het_variant_mis_tmp2
Executing: plink --bfile /data/CARD/PD/GP2/raw_ge

In [24]:

        

# build report- eventually make this an individual method
steps = [callrate, sex]
steps2 = [het_dict, related_dict, variant_dict]
metrics_df = pd.DataFrame()
pruned_samples_df = pd.DataFrame()

for item in steps:
    
    step = item['step']
    pf = item['pass']
    level = 'sample'
    ancestry_label = 'all'
    
    for metric, value in item['metrics'].items():
        tmp_metrics_df = pd.DataFrame({'step':[step], 'pruned_count':[value], 'metric':[metric], 'ancestry':[ancestry_label], 'level':[level], 'pass': [pf]})
        metrics_df = metrics_df.append(tmp_metrics_df)
    
    samplefile = item['output']['pruned_samples']
    if os.path.isfile(samplefile):
        pruned = pd.read_csv(samplefile, sep='\t')
        if pruned.shape[0] > 0:
            pruned.loc[:,'step'] = step
            pruned_samples_df = pruned_samples_df.append(pruned[['FID','IID','step']])
        
for item in steps2:
    for ancestry_label, metrics in item.items():
        
        step = metrics['step']
        pf = metrics['pass']
        
        if step in ['het_prune','related_prune']:
            level = 'sample'

            samplefile = metrics['output']['pruned_samples']
            if os.path.isfile(samplefile):
                pruned = pd.read_csv(samplefile, sep='\t')
                if pruned.shape[0] > 0:
                    pruned.loc[:,'step'] = step
                    pruned_samples_df = pruned_samples_df.append(pruned[['FID','IID','step']])
            
        else:
            level = 'variant'

        for metric, value in metrics['metrics'].items():
            tmp_metrics_df = pd.DataFrame({'step':[step], 'pruned_count':[value], 'metric':[metric], 'ancestry':[ancestry_label], 'level':[level], 'pass': [pf]})
            metrics_df = metrics_df.append(tmp_metrics_df)

metrics_df.reset_index(drop=True, inplace=True)


# build output hdf
metrics_outfile = f'{out_path}.QC.metrics.h5'

le = ancestry['data']['label_encoder']
confusion_matrix = ancestry['data']['confusion_matrix']
conf_mat_df = pd.DataFrame(confusion_matrix)
conf_mat_df.columns = le.inverse_transform([i for i in range(8)])
conf_mat_df.index = le.inverse_transform([i for i in range(8)])

ref_pcs = ancestry['data']['ref_pcs']
projected_pcs = ancestry['data']['projected_pcs']
total_umap = ancestry['data']['total_umap']
ref_umap = ancestry['data']['ref_umap']
new_samples_umap = ancestry['data']['new_samples_umap']
pred_ancestry_labels = ancestry['data']['predict_data']['ids']

metrics_df.to_hdf(metrics_outfile, key='QC', mode='w')
pruned_samples_df.to_hdf(metrics_outfile, key='pruned_samples')
ancestry_counts_df.to_hdf(metrics_outfile, key='ancestry_counts')
pred_ancestry_labels.to_hdf(metrics_outfile, key='ancestry_labels')
conf_mat_df.to_hdf(metrics_outfile, key='confusion_matrix', index=True)
ref_pcs.to_hdf(metrics_outfile, key='ref_pcs')
projected_pcs.to_hdf(metrics_outfile, key='projected_pcs')
total_umap.to_hdf(metrics_outfile, key='total_umap')
ref_umap.to_hdf(metrics_outfile, key='ref_umap')
new_samples_umap.to_hdf(metrics_outfile, key='new_samples_umap')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->axis0] [items->None]

  f(store)
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->integer,key->block0_items] [items->None]

  f(store)


Unnamed: 0,step,pruned_count,metric,ancestry,level,pass
0,callrate_prune,267,outlier_count,all,sample,True
1,sex_prune,58,outlier_count,all,sample,True
2,het_prune,0,outlier_count,EUR,sample,True
3,het_prune,0,outlier_count,AMR,sample,True
4,het_prune,0,outlier_count,SAS,sample,True
5,het_prune,0,outlier_count,AJ,sample,True
6,het_prune,0,outlier_count,AAC,sample,True
7,het_prune,0,outlier_count,EAS,sample,True
8,het_prune,0,outlier_count,FIN,sample,True
9,related_prune,76,related_count,EUR,sample,True


In [7]:
QC_metrics_path = f'{out_path}.QC.metrics.h5'

metrics_df = pd.read_hdf(QC_metrics_path, key='QC')
ancestry_counts_df = pd.read_hdf(QC_metrics_path, key='ancestry_counts')
pred_ancestry_labels = pd.read_hdf(QC_metrics_path, key='ancestry_labels')
conf_mat_df = pd.read_hdf(QC_metrics_path, key='confusion_matrix', index=True)
ref_pcs = pd.read_hdf(QC_metrics_path, key='ref_pcs')
projected_pcs = pd.read_hdf(QC_metrics_path, key='projected_pcs')
total_umap = pd.read_hdf(QC_metrics_path, key='total_umap')
ref_umap = pd.read_hdf(QC_metrics_path, key='ref_umap')
new_samples_umap = pd.read_hdf(QC_metrics_path, key='new_samples_umap')

FileNotFoundError: File /data/CARD/PD/GP2/genotypes/GP2/round1/clean/GP2_round1.QC.metrics.h5 does not exist

In [8]:
!ls /data/CARD/PD/GP2/genotypes/GP2/round1/clean/

GP2_round1_AAC.bed  GP2_round1_AJ.hh	GP2_round1_EAS.bim  GP2_round1_EUR.log
GP2_round1_AAC.bim  GP2_round1_AJ.log	GP2_round1_EAS.fam  GP2_round1_SAS.bed
GP2_round1_AAC.fam  GP2_round1_AMR.bed	GP2_round1_EAS.hh   GP2_round1_SAS.bim
GP2_round1_AAC.hh   GP2_round1_AMR.bim	GP2_round1_EAS.log  GP2_round1_SAS.fam
GP2_round1_AAC.log  GP2_round1_AMR.fam	GP2_round1_EUR.bed  GP2_round1_SAS.hh
GP2_round1_AJ.bed   GP2_round1_AMR.hh	GP2_round1_EUR.bim  GP2_round1_SAS.log
GP2_round1_AJ.bim   GP2_round1_AMR.log	GP2_round1_EUR.fam
GP2_round1_AJ.fam   GP2_round1_EAS.bed	GP2_round1_EUR.hh


In [11]:
!ls {gp2_qc_path}/round1/models

umap_linearsvc_ancestry_model.pkl
