In [3]:
import pandas as pd
import glob
import numpy as np
import os
import shutil

In [4]:
# Root of the GP2 project tree; every path below hangs off it.
gp2_root = '/data/CARD/PD/GP2'

# --- GD raw-genotype working area ---
gd_path = gp2_root + '/raw_genotypes/GD'
idat_path = gd_path + '/idats'                  # per-barcode IDAT folders (input to gencall)
swarm_scripts_dir = gd_path + '/swarm_scripts'  # generated .swarm command files
raw_plink_path = gd_path + '/plink'             # per-sample ped/map/bed output
gd_qc_path = gd_path + '/QC'                    # merged fileset + QC intermediates

# --- Illumina files handed to `iaap-cli gencall` ---
ilmn_files_path = gp2_root + '/ilmn_files'
bpm = ilmn_files_path + '/NeuroBooster_20042459_A1.bpm'
egt = ilmn_files_path + '/NBSCluster_file_n1393_011921.egt'
iaap = ilmn_files_path + '/iaap-cli/iaap-cli'

# --- Clinical sample sheets and the sample key file ---
clin_dir = gp2_root + '/clinical'
key_dir = gp2_root + '/key_files'
gd_key = key_dir + '/gd_key.csv'


In [5]:
# Clinical sample sheets (loaded here; not used further in the cells shown).
nhgri_clin = pd.read_csv(f'{clin_dir}/sample_sheet_nhgri.csv')
ny_clin = pd.read_csv(f'{clin_dir}/sample_sheet_newyork.csv')

# Sample key: add the per-sample file stem "<SentrixBarcode_A>_<SentrixPosition_A>"
# and a constant family ID of '0'.
key = pd.read_csv(gd_key).assign(
    filename=lambda df: df['SentrixBarcode_A'].astype(str) + '_' + df['SentrixPosition_A'],
    FID='0',
)

# Four-column, tab-separated, header-less table consumed later by
# `plink --update-ids`: FID, filename, FID, Sample_ID.
update_id_columns = ['FID', 'filename', 'FID', 'Sample_ID']
key[update_id_columns].to_csv(
    f'{gd_path}/update_ids.txt',
    sep='\t',
    header=False,
    index=False,
)

In [4]:
# Write one `iaap-cli gencall` command per Sentrix barcode into a swarm file.
# Each command points gencall at the barcode's IDAT folder (-f) and writes its
# output under raw_plink_path.
# NOTE(review): `-p` / `-t 8` are presumably "PLINK output" / "8 threads" — confirm
# against the iaap-cli help text.
with open(f'{swarm_scripts_dir}/idat_to_ped.swarm', 'w') as f:
    for code in key.SentrixBarcode_A.unique():
        idat_to_ped_cmd = ' '.join([
            f'{iaap} gencall',
            bpm,
            egt,
            f'{raw_plink_path}/',
            f'-f {idat_path}/{code}',
            '-p',
            '-t 8',
        ])
        f.write(f'{idat_to_ped_cmd}\n')
# No explicit f.close(): leaving the `with` block has already closed the file.

In [38]:
# Submit the gencall swarm file; scheduler logs go to {swarm_scripts_dir}/logs.
# NOTE(review): each gencall command is written with `-t 8` while this submission
# passes `-t 16` — confirm the two thread counts are meant to differ.
!swarm -f {swarm_scripts_dir}/idat_to_ped.swarm -g 32 -t 16 --time=10:00:00 --logdir {swarm_scripts_dir}/logs --gres=lscratch:20 --partition=norm

21031358


In [39]:
# Every per-sample .ped needs a .map with the same stem; duplicate the single
# array-level map file next to each .ped that exists.
map_file = f'{raw_plink_path}/NeuroBooster_20042459_A1.map'
for filename in key.filename:
    ped = f'{raw_plink_path}/{filename}.ped'
    out_map = f'{raw_plink_path}/{filename}.map'

    # Guard: report and skip samples whose .ped was never produced.
    if not os.path.isfile(ped):
        print(f'{ped} does not exist!')
        print(f'{out_map} creation cancelled')
        continue

    shutil.copyfile(src=map_file, dst=out_map)

In [40]:
# Write one `plink --make-bed` command per sample, converting
# <raw_plink_path>/<filename>.ped/.map into a binary fileset with the same stem.
with open(f'{swarm_scripts_dir}/make_bed.swarm', 'w') as f:
    for filename in key.filename:
        # `ped` is the fileset prefix (no extension), as expected by `plink --file`.
        ped = f'{raw_plink_path}/{filename}'
        make_bed_cmd = ' '.join([
            'plink',
            f'--file {ped}',
            '--make-bed',
            f'--out {raw_plink_path}/{filename}',
        ])
        f.write(f'{make_bed_cmd}\n')
# No explicit f.close(): leaving the `with` block has already closed the file.

In [42]:
!swarm -f {swarm_scripts_dir}/make_bed.swarm -g 64 -t 32 --time=10:00:00 --logdir swarm --gres=lscratch:20 --partition=norm

21031946


In [55]:
# Build the PLINK merge list: one fileset prefix per line, restricted to samples
# whose .bed file was actually produced. Missing samples are reported and skipped.
with open(f"{raw_plink_path}/merge_bed.list", 'w') as f:
    for filename in key.filename:
        bed = f'{raw_plink_path}/{filename}'
        if os.path.isfile(f'{bed}.bed'):
            f.write(f'{bed}\n')
        else:
            print(f'{bed} does not exist!!!')

# Single-command swarm file: merge every listed fileset, rewrite sample IDs from
# update_ids.txt, and write the merged binary fileset to <gd_qc_path>/GD.
plink_merge_cmd = ' '.join([
    'plink',
    f'--merge-list {raw_plink_path}/merge_bed.list',
    f'--update-ids {gd_path}/update_ids.txt',
    '--make-bed',
    f'--out {gd_qc_path}/GD',
])
with open(f"{swarm_scripts_dir}/merge.swarm", 'w') as f:
    # Terminate the line with a newline, like the other swarm files.
    f.write(f"{plink_merge_cmd}\n")
# No explicit f.close() calls: each `with` block closes its own file.

In [56]:
!swarm -f {swarm_scripts_dir}/merge.swarm -g 64 -t 32 --time=10:00:00 --logdir swarm --gres=lscratch:20 --partition=norm

21034777


In [35]:
# List the cleaned, per-ancestry filesets and the QC metrics file written by the QC pipeline.
!ls /data/CARD/PD/GP2/genotypes/GD/clean

GD_AAC.bed  GD_AJ.bed  GD_AMR.bed  GD_EAS.bed  GD_EUR.bed  GD.QC.metrics.h5
GD_AAC.bim  GD_AJ.bim  GD_AMR.bim  GD_EAS.bim  GD_EUR.bim
GD_AAC.fam  GD_AJ.fam  GD_AMR.fam  GD_EAS.fam  GD_EUR.fam
GD_AAC.hh   GD_AJ.hh   GD_AMR.hh   GD_EAS.hh   GD_EUR.hh
GD_AAC.log  GD_AJ.log  GD_AMR.log  GD_EAS.log  GD_EUR.log


In [3]:
# Inputs to the QC pipeline: merged GD fileset plus the reference panel and its labels.
geno_path = gd_qc_path + '/GD'
ref_dir_path = '/data/LNG/vitaled2/1kgenomes'
ref_panel = ref_dir_path + '/1kg_ashkj_ref_panel_gp2_pruned'
ref_labels = ref_dir_path + '/ref_panel_ancestry.txt'

# Output location: prefix for the cleaned filesets and the QC metrics file.
out_dir = '/data/CARD/PD/GP2/genotypes/GD/clean'
out_path = out_dir + '/GD'

In [4]:
with open(f'{swarm_scripts_dir}/run_qc_pipeline.swarm','w') as f:
    run_pipeline = f'python3 run_gd_pipeline.py --geno {geno_path} --ref {ref_panel} --ref_labels {ref_labels} --out {out_path}'
    f.write(f'{run_pipeline}\n')
f.close()
!cat {swarm_scripts_dir}/run_qc_pipeline.swarm

python3 run_gd_pipeline.py --geno /data/CARD/PD/GP2/raw_genotypes/GD/QC/GD --ref /data/LNG/vitaled2/1kgenomes/1kg_ashkj_ref_panel_gp2_pruned --ref_labels /data/LNG/vitaled2/1kgenomes/ref_panel_ancestry.txt --out /data/CARD/PD/GP2/genotypes/GD/clean/GD


In [5]:
# Submit the QC pipeline swarm file; scheduler logs go to {swarm_scripts_dir}/logs.
!swarm -f {swarm_scripts_dir}/run_qc_pipeline.swarm -g 64 -t 32 --time=10:00:00 --logdir {swarm_scripts_dir}/logs --gres=lscratch:20 --partition=norm

21381364


In [6]:
# NOTE(review): the star import is kept because later cells call names it appears to
# provide (submit_job, pull_imputed_data, impute_data_prep, shell_do); prefer explicit
# imports in the top import cell once the exported names are confirmed.
from QC.imputation import *
import QC.config as config
# from QC.imputation import impute_data_prep
# these files need to be in a different place eventually
# NOTE: this rebinds `ref_panel`, which an earlier cell set to the ancestry reference panel.
ref_panel='/data/vitaled2/GenoTools/ref/PASS.Variantsbravo-dbsnp-all.tab'
check_bim_pl = '/data/vitaled2/GenoTools/ref/HRC-1000G-check-bim.pl'

imputed_out_dir = '/data/CARD/PD/GP2/genotypes/GD/imputed' 
os.makedirs(imputed_out_dir, exist_ok=True)

# Cleaned per-ancestry fileset prefixes: every .bed in out_dir, extension removed.
# os.path.splitext strips only the final extension, so a dot elsewhere in the path
# cannot truncate the prefix (the old `split('.')[0]` would).
# glob order is arbitrary; it is left unsorted so list positions match the original cell.
impute_genos_list = [os.path.splitext(x)[0] for x in glob.glob(f'{out_dir}/*.bed')]

# Ancestry label = file stem with the leading cohort token removed
# (GD_AAC -> AAC, GD_EUR_AJ -> EUR_AJ). Splitting only on the first underscore keeps
# multi-part labels intact; taking the last '_' token mapped GD_EUR_AJ to 'AJ',
# colliding with GD_AJ.
impute_labels_list = [os.path.basename(x).split('_', 1)[-1] for x in impute_genos_list]

for geno, label in zip(impute_genos_list, impute_labels_list):
    label_outdir = f'{imputed_out_dir}/{label}'      # where imputed results will land
    impute_prep_outdir = f'{gd_path}/QC/{label}'     # pre-imputation working directory
    impute_prep_geno = f'{impute_prep_outdir}/{label}'

    os.makedirs(label_outdir, exist_ok=True)
    os.makedirs(impute_prep_outdir, exist_ok=True)
    print(label_outdir)
    # Preparation / pipeline calls, currently disabled:
#     impute_data = impute_data_prep(geno, impute_prep_geno, ref_panel, check_bim_pl)
    
#     run_pipeline = f'python3 ../run_imputation_pipeline.py --geno {geno} --temp {label_temp} --token {config.api_key} --ref_panel {ref_panel} --check_bim_pl {check_bim_pl} --out {label_outdir}/'


/data/CARD/PD/GP2/genotypes/GD/imputed/AAC
/data/CARD/PD/GP2/genotypes/GD/imputed/EUR
/data/CARD/PD/GP2/genotypes/GD/imputed/AJ
/data/CARD/PD/GP2/genotypes/GD/imputed/AMR
/data/CARD/PD/GP2/genotypes/GD/imputed/EAS


In [12]:
# Count the lines (one per sample) in the AAC .fam file.
!cat /data/CARD/PD/GP2/raw_genotypes/GD/QC/AAC/GD_AAC.fam | wc -l

9


In [10]:
# now make list of vcfs and submit job
token = config.api_key
# NOTE(review): hard-coded credential — move to config or an environment variable.
password = 'imputer'

# Groups submitted on their own in this cell. Selecting by label replaces the old
# positional slice `[3:]`, which only picked AMR and EAS because of the order glob
# happened to return.
labels_to_submit = {'AMR', 'EAS'}

# Collect one record per submitted job and build the frame once, instead of growing
# it with DataFrame.append (removed in pandas 2.0).
job_records = []
for geno, label in zip(impute_genos_list, impute_labels_list):
    if label not in labels_to_submit:
        continue

    label_outdir = f'{imputed_out_dir}/{label}'
    impute_prep_outdir = f'{gd_path}/QC/{label}'
    impute_prep_geno = f'{impute_prep_outdir}/{label}'
    print(impute_prep_geno)

    # Per-chromosome pre-imputation VCFs, chr1..chr23.
    vcf_list = [f'{impute_prep_geno}_pre_impute_chr{i}.vcf.gz' for i in range(1, 24)]

    job_json = submit_job(vcf_list, password=password, token=token)
    job_id = job_json['id']
    job_records.append({'job_id': job_id, 'label': label, 'path': geno})

job_id_df = pd.DataFrame(job_records, columns=['job_id', 'label', 'path'])

# The index is written on purpose: the cell that reads this file back drops the
# resulting 'Unnamed: 0' column.
job_id_df.to_csv(f'{imputed_out_dir}/impute_job_ids.csv')


/data/CARD/PD/GP2/raw_genotypes/GD/QC/AMR/AMR
job-20210830-160043-679 Your job was successfully added to the job queue.
***************************
* * * * * * * * * * * * * *
/data/CARD/PD/GP2/raw_genotypes/GD/QC/EAS/EAS
job-20210830-160045-801 Your job was successfully added to the job queue.
***************************
* * * * * * * * * * * * * *


In [39]:
# now, merge EUR and AJ and impute together
# !mkdir /data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ
eur_path = '/data/CARD/PD/GP2/genotypes/GD/clean/GD_EUR'
aj_path = '/data/CARD/PD/GP2/genotypes/GD/clean/GD_AJ'
eur_aj_path = '/data/CARD/PD/GP2/genotypes/GD/clean/GD_EUR_AJ'

# Assemble the PLINK call from its arguments: EUR is the base fileset, AJ is merged in,
# and the combined binary fileset is written to eur_aj_path.
plink_merge_args = [
    'plink',
    f'--bfile {eur_path}',
    f'--bmerge {aj_path}',
    '--make-bed',
    f'--out {eur_aj_path}',
]
plink_merge_cmd = ' '.join(plink_merge_args)

shell_do(plink_merge_cmd)

Executing: plink --bfile /data/CARD/PD/GP2/genotypes/GD/clean/GD_EUR --bmerge /data/CARD/PD/GP2/genotypes/GD/clean/GD_AJ --make-bed --out /data/CARD/PD/GP2/genotypes/GD/clean/GD_EUR_AJ


In [40]:
from QC.imputation import *
import QC.config as config
# from QC.imputation import impute_data_prep
# these files need to be in a different place eventually
ref_panel='/data/vitaled2/GenoTools/ref/PASS.Variantsbravo-dbsnp-all.tab'
check_bim_pl = '/data/vitaled2/GenoTools/ref/HRC-1000G-check-bim.pl'

imputed_out_dir = '/data/CARD/PD/GP2/genotypes/GD/imputed' 

# Merged EUR+AJ fileset is prepared as its own group.
label = 'EUR_AJ'
geno = eur_aj_path

# Output directory for imputed results, and the pre-imputation working directory/prefix.
label_outdir = f'{imputed_out_dir}/{label}'
impute_prep_outdir = f'{gd_path}/QC/{label}'
impute_prep_geno = f'{impute_prep_outdir}/{label}'

for needed_dir in (label_outdir, impute_prep_outdir):
    os.makedirs(needed_dir, exist_ok=True)
print(label_outdir)

# Run the pre-imputation preparation for the merged fileset.
impute_data = impute_data_prep(geno, impute_prep_geno, ref_panel, check_bim_pl)

/data/CARD/PD/GP2/genotypes/GD/imputed/EUR_AJ


Executing: plink --bfile GD_EUR_AJ --freq --out EUR_AJ


PLINK v1.90b4.4 64-bit (21 May 2017)           www.cog-genomics.org/plink/1.9/
(C) 2005-2017 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to EUR_AJ.log.
Options in effect:
  --bfile GD_EUR_AJ
  --freq
  --out EUR_AJ

1547809 MB RAM detected; reserving 773904 MB for main workspace.
2004347 variants loaded from .bim file.
249 people (125 males, 124 females) loaded from .fam.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 249 founders and 0 nonfounders present.
Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989 done.
Total genotyping rate is 0.989376.
--freq: Allele frequencies (founders only) written to EUR_AJ.frq .



Executing: perl HRC-1000G-check-bim.pl -b GD_EUR_AJ.bim -f EUR_AJ.frq -r PASS.Variantsbravo-dbsnp-all.tab -h




         Script to check plink .bim files against HRC/1000G for
        strand, id names, positions, alleles, ref/alt assignment
                         William Rayner 2015
                        wrayner@well.ox.ac.uk

                             Version 4.2.5


Options Set:
Reference Panel:             HRC
Bim filename:                GD_EUR_AJ.bim
Reference filename:          PASS.Variantsbravo-dbsnp-all.tab
Allele frequencies filename: EUR_AJ.frq
Allele frequency threshold:  0.2


Reading PASS.Variantsbravo-dbsnp-all.tab
 100000 200000 300000 400000 500000 600000 700000 800000 900000 1000000 1100000 1200000 1300000 1400000 1500000 1600000 1700000 1800000 1900000 2000000 2100000 2200000 2300000 2400000 2500000 2600000 2700000 2800000 2900000 3000000 3100000 3200000 3300000 3400000 3500000 3600000 3700000 3800000 3900000 4000000 4100000 4200000 4300000 4400000 4500000 4600000 4700000 4800000 4900000 5000000 5100000 5200000 5300000 5400000 5500000 5600000 5700000 5800000 5900000 6

Executing: sh Run-plink.sh


PLINK v1.90b4.4 64-bit (21 May 2017)           www.cog-genomics.org/plink/1.9/
(C) 2005-2017 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to TEMP1.log.
Options in effect:
  --bfile GD_EUR_AJ
  --exclude Exclude-GD_EUR_AJ-HRC.txt
  --make-bed
  --out TEMP1

1547809 MB RAM detected; reserving 773904 MB for main workspace.
2004347 variants loaded from .bim file.
249 people (125 males, 124 females) loaded from .fam.
--exclude: 1211155 variants remaining.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 249 founders and 0 nonfounders present.
Calculating allele frequencies... 10111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989 done.
Total genotyping rate is 0.996089.
1211155 variants and 249 people pass filters and QC.
Note: No phenotypes present.
--make-bed to TEMP1.bed + TEMP1.bim + TEMP1.fam ... 101

Executing: plink --bfile GD_EUR_AJ-updated-chr1 --recode vcf --chr 1 --out EUR_AJ_chr1
Executing: plink --bfile GD_EUR_AJ-updated-chr2 --recode vcf --chr 2 --out EUR_AJ_chr2
Executing: plink --bfile GD_EUR_AJ-updated-chr3 --recode vcf --chr 3 --out EUR_AJ_chr3
Executing: plink --bfile GD_EUR_AJ-updated-chr4 --recode vcf --chr 4 --out EUR_AJ_chr4
Executing: plink --bfile GD_EUR_AJ-updated-chr5 --recode vcf --chr 5 --out EUR_AJ_chr5
Executing: plink --bfile GD_EUR_AJ-updated-chr6 --recode vcf --chr 6 --out EUR_AJ_chr6
Executing: plink --bfile GD_EUR_AJ-updated-chr7 --recode vcf --chr 7 --out EUR_AJ_chr7
Executing: plink --bfile GD_EUR_AJ-updated-chr8 --recode vcf --chr 8 --out EUR_AJ_chr8
Executing: plink --bfile GD_EUR_AJ-updated-chr9 --recode vcf --chr 9 --out EUR_AJ_chr9
Executing: plink --bfile GD_EUR_AJ-updated-chr10 --recode vcf --chr 10 --out EUR_AJ_chr10
Executing: plink --bfile GD_EUR_AJ-updated-chr11 --recode vcf --chr 11 --out EUR_AJ_chr11
Executing: plink --bfile GD_EUR_AJ-up

In [42]:
# Rebuild the EUR_AJ paths from the current `label`, then submit its VCFs as one job.
label_outdir = f'{imputed_out_dir}/{label}'
impute_prep_outdir = f'{gd_path}/QC/{label}'
impute_prep_geno = f'{impute_prep_outdir}/{label}'
print(impute_prep_geno)

# Per-chromosome pre-imputation VCFs, chr1..chr23.
vcf_list = []
for chrom in range(1, 24):
    vcf_list.append(f'{impute_prep_geno}_pre_impute_chr{chrom}.vcf.gz')

job_json = submit_job(vcf_list, password=password, token=token)
job_id = job_json['id']

# One-row record of the submitted job, saved without the index column.
job_id_df = pd.DataFrame({'job_id': [job_id], 'label': [label], 'path': [geno]})
# job_id_df = job_id_df.append(job_id_df_tmp)

job_id_df.to_csv(f'{imputed_out_dir}/EUR_AJ_impute_job_ids.csv', index=False)

/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ
['/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr1.vcf.gz', '/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr2.vcf.gz', '/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr3.vcf.gz', '/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr4.vcf.gz', '/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr5.vcf.gz', '/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr6.vcf.gz', '/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr7.vcf.gz', '/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr8.vcf.gz', '/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr9.vcf.gz', '/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr10.vcf.gz', '/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr11.vcf.gz', '/data/CARD/PD/GP2/raw_genotypes/GD/QC/EUR_AJ/EUR_AJ_pre_impute_chr12.vcf.gz', '/data/C

In [16]:
imputed_out_dir = '/data/CARD/PD/GP2/genotypes/GD/imputed' 
# !cat {imputed_out_dir}/impute_job_ids.csv
# !cat {imputed_out_dir}/EUR_AJ_impute_job_ids.csv

# Job table for the merged EUR_AJ submission (saved without an index column).
impute_jobs1 = pd.read_csv(f'{imputed_out_dir}/EUR_AJ_impute_job_ids.csv')

# Job table for the per-ancestry submissions. That file was saved with its index,
# which comes back as 'Unnamed: 0'; drop it without mutating in place.
impute_jobs2 = pd.read_csv(f'{imputed_out_dir}/impute_job_ids.csv').drop(columns=['Unnamed: 0'])

# Stack both tables. pd.concat replaces DataFrame.append (removed in pandas 2.0);
# the original row labels are kept, as append did.
impute_jobs = pd.concat([impute_jobs1, impute_jobs2])
impute_jobs

Unnamed: 0,job_id,label,path
0,job-20210830-171721-293,EUR_AJ,/data/CARD/PD/GP2/genotypes/GD/clean/GD_EUR_AJ
0,job-20210830-160043-679,AMR,/data/CARD/PD/GP2/genotypes/GD/clean/GD_AMR
1,job-20210830-160045-801,EAS,/data/CARD/PD/GP2/genotypes/GD/clean/GD_EAS


In [18]:
from QC.imputation import *
import QC.config as config
token = config.api_key
password = 'imputer'


# Download each job's results into the output directory named after its label.
# Rows are paired positionally, so duplicate index labels in impute_jobs do not matter.
for job_id, job_label in zip(impute_jobs.job_id, impute_jobs['label']):
    impute_outpath = f'/data/CARD/PD/GP2/genotypes/GD/imputed/{job_label}'
    print(impute_outpath)
    pull_imputed_data(impute_outpath, token, job_id, password)

/data/CARD/PD/GP2/genotypes/GD/imputed/EUR_AJ
Curling output data with the following command: curl -sL https://imputation.biodatacatalyst.nhlbi.nih.gov/get/368819/152c0466a61a3ac69e897a34b7d8e74c325ddc025d4c2979bb6ca127fd7d09b0 | bash
Curling output data with the following command: curl -sL https://imputation.biodatacatalyst.nhlbi.nih.gov/get/368823/caae4e3fd2829aea028d0a9473b011d2b248b3a3154047e4f79f204e3bf7d58c | bash
Curling output data with the following command: curl -sL https://imputation.biodatacatalyst.nhlbi.nih.gov/get/368825/1ccfd61a561587e3f89e0f54fd7fbf55f5489e0b02f789dc686281a396c4997f | bash
Curling output data with the following command: curl -sL https://imputation.biodatacatalyst.nhlbi.nih.gov/get/368826/6862c7cc7a72c3b1c7c5bffe5e619850fea2dfe4c0c77568639cd37b21765b98 | bash

Finished Pulling Imputed Data!

/data/CARD/PD/GP2/genotypes/GD/imputed/AMR
Curling output data with the following command: curl -sL https://imputation.biodatacatalyst.nhlbi.nih.gov/get/368672/8390f

In [6]:
QC_metrics_path = f'{out_path}.QC.metrics.h5'


def _read_qc_table(table_key, **read_kwargs):
    """Read one table from the QC metrics HDF5 file, selected by its key."""
    return pd.read_hdf(QC_metrics_path, key=table_key, **read_kwargs)


# QC summary tables
metrics_df = _read_qc_table('QC')
ancestry_counts_df = _read_qc_table('ancestry_counts')
pred_ancestry_labels = _read_qc_table('ancestry_labels')
# NOTE(review): `index=True` is passed through unchanged from the original call;
# it is not a documented read_hdf argument — confirm it is needed.
conf_mat_df = _read_qc_table('confusion_matrix', index=True)

# Principal-component tables
ref_pcs = _read_qc_table('ref_pcs')
projected_pcs = _read_qc_table('projected_pcs')

# UMAP tables
total_umap = _read_qc_table('total_umap')
ref_umap = _read_qc_table('ref_umap')
new_samples_umap = _read_qc_table('new_samples_umap')

In [5]:
# Re-list the cleaned output directory (same location as out_dir).
!ls /data/CARD/PD/GP2/genotypes/GD/clean/

GD_AAC.bed  GD_AJ.bed  GD_AMR.bed  GD_EAS.bed  GD_EUR.bed  GD.QC.metrics.h5
GD_AAC.bim  GD_AJ.bim  GD_AMR.bim  GD_EAS.bim  GD_EUR.bim
GD_AAC.fam  GD_AJ.fam  GD_AMR.fam  GD_EAS.fam  GD_EUR.fam
GD_AAC.hh   GD_AJ.hh   GD_AMR.hh   GD_EAS.hh   GD_EUR.hh
GD_AAC.log  GD_AJ.log  GD_AMR.log  GD_EAS.log  GD_EUR.log


In [17]:
# Inspect the stderr log (.e) of a swarm job; the job id is hard-coded in the path.
# !cat {swarm_scripts_dir}/logs/swarm_21042872_0.e
!cat /data/CARD/PD/GP2/raw_genotypes/GD/swarm_scripts/logs/swarm_21059707_0.e

/data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_missing.hh ); many commands treat
these as missing.
treat these as missing.
/data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_callrate.hh ); many commands treat
these as missing.
treat these as missing.
/data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_callrate_sex_tmp1.hh ); many commands
treat these as missing.
treat these as missing.
/data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_callrate_sex_tmp2.hh ); many commands
treat these as missing.
/data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_callrate_sex.hh ); many commands treat
these as missing.
treat these as missing.
/data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_callrate_sex_ancestry_AJ.hh ); many
commands treat these as missing.
treat these as missing.
/data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_callrate_sex_ancestry_EUR.hh ); many
commands treat these as missing.
treat these as missing.
/data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_callrate_sex_ancestry_AMR.hh ); many
commands treat these as missing.
treat these as missing.
treat the

In [9]:
# Inspect the PLINK log for the SAS "_related_het" step (presumably heterozygosity-outlier
# removal — confirm). The captured log shows 2 samples loaded and PLINK aborting with
# "No people remaining after --remove", i.e. no SAS fileset is produced by this step.
!cat /data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_callrate_sex_ancestry_SAS_related_het.log

PLINK v1.90b4.4 64-bit (21 May 2017)
Options in effect:
  --bfile /data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_callrate_sex_ancestry_SAS_related
  --make-bed
  --out /data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_callrate_sex_ancestry_SAS_related_het
  --remove /data/CARD/PD/GP2/raw_genotypes/GD/QC/GD_callrate_sex_ancestry_SAS_related_het.outliers

Hostname: cn3393
Working directory: /gpfs/gsfs12/users/vitaled2/GenoTools/GP2_data_processing
Start time: Fri Aug 13 15:22:17 2021

Random number seed: 1628882537
257652 MB RAM detected; reserving 128826 MB for main workspace.
2004347 variants loaded from .bim file.
2 people (0 males, 2 females) loaded from .fam.
Error: No people remaining after --remove.

End time: Fri Aug 13 15:22:18 2021
