In [None]:
1. We extracted data from the UK Biobank and identified 3,290 cases with essential hypertension and co-occurring dementia, after excluding individuals with Parkinson’s disease, vascular dementia, frontotemporal dementia, Huntington’s disease, or Creutzfeldt-Jakob disease. A total of 87,524 individuals with essential hypertension but without any neurodegenerative condition were included as controls. We then ran a genome-wide association analysis using PLINK 2.0.
2. We ran the GWAS in PLINK 2.0, adjusting for 10 principal components, sex at birth, and current age (2024 − year of birth), with an age filter of > 60 years.


In [None]:
import pandas as pd
import subprocess
import sys
import numpy as np
import os
import shutil

In [2]:
def shell_do(command, log=False, return_log=False):
    """Run a whitespace-separated command line and optionally surface its output.

    The command is echoed to stderr, split on whitespace (no shell is involved,
    so quoting, pipes and globs are not interpreted) and run with
    ``subprocess.run``.

    Args:
        command: Command line as a single string.
        log: If true, print the captured stdout (decoded as UTF-8).
        return_log: If true, return the captured output.

    Returns:
        ``(stdout, stderr)`` as UTF-8 decoded strings when ``return_log`` is
        true; otherwise ``None``.
    """
    argv = command.split()
    print(f'Executing: {(" ").join(argv)}', file=sys.stderr)

    # stderr must be piped for ``res.stderr`` to be bytes; without this it is
    # None and the ``return_log`` branch fails with AttributeError on .decode().
    # It is only captured on request so the default pass-through is unchanged.
    res = subprocess.run(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE if return_log else None,
    )

    if log:
        print(res.stdout.decode('utf-8'))
    if return_log:
        return (res.stdout.decode('utf-8'), res.stderr.decode('utf-8'))

# Interacting with Swiss Army Knife through Jupyter Notebooks
## What is Swiss Army Knife?
### Swiss Army Knife is a generic app which can be used to perform common file operations for the genotype data housed on DNANexus.
### Contains software such as plink, plink2, bcftools, etc.
#### Below we loop over each chromosome and submit one Swiss Army Knife job per chromosome; in this notebook each job runs a PLINK2 association analysis (`--glm`).
##### -iin: input (need to specify each file).
##### -icmd: command to run
##### --instance-type: VM type to run the command on
##### --destination: output folder
#### Note: this will start 23 separate Swiss Army Knife jobs so make sure you are using a reasonable instance type.

In [24]:
import pandas as pd

In [25]:
!dx download ADRD_noFTD_noPARKINSONISM_noALS_noVD_noHT_noCJD_noPDD.CTRL_inclProxy_60older.plink_pheno.txt

Error: path "/opt/notebooks/ADRD_noFTD_noPARKINSONISM_noALS_noVD_noHT_noCJD_no
PDD.CTRL_inclProxy_60older.plink_pheno.txt" already exists but -f/--overwrite
was not set


In [26]:
# Load the tab-separated phenotype table; later cells read its FID, IID and
# ADRD columns.
df = pd.read_csv(
    "ADRD_noFTD_noPARKINSONISM_noALS_noVD_noHT_noCJD_noPDD.CTRL_inclProxy_60older.plink_pheno.txt",
    sep='\t',
)

In [27]:
# Tally the ADRD phenotype codes once and look up each code, defaulting to 0
# when a code is absent. The printed label says 'Pheno' although the column
# read is 'ADRD'.
adrd_code_counts = df['ADRD'].value_counts()
count_1 = adrd_code_counts.get(1, 0)
count_2 = adrd_code_counts.get(2, 0)

print(f"Count of 1 in 'Pheno': {count_1}")
print(f"Count of 2 in 'Pheno': {count_2}")

Count of 1 in 'Pheno': 87524
Count of 2 in 'Pheno': 3290


In [28]:
!dx download UKB_EUR_UMAP_COVARIATES.txt

Error: path "/opt/notebooks/UKB_EUR_UMAP_COVARIATES.txt" already exists but
-f/--overwrite was not set


In [29]:
# Load the tab-separated covariate table; it is merged with the phenotype
# table on FID/IID in a later cell.
df1 = pd.read_csv(
    "UKB_EUR_UMAP_COVARIATES.txt",
    sep='\t',
)

In [30]:
# Keep just the sample identifier columns (FID, IID) of the phenotype table.
final2 = df.loc[:, ['FID', 'IID']]

In [31]:
# Write the FID/IID list as a tab-separated file without the pandas index;
# the GWAS cell passes this file to plink2 --keep.
final2.to_csv(
    "FID_IID_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt",
    sep='\t',
    index=False,
)

In [32]:
# Write the phenotype table as a tab-separated file without the pandas index;
# the GWAS cell passes this file to plink2 --pheno.
df.to_csv(
    "Pheno_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt",
    sep='\t',
    index=False,
)

In [37]:
# Inner-join the covariate table with the phenotype table on the (FID, IID)
# sample keys; samples absent from either table are dropped.
final4 = df1.merge(df, on=['FID', 'IID'], how='inner')

In [38]:
# Re-tally the ADRD codes after the merge so the counts can be compared with
# the pre-merge counts. The printed label says 'Pheno' although the column
# read is 'ADRD'.
merged_adrd_code_counts = final4['ADRD'].value_counts()
count_1 = merged_adrd_code_counts.get(1, 0)
count_2 = merged_adrd_code_counts.get(2, 0)

print(f"Count of 1 in 'Pheno': {count_1}")
print(f"Count of 2 in 'Pheno': {count_2}")

Count of 1 in 'Pheno': 87524
Count of 2 in 'Pheno': 3290


In [39]:
# Write the merged covariate + phenotype table as a tab-separated file without
# the pandas index; the GWAS cell passes this file to plink2 --covar.
final4.to_csv(
    "Covar_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt",
    sep='\t',
    index=False,
)

In [29]:
# Upload the phenotype file to the DNAnexus project; the GWAS cell references
# it by name as an -iin input.
# NOTE(review): re-running this cell may create a second project file with the
# same name, which could make the by-name -iin reference ambiguous — confirm.
!dx upload Pheno_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt

ID                                file-J0fxxfjJbP2g19z4kQG5FBX2
Class                             file
Project                           project-GkYf2zQJbP2Q3vFgf14863Gf
Folder                            /
Name                              Pheno_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Wed May 21 14:10:35 2025
Created by                        vidhu
 via the job                      job-J0fxQfjJbP2vxfkJ3YZJY2K0
Last modified                     Wed May 21 14:10:35 2025
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"


In [30]:
# Upload the FID/IID keep-list to the DNAnexus project; the GWAS cell
# references it by name as an -iin input.
# NOTE(review): re-running this cell may create a second project file with the
# same name, which could make the by-name -iin reference ambiguous — confirm.
!dx upload FID_IID_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt

ID                                file-J0fxxg8JbP2qFK9Pf5V2ZxGF
Class                             file
Project                           project-GkYf2zQJbP2Q3vFgf14863Gf
Folder                            /
Name                              FID_IID_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Wed May 21 14:10:37 2025
Created by                        vidhu
 via the job                      job-J0fxQfjJbP2vxfkJ3YZJY2K0
Last modified                     Wed May 21 14:10:38 2025
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"


In [42]:
# Upload the covariate file to the DNAnexus project; the GWAS cell references
# it by name as an -iin input.
# NOTE(review): re-running this cell may create a second project file with the
# same name, which could make the by-name -iin reference ambiguous — confirm.
!dx upload Covar_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt

ID                                file-J0kgjX8JbP2qFK9Pf5V85YZ9
Class                             file
Project                           project-GkYf2zQJbP2Q3vFgf14863Gf
Folder                            /
Name                              Covar_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Sun May 25 20:23:37 2025
Created by                        vidhu
 via the job                      job-J0kfqz0JbP2bYb8k0qFGZfQ5
Last modified                     Sun May 25 20:23:38 2025
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"


In [53]:
%%bash

# Submit one Swiss Army Knife job per chromosome ID (1-23). Each job receives
# that chromosome's pgen/psam/pvar files plus the phenotype, covariate and
# keep-list files, and runs a plink2 --glm association test on them.
#
# The -icmd string is single-quoted, so "$in_prefix" is not expanded by this
# shell; presumably Swiss Army Knife defines it inside the job — verify.
#
# NOTE(review): FILE_DIR_A and FILE_DIR_B are assigned but never referenced in
# this cell, while ${projectid} and ${FILE_DIR} used in --destination are not
# assigned here. Unless they are exported elsewhere they expand to empty
# strings — confirm the intended destination.
# NOTE(review): --covar-name includes `sbp` in addition to the ten PCs, sex and
# age — confirm this matches the covariate set described for the analysis.
seq 1 1 23 > chr_list.txt

FILE_DIR_A="/Imputed_Genotypes_2023/TOPMed_Plink2"
FILE_DIR_B="/ref_panel/chip_overlaps/"
# FILE_DIR_C="/Imputed_Genotypes_2023/TOPMed_Plink2/extract"

# Read the chromosome IDs back from the list file written above.
CHR_IDS=$(cat chr_list.txt)


# -y skips the confirmation prompt and --brief prints only the job ID.
for CHR in $CHR_IDS; do
    dx run swiss-army-knife -iin="chr${CHR}_pgen.pgen" \
    -iin="chr${CHR}_pgen.psam" \
    -iin="chr${CHR}_pgen.pvar" \
    -iin="Pheno_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt" \
    -iin="Covar_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt" \
    -iin="FID_IID_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt" \
    -icmd='plink2 --pfile "$in_prefix" \
    --glm hide-covar firth-fallback cols=+a1freq,+a1freqcc,+a1countcc,+totallelecc,+err,+beta,-test,-nobs,-tz,-orbeta \
    --pheno Pheno_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt --maf 0.01 \
    --pheno-name ADRD --covar-variance-standardize \
    --geno 0.05 --mac 20  --memory 12000 \
    --covar Covar_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt \
    --covar-name PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,GENETIC_SEX,AGE_2024_COV,sbp \
    --keep FID_IID_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt \
    --out "$in_prefix".HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt' -y  --brief --priority normal \
    --instance-type mem3_ssd3_x12 --destination "${projectid}:${FILE_DIR}/" 
done 

job-Gxfvk90JbP2qkqKPv2qkKyyV
job-Gxfvk98JbP2Z91v1b7zfvzKb
job-Gxfvk9QJbP2fG2QQQgfV8kpG
job-Gxfvk9jJbP2Qb2ypGxgj98QG
job-GxfvkB0JbP2fG2QQQgfV8kpQ
job-GxfvkB8JbP2qkqKPv2qkKyyb
job-GxfvkBQJbP2Z91v1b7zfvzKj
job-GxfvkBjJbP2Z91v1b7zfvzKp
job-GxfvkF0JbP2pG9qPGBgPxb44
job-GxfvkF8JbP2fG2QQQgfV8kqQ
job-GxfvkFQJbP2Qb2ypGxgj98QQ
job-GxfvkFjJbP2pG9qPGBgPxb48
job-GxfvkG0JbP2fG2QQQgfV8kqX
job-GxfvkG8JbP2ZZ9jJv5VV29JX
job-GxfvkGjJbP2ZZ9jJv5VV29JZ
job-GxfvkJ0JbP2qzPkKPJ6p9GXy
job-GxfvkJ8JbP2k8Jf4QkVvK3qQ
job-GxfvkJQJbP2qzPkKPJ6p9GY6
job-GxfvkJjJbP2ZZ9jJv5VV29KB
job-GxfvkK0JbP2fG2QQQgfV8kv7
job-GxfvkK8JbP2qzPkKPJ6p9GYB
job-GxfvkKQJbP2k8Jf4QkVvK3qv
job-GxfvkKjJbP2Qb2ypGxgj98V5


In [9]:
# Fetch the chromosome 9 association result into the working directory.
# NOTE(review): only chr9 is downloaded here, while the concatenation cell
# reads chromosomes 1-22 under a different file-name pattern — confirm how the
# remaining result files are obtained.
!dx download chr9_pgen.HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New.txt.ADRD.glm.logistic.hybrid



In [23]:
import pandas as pd

# Read the per-chromosome PLINK2 logistic results for chromosomes 1-22 and
# stack them into a single table, AGE_SEX_MATCH.
#
# The frames are collected in a list and concatenated once, because calling
# pd.concat inside the loop re-copies all accumulated rows on every iteration.
# sep=r'\s+' replaces the deprecated delim_whitespace=True and parses the same way.
#
# NOTE(review): the file-name pattern below ("Pheno_New_final_..._New_sbp.txt")
# differs from the --out prefix in this notebook's GWAS submission cell
# ("HT_Dem_..._New.txt") — confirm which result set is intended.
# NOTE(review): range(1, 23) stops at chromosome 22 although 23 jobs are
# submitted — confirm chromosome 23 is excluded on purpose.
per_chr_frames = []
for CHR in range(1, 23):
    print(CHR)
    AGE_SEX_MATCH_TEMP = pd.read_csv(
        f'chr{CHR}_pgen.Pheno_New_final_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New_sbp.txt.ADRD.glm.logistic.hybrid',
        sep=r'\s+',
    )
    per_chr_frames.append(AGE_SEX_MATCH_TEMP)

AGE_SEX_MATCH = pd.concat(per_chr_frames)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22


In [24]:
# Write the concatenated association results as one tab-separated file without
# the pandas index.
AGE_SEX_MATCH.to_csv(
    "output_final_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_0.01_New_UKB_GWAS..txt",
    sep='\t',
    index=False,
)

In [25]:
# Upload the merged GWAS results file to the DNAnexus project.
# NOTE(review): re-running this cell may create a second project file with the
# same name — confirm whether an older copy should be removed first.
!dx upload output_final_HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_0.01_New_UKB_GWAS..txt

ID                                file-GxgGvk0JbP2VG03037f9Q24Q
Class                             file
Project                           project-GkYf2zQJbP2Q3vFgf14863Gf
Folder                            /
Name                              HT_Dem_remove_PD_VD_FTD_Hu_CJD_Vs_HT_New_sbp.txt
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Fri Dec 27 15:57:24 2024
Created by                        vidhu
 via the job                      job-GxgF3f0JbP2pzzZF6vYjjb0v
Last modified                     Fri Dec 27 15:57:26 2024
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
