## Ancestry-stratified GWAS meta-analyses

### List of GWAS datasets

In [1]:
# Print the GWAS_datasets file, which lists each cohort and its summary-statistics filename.
cat GWAS_datasets

Cohort (and reference)                                      | Filename                                         | Sample size
------------------------------------------------------------|--------------------------------------------------|---------------------------
All of Us AFR (current study)                               | allofus_afr_sumstat.gz                           | 873 cases, 33876 controls
MVP African American or Afro-Caribbean (Verma et al, 2024)  | AFR_GCST90477503_filt_BETASE_markername	       | 1303 cases, 119437 controls
All of Us AMR (current study)                               | allofus_amr_sumstat.gz                           | 876 cases, 33909 controls
MVP Hispanic or Latin American (Verma et al, 2024)          | GCST90475828_filt_BETASE_pos_markeradd           | 1148 cases, 57993 controls
All of Us EUR (current study)                               | allofus_eur_sumstat.gz                           | 8300 cases, 86961 controls
UK Biobank (current study)                                  | rls_ukb_eur_sumstat_ci.gz   

### Create plink files for All of Us African, Latin American, and European cohorts

In [None]:
# Convert the chromosome-"$i" BGEN file into one PLINK 2 pgen fileset per ancestry cohort.
# "$i" (the chromosome identifier) is not set in this cell; it is expected to be
# defined by the surrounding loop or job script.
# Each run keeps only the families listed in <cohort>_cohort_upd and applies --maf 0.0099.
# Looping over the cohort codes derives the --out prefix from the cohort name, so the
# EUR run writes chr"$i"_eur_cohort instead of overwriting the AFR output
# (the EUR command previously reused the chr"$i"_afr_cohort prefix).
for cohort in afr amr eur; do
    plink2 --bgen bgen_file_chr"$i".bgen ref-first --sample rls_sample --maf 0.0099 \
        --keep-fam "${cohort}_cohort_upd" \
        --make-pgen --out chr"$i"_"${cohort}"_cohort --threads 8
done

### Individual GWAS runs for All of Us and UK Biobank

In [None]:
# Association run for the AFR cohort on chromosome "$i" (set outside this cell).
# RLS_covars_afr_recoded_pheno supplies both the phenotype column ("pheno") and the
# covariates (sex, age, PC1-PC10); results are written under the chr"$i"_afr prefix.
plink2 \
  --pfile chr"$i"_afr_cohort \
  --pheno RLS_covars_afr_recoded_pheno \
  --pheno-name pheno \
  --covar RLS_covars_afr_recoded_pheno \
  --covar-name sex,age,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10 \
  --covar-variance-standardize \
  --glm firth-fallback \
  --hwe 1e-6 \
  --geno 0.05 \
  --threads 8 \
  --out chr"$i"_afr

#### Post-GWAS QC of the Million Veteran Program (MVP) summary statistics: filter out variants with unrealistic standard errors (SE)
#### Keep the header row; drop rows whose SE is missing or invalid

In [2]:
# Example: AFR cohort
# The header row is passed through with an extra SE_est column name appended.
# A data row is kept only when fields 10 and 11 are not "#NA", both are > 0, and
#   se = (log($10) - log($11)) / (2 * 1.96)
# is strictly between 0 and 10; se is appended as the last column.
# NOTE(review): presumably fields 10 and 11 are the upper and lower 95% CI bounds of
# the odds ratio, in that order - confirm against the header of the input file.
zcat GCST90477503.tsv.gz \
| awk -F'\t' -v OFS='\t' '
    NR == 1                      { print $0 "\tSE_est"; next }
    $10 == "#NA" || $11 == "#NA" { next }
    !($10 > 0 && $11 > 0)        { next }
    {
        se = (log($10) - log($11)) / (2 * 1.96)
        if (se > 0 && se < 10)
            print $0 "\t" se
    }
' \
| gzip -c > AFR_GCST90477503_filt.tsv.gz

In [3]:
# Show the header and the first data row of each reformatted summary-statistics file.
for sumstat in \
    AFR_GCST90477503_filt_BETASE_markername \
    GCST90475828_filt_BETASE_pos_markeradd \
    EUR_GCST90475829_filt_filt_BETASE_markername
do
    head -n 2 "$sumstat"
done

rsID	CHR	POS	EA	NEA	BETA	SE	P	OR	OR_95L	OR_95U	STATUS MARKERNAME
rs545426917	1	66381.0	TATATA	T	0.586118607801422	0.3174775402680828	0.06506	1.797	0.9642	3.348	9999999 chr1:66381:TATATA:T
rsID	CHR	POS	EA	NEA	BETA	SE	P	OR	OR_95L	OR_95U	STATUS	MARKERNAME
rs144572927	1	701595.0	G	A	-0.07407720339631574	0.3625852505828755	0.8381	0.9286	0.4561	1.89	9999999	chr1:701595:G:A
rsID	CHR	POS	EA	NEA	BETA	SE	P	OR	OR_95L	OR_95U	STATUS	MARKERNAME
rs201222573	1	756164.0	T	A	0.01291622526654623	0.02265798846459139	0.5551	1.013	0.9698	1.059	9999999	chr1:756164:T:A


### Ancestry-stratified GWAS meta-analyses

In [None]:
# Load the METAL environment module (version tag 2020-05-05) and start METAL.
# NOTE(review): the commands in the following cells are presumably entered at the
# METAL prompt opened here - confirm.
module load metal/2020-05-05
metal

#### European meta-analysis

In [None]:
## European meta-analysis: METAL commands with the pasted log lines kept as '##' comments.
## Column-label settings stay in effect until changed, so the labels are switched
## before each input file whose header differs from the previous one.
MARKERLABEL MARKERNAME
## Set marker header to MARKERNAME ...
ALLELELABELS EA NEA
## Set allele headers to EA and NEA ...
EFFECTLABEL LOG(OR)
## Set effect header to LOG(OR) ...
SCHEME STDERR
## Meta-analysis will be based on effect sizes and their standard errors ...
STDERR LOG(OR)_SE
## Set standard error header to LOG(OR)_SE ...
PVALUE P
## Set p-value header to P ...
PROCESSFILE allofus_eur_sumstat
###########################################################################
## Processing file 'allofus_eur_sumstat'
## Processed 10047826 markers ...

## The next file's header uses BETA and SE, so switch the effect / standard-error labels.
EFFECTLABEL BETA
STDERR SE
PROCESSFILE EUR_GCST90475829_filt_filt_BETASE_markername
###########################################################################
## NOTE(review): the logged file name below differs from the PROCESSFILE argument above;
## presumably pasted from an earlier run - confirm the marker count applies to this file.
## Processing file 'MVA_EUR_with_CI_markername_h.fixed_confadded_maf_logseadded'
## Processed 9318451 markers ...

## Switch back to the LOG(OR) / LOG(OR)_SE labels for the remaining files.
STDERR LOG(OR)_SE
EFFECTLABEL LOG(OR)
## Set standard error header to LOG(OR)_SE ...
## Set effect header to LOG(OR) ...
PROCESSFILE cartagene_eur_sumstat_cols
###########################################################################
## Processing file 'cartagene_eur_sumstat_cols'
## Processed 10133785 markers ...
 
PROCESSFILE clsa_eur_sumstat
###########################################################################
## Processing file 'clsa_eur_sumstat'
## Processed 10214384 markers ...
 
## NOTE(review): the log line below has no matching WEIGHTLABEL command in this cell;
## presumably left over from an earlier session - confirm.
## Set weight header to OBS_CT ...
PROCESSFILE rls_ukb_eur_sumstat
###########################################################################
## Processing file 'rls_ukb_eur_sumstat'
## Processed 10296342 markers ...
 
## Name the output and run the meta-analysis with heterogeneity statistics.
OUTFILE  AUG2025_EUR_META1 .tbl
ANALYZE HETEROGENEITY

#### African meta-analysis

In [None]:
#African
## African meta-analysis of the All of Us AFR and MVP AFR summary statistics.
## Column-label settings stay in effect until changed, so they are set per input file.
MARKERLABEL MARKERNAME
## Set marker header to MARKERNAME ...
SCHEME STDERR
## Meta-analysis will be based on effect sizes and their standard errors ...
PVALUE P
## Set p-value header to P ...

## --- All of Us AFR ---
ALLELELABELS ALT REF
## Set allele headers to ALT and REF ...
EFFECTLABEL LOG(OR)
## Set effect header to LOG(OR) ...
STDERR LOG(OR)_SE
## Set standard error header to LOG(OR)_SE ...
PROCESSFILE allofus_afr_sumstat
##########################################################################

## --- MVP AFR ---
## This file's header is: rsID CHR POS EA NEA BETA SE P OR OR_95L OR_95U STATUS MARKERNAME.
## It has no ALT, REF, LOG(OR) or LOG(OR)_SE columns, so the allele, effect and
## standard-error labels must be switched before the file is read.
ALLELELABELS EA NEA
## Set allele headers to EA and NEA ...
EFFECTLABEL BETA
## Set effect header to BETA ...
STDERR SE
## Set standard error header to SE ...
PROCESSFILE AFR_GCST90477503_filt_BETASE_markername

## Name the output and run the meta-analysis with heterogeneity statistics.
OUTFILE AUG2025_AFR_META1 .tbl
ANALYZE HETEROGENEITY

#### Latin American meta-analysis

In [None]:
## Latin American meta-analysis of the All of Us AMR and MVP AMR summary statistics.
## Column-label settings stay in effect until changed, so they are set per input file.
MARKERLABEL MARKERNAME
## Set marker header to MARKERNAME ...
ALLELELABELS EA NEA
## Set allele headers to EA and NEA ...
SCHEME STDERR
## Meta-analysis will be based on effect sizes and their standard errors ...
PVALUE P
## Set p-value header to P ...

## --- All of Us AMR ---
EFFECTLABEL LOG(OR)
## Set effect header to LOG(OR) ...
STDERR LOG(OR)_SE
## Set standard error header to LOG(OR)_SE ...
PROCESSFILE allofus_amr_sumstat
##########################################################################

## --- MVP AMR ---
## This file's header is: rsID CHR POS EA NEA BETA SE P OR OR_95L OR_95U STATUS MARKERNAME.
## It has no LOG(OR) or LOG(OR)_SE columns, so the effect and standard-error labels
## must be switched before the file is read (the EA / NEA allele labels already match).
EFFECTLABEL BETA
## Set effect header to BETA ...
STDERR SE
## Set standard error header to SE ...
PROCESSFILE GCST90475828_filt_BETASE_pos_markeradd

## Name the output and run the meta-analysis with heterogeneity statistics.
OUTFILE AUG2025_AMR_META1 .tbl
ANALYZE HETEROGENEITY