Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error: ExpressionException: Cannot combine expressions from different source objects #3583

Open
iris-garden opened this issue May 6, 2024 · 0 comments
Labels
discourse migrated from discuss.hail.is

Comments

@iris-garden
Copy link
Owner

Note

The following post was exported from discuss.hail.is, a forum for asking questions about Hail which has since been deprecated.

(Jan 08, 2024 at 19:16) mgarcia said:

I am trying to run the following component of our Variant QC script to find variants that fall outside of HWE. We are filtering variants by MAF thresholds: 0.01 for all ancestry groups, save for the European group, for which our threshold is 0.001.

def flag_hwe(ds, pval_threshold, n_participants):
    '''
    Perform sex aware HWE tests on autosomes and chrX for hg38
    '''
    populations = ds.aggregate_cols(hl.agg.counter(ds.sa.POP)) # produces a dict of population
    if None in populations:
        print("WARNING: Skipping HWE test for samples of undefined population")
        del populations[None]
    hwe_tests = {}
    # Pre-queue the 3 hwe tests for each population
    for pop in populations:
        #ds_subpop=hl.agg.filter(ds.sa.POP == pop)
        ds_subpop=hl.variant_qc(ds.filter_cols(ds.sa.POP == pop))
        if pop=='EUR':
            maf_thr=0.001
        elif pop=='AFR':
            maf_thr=0.01
        elif pop=='SAS':
            maf_thr=0.01
        elif pop=='EAS':
            maf_thr=0.01
        ds_subpop = ds_subpop.filter_rows(ds_subpop.variant_qc.AF[1] > maf_thr)
        hwe_tests['hwe{}'.format(pop)] = hl.cond(
            # Filter 1) hwe{Pop}
            # If variant is autosomal and there are enough samples in the population, apply HWE
            ds_subpop.locus.in_autosome() & (hl.agg.count_where(ds_subpop.sa.POP == pop) >= n_participants),
            hl.agg.filter(ds_subpop.sa.POP == pop, hl.agg.hardy_weinberg_test(ds_subpop.GT)),
            # If variant is not autosomal, but in X nonpar and there are enough females in the population, apply HWE
            hl.cond(
                ds.locus.in_x_nonpar() & (hl.agg.count_where((ds.sa.POP == pop) & (ds.sa.GENDER == "Female")) >= n_participants),
                hl.agg.filter((ds.sa.POP == pop) & (ds.sa.GENDER == "Female"), hl.agg.hardy_weinberg_test(ds.GT)),
                hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
            )
        )
        hwe_tests['hwe{}FemalePAR'.format(pop)] = hl.cond(
            # Filter 2) hwe{Pop}Female
            # If this is an X par variant and there are enough females in the population, apply HWE
            ds.locus.in_x_par() & (hl.agg.count_where((ds.sa.POP == pop) & (ds.sa.GENDER == "Female")) >= n_participants),
            hl.agg.filter((ds.sa.POP == pop) & (ds.sa.GENDER == "Female"), hl.agg.hardy_weinberg_test(ds.GT)),
            hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
        )
        hwe_tests['hwe{}MalePAR'.format(pop)] = hl.cond(
            # Filter 2) hwe{Pop}Male
            # If this is an X par variant and there are enough males in the population, apply HWE
            ds.locus.in_x_par() & (hl.agg.count_where((ds.sa.POP == pop) & (ds.sa.GENDER == "Male")) >= n_participants),
            hl.agg.filter((ds.sa.POP == pop) & (ds.sa.GENDER == "Male"), hl.agg.hardy_weinberg_test(ds.GT)),
            hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
        )
        hwe_tests['hwe{}FemalenonPAR'.format(pop)] = hl.cond(
            # Filter 3) hwe{Pop}nonPAR
            # If this is an X par variant and there are enough subjects in the population, apply HWE
            ds.locus.in_x_nonpar() & (hl.agg.count_where((ds.sa.POP == pop) & (ds.sa.GENDER == "Female")) >= n_participants),
            hl.agg.filter((ds.sa.POP == pop), hl.agg.hardy_weinberg_test(ds.GT)),
            hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
        )
        hwe_tests['hwe{}MalenonPAR'.format(pop)] = hl.cond(
            # Filter 3) hwe{Pop}nonPAR
            # If this is an X par variant and there are enough subjects in the population, apply HWE
            ds.locus.in_x_nonpar() & (hl.agg.count_where((ds.sa.POP == pop) & (ds.sa.GENDER == "Male")) >= n_participants),
            hl.agg.filter((ds.sa.POP == pop), hl.agg.hardy_weinberg_test(ds.GT)),
            hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
        )
    # execute tests
    ds = ds.annotate_rows(
        hweByPop=hl.struct(
            **hwe_tests
        )
    )

    hwe_info = {}
    # Pre-queue the 3 info annotations for each population
    # This exports variant.hwe{}.p_value to an info field variant.info.pHWE{}
    for pop in populations:
        hwe_info['pHWE{}'.format(pop)] = ds.hweByPop['hwe{}'.format(pop)].p_value
        hwe_info['pHWE{}FemalePAR'.format(pop)] = ds.hweByPop['hwe{}FemalePAR'.format(pop)].p_value
        hwe_info['pHWE{}MalePAR'.format(pop)] = ds.hweByPop['hwe{}MalePAR'.format(pop)].p_value
        hwe_info['pHWE{}FemalenonPAR'.format(pop)] = ds.hweByPop['hwe{}FemalenonPAR'.format(pop)].p_value
        hwe_info['pHWE{}MalenonPAR'.format(pop)] = ds.hweByPop['hwe{}MalenonPAR'.format(pop)].p_value
    # execute annotations
    ds = ds.annotate_rows(
        info=ds.info.annotate(
            **hwe_info
        )
    )
    # Add filter flags
    for pop in populations:
        # Now flag variants based on the failed populations
        ds = ds.annotate_rows(
            filters=hl.cond(
                (ds.info['pHWE{}'.format(pop)] != -1) & (ds.info['pHWE{}'.format(pop)] < pval_threshold),
                ds.filters.add('HWE{}'.format(pop)),
                ds.filters
            )
        )
        ds = ds.annotate_rows(
            filters=hl.cond(
                (ds.info['pHWE{}FemalePAR'.format(pop)] != -1) & (ds.info['pHWE{}FemalePAR'.format(pop)] < pval_threshold),
                ds.filters.add('HWE{}FemalePAR'.format(pop)),
                ds.filters
            )
        )
        ds = ds.annotate_rows(
            filters=hl.cond(
                (ds.info['pHWE{}MalePAR'.format(pop)] != -1) & (ds.info['pHWE{}MalePAR'.format(pop)] < pval_threshold),
                ds.filters.add('HWE{}MalePAR'.format(pop)),
                ds.filters
            )
        )
        ds = ds.annotate_rows(
            filters=hl.cond(
                (ds.info['pHWE{}FemalenonPAR'.format(pop)] != -1) & (ds.info['pHWE{}FemalenonPAR'.format(pop)] < pval_threshold),
                ds.filters.add('HWE{}FemalenonPAR'.format(pop)),
                ds.filters
            )
        )
        ds = ds.annotate_rows(
            filters=hl.cond(
                (ds.info['pHWE{}MalenonPAR'.format(pop)] != -1) & (ds.info['pHWE{}MalenonPAR'.format(pop)] < pval_threshold),
                ds.filters.add('HWE{}MalenonPAR'.format(pop)),
                ds.filters
            )
        )
    return ds

I have encountered the following error which I am hoping to better understand. Does this error result from referencing of two different MatrixTables: ds_subpop and ds? Thank you in advance.

Traceback (most recent call last):
  File "hail_variant_qc_v11_run3_YL.py", line 550, in <module>
    vcf = flag_hwe(vcf, args.pval_threshold, args.n_individuals)
  File "hail_variant_qc_v11_run3_YL.py", line 142, in flag_hwe
    hwe_tests['hwe{}'.format(pop)] = hl.cond(
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/deprecated/classic.py", line 285, in wrapper_function
    return wrapped_(*args_, **kwargs_)
  File "<decorator-gen-736>", line 2, in cond
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/hail/typecheck/check.py", line 584, in wrapper
    return __original_func(*args_, **kwargs_)
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/hail/expr/functions.py", line 351, in cond
    return if_else(condition, consequent, alternate, missing_false)
  File "<decorator-gen-738>", line 2, in if_else
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/hail/typecheck/check.py", line 584, in wrapper
    return __original_func(*args_, **kwargs_)
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/hail/expr/functions.py", line 406, in if_else
    indices, aggregations = unify_all(condition, consequent, alternate)
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/hail/expr/expressions/base_expression.py", line 417, in unify_all
    raise ExpressionException(
hail.expr.expressions.base_expression.ExpressionException: Cannot combine expressions from different source objects.
    Found fields from 2 objects:
        <hail.matrixtable.MatrixTable object at 0x2b7371b28670>: ['locus', 'sa', 'sa', 'GT']
        <hail.matrixtable.MatrixTable object at 0x2b73719a8490>: ['locus', 'sa', 'GT']
@iris-garden iris-garden added the discourse migrated from discuss.hail.is label May 6, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
discourse migrated from discuss.hail.is
Projects
None yet
Development

No branches or pull requests

1 participant