In [1]:
import numpy as np
import pandas as pd

## Genomic footprint of NMD regions

In [2]:
# Load just the `nmd_definitive` column of the NMD annotation table,
# reading it as a categorical.
nmd = pd.read_csv(
    "../outputs/nmd_annotations.tsv",
    sep="\t",
    usecols=["nmd_definitive"],
    dtype="category",
)

# Relative footprint: the fraction of annotation rows carrying each region
# label. The index is named "region" and the values "footprint" so the
# saved table has self-describing headers.
footprint = (
    nmd["nmd_definitive"]
    .value_counts(normalize=True)
    .rename("footprint")
    .rename_axis("region")
)

# Persist the per-region footprint as a TSV
footprint.to_csv("../outputs/stats_nmd_footprint.tsv", sep="\t")

footprint

region
nmd_target        0.613358
distal_nmd        0.219985
start_proximal    0.087468
long_exon         0.079188
Name: footprint, dtype: float64

## Variant ascertainment in ClinVar

In [3]:
# Load the ClinVar LoF variants with their NMD annotation, map the short
# labels "nmd" / "distal" to "nmd_target" / "distal_nmd", and rename the
# `variant_region` column to `region`.
# NOTE(review): `.replace` is applied to every column of the frame, not only
# `variant_region` - confirm no other column holds "nmd" or "distal".
cv = (
    pd.read_csv("../outputs/clinvar_variants_lof_with_nmd_annotation.tsv", sep="\t")
    .replace({"nmd": "nmd_target", "distal": "distal_nmd"})
    .rename(columns={"variant_region": "region"})
)

# Ascertainment: the share of ClinVar variants that fall in each region
cv_asc = (
    cv["region"]
    .value_counts(normalize=True)
    .rename("proportion_variants")
    .rename_axis("region")
)

# Divide each region's share of variants by its footprint. The two series
# are aligned on their region index, so a label present in only one of them
# yields NaN.
cv_asc_norm = cv_asc.div(footprint).rename("prop_norm")

# Write the footprint-normalised ascertainment to a TSV
cv_asc_norm.to_csv("../outputs/stats_clinvar_ascertainment.tsv", sep="\t")

cv_asc_norm

region
distal_nmd        0.567295
long_exon         1.406573
nmd_target        1.153902
start_proximal    0.640970
Name: prop_norm, dtype: float64

## Proportion of VUS in ClinVar by NMD region

In [4]:
# Within each region, the fraction of ClinVar variants in each ACMG class
# (P/LP, VUS, B/LB). Proportions are normalised per region, and the result
# is flattened to one row per (region, acmg) pair.
cv_vus = (
    cv.groupby("region")["acmg"]
    .value_counts(normalize=True)
    .reset_index(name="proportion")
)

# Write the long-format table to a TSV, without the integer row index
cv_vus.to_csv(
    "../outputs/stats_clinvar_acmg_by_region.tsv",
    sep="\t",
    index=False,
)

cv_vus

Unnamed: 0,region,acmg,proportion
0,distal_nmd,P/LP,0.667627
1,distal_nmd,VUS,0.310823
2,distal_nmd,B/LB,0.02155
3,long_exon,P/LP,0.941715
4,long_exon,VUS,0.05421
5,long_exon,B/LB,0.004075
6,nmd_target,P/LP,0.908297
7,nmd_target,VUS,0.087468
8,nmd_target,B/LB,0.004235
9,start_proximal,P/LP,0.86465
