Skip to content

Commit

Permalink
Apobec signature (#8)
Browse files Browse the repository at this point in the history
* added def to check for apobec signature

* updated readme
  • Loading branch information
jonas-fuchs committed Nov 3, 2023
1 parent e887425 commit 7247533
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 2 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ BAMdash automatically computes serveral statistics:
- for each track it computes recovery and mean coverage (set `-c` for the min coverage) for each element in the track
- if a `*.vcf` is provided it annotates `TRANSITION`/`TRANSVERSION` and type of exchange (`SNP`, `DEL`, `INS`)

If a `*.gb`and `*.vcf` is provided BAMdash computes the aminoacid exchange and the effect in the CDS (inspired by but not as powerful as [snpeff](http://pcingola.github.io/SnpEff/snpeff)). SNP and INDEL vcf annotation supports:
If a `*.gb` and a `*.vcf` file are provided, BAMdash checks whether the mutations could have been caused by APOBEC deamination.
Moreover, it annotates the amino acid exchange and the effect in the CDS (inspired by, but not as powerful as, [snpeff](http://pcingola.github.io/SnpEff/snpeff)). SNP and INDEL vcf annotation supports:

- `START_LOST`: INDEL or SNP start at the CDS and result in a start loss
- `STOP_LOST`: INDEL or SNP result in the loss of the stop codon
Expand Down
2 changes: 1 addition & 1 deletion bamdash/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""interactively visualize coverage and tracks"""
_program = "bamdash"
__version__ = "0.2"
__version__ = "0.2.1"
41 changes: 41 additions & 0 deletions bamdash/scripts/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,40 @@ def get_mutations(start, stop, cds, variant, seq):
return ac_exchange, ac_effect


def analyse_a3_signature(variant, seq):
    """
    analyse if a SNP lies in an APOBEC3 recognition site
    (TC) or (GA, the reverse complement) and if it is a C>T or
    G>A (reverse complement) exchange
    :param variant: slice of the variant df; the keys "type", "position",
        "reference" and "mutation" are read ("position" is treated as
        1-based by the slicing below)
    :param seq: sequence of the ref (anything that can be sliced and
        converted with str())
    :return: tuple of ("YES" or "NO", checked site); in the site string the
        mutated nt is enclosed in > <. The site is "-" if no dinucleotide
        was checked (no SNP, ref is neither C nor G, or the variant lies at
        the sequence boundary so that the neighbouring nt does not exist)
    """

    not_checked = ("NO", "-")

    if variant["type"] != "SNP":
        return not_checked

    pos = variant["position"]
    ref = variant["reference"]

    # for C ref check the prior nt
    if ref == "C":
        # at the first position there is no prior nt; pos-2 would become a
        # negative index and wrap around to the end of the sequence
        if pos < 2:
            return not_checked
        site = str(seq[pos-2:pos])
        # position beyond the end of the sequence -> no complete site
        if len(site) != 2:
            return not_checked
        # check if the site and mutation type is in line with A3A activity
        is_a3 = site == "TC" and variant["mutation"] == "T"
        return ("YES" if is_a3 else "NO"), f"{site[0]}>{site[1]}<"

    # for G ref check the following nt
    if ref == "G":
        # pos-1 must not become a negative (wrapping) index
        if pos < 1:
            return not_checked
        site = str(seq[pos-1:pos+1])
        # at the last position there is no following nt -> slice is too short
        if len(site) != 2:
            return not_checked
        # check if the site and mutation type is in line with A3A activity
        is_a3 = site == "GA" and variant["mutation"] == "A"
        return ("YES" if is_a3 else "NO"), f">{site[0]}<{site[1]}"

    return not_checked


def annotate_vcf_df(vcf_df, cds_dict, seq):
"""
annotate mutations for their aminoacid effect
Expand Down Expand Up @@ -467,6 +501,13 @@ def annotate_vcfs_in_tracks(track_data):
return track_data
# annotate each vcf df
for vcf_track in index_positions[1]:
# analyse for potential a3a activity
a3_signatures, sites = [], []
for variant in track_data[vcf_track][0].iterrows():
a3_result = analyse_a3_signature(variant[1], track_data[gb_indices[0]][2])
a3_signatures.append(a3_result[0]), sites.append(a3_result[1])
track_data[vcf_track][0]["potential APOBEC3 activity"] = a3_signatures
track_data[vcf_track][0]["checked site"] = sites
# check if CDS is present
if "CDS" not in track_data[gb_indices[0]][0]:
continue
Expand Down

0 comments on commit 7247533

Please sign in to comment.