Skip to content

Commit

Permalink
updated cosmic version and fixed CDS mutations translation from COSMI…
Browse files Browse the repository at this point in the history
…C, also fixed parameter shortcuts in ensembl download
  • Loading branch information
husensofteng committed Aug 20, 2021
1 parent 62515ff commit 79b399a
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 9 deletions.
10 changes: 5 additions & 5 deletions pypgatk/cgenomes/cgenomes_proteindb.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_multiple_options(options_str: str):
def get_mut_pro_seq(snp, seq):
nucleotide = ["A", "T", "C", "G"]
mut_pro_seq = ""
if "?" not in snp.dna_mut: # unambiguous DNA change known in CDS sequence
if "?" not in snp.dna_mut and snp.aa_mut!='p.?': # unambiguous DNA change known in CDS sequence
positions = re.findall(r'\d+', snp.dna_mut)
if ">" in snp.dna_mut and len(positions) == 1: # Substitution
tmplist = snp.dna_mut.split(">")
Expand Down Expand Up @@ -149,19 +149,19 @@ def get_mut_pro_seq(snp, seq):

def cosmic_to_proteindb(self):
"""
This function translate the mutation file + COSMIC genes into a protein Fasta database. The
method write into the file system the output Fasta.
This function translates the mutation file + COSMIC genes into a protein Fasta database. The
method writes into the file system the output Fasta.
:return:
"""
self.get_logger().debug("Starting reading the All cosmic genes")
self.get_logger().debug("Starting reading All cosmic genes")
COSMIC_CDS_DB = {}
for record in SeqIO.parse(self._local_complete_genes, 'fasta'):
try:
COSMIC_CDS_DB[record.id].append(record)
except KeyError:
COSMIC_CDS_DB[record.id] = [record]

cosmic_input = open(self._local_mutation_file, encoding="latin-1") # CosmicMutantExport.tsv
cosmic_input = open(self._local_mutation_file, encoding="latin-1")

header = cosmic_input.readline().split("\t")
regex = re.compile('[^a-zA-Z]')
Expand Down
4 changes: 2 additions & 2 deletions pypgatk/commands/ensembl_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
@click.option('-l', '--list_taxonomies',
help='Print the list of all the taxonomies in ENSEMBL (https://www.ensembl.org)', is_flag=True,
default=False)
@click.option('-sg', '--skip_gtf', help="Skip the gtf file during the download", is_flag=True)
@click.option('-sg', '--skip_gtf', help="Skip the GTF file during the download", is_flag=True)
@click.option('-sp', '--skip_protein', help="Skip the protein fasta file during download", is_flag=True)
@click.option('-sc', '--skip_cds', help='Skip the CDS file download', is_flag=True)
@click.option('-sd', '--skip_cdna', help='Skip the cDNA file download', is_flag=True)
@click.option('-sdn', '--skip_cdna', help='Skip the cDNA file download', is_flag=True)
@click.option('-sn', '--skip_ncrna', help='Skip the ncRNA file download', is_flag=True)
@click.option('-sd', '--skip_dna', help='Skip the DNA (reference genome assembly) file download', is_flag=True)
@click.option('-sv', '--skip_vcf', help='Skip the VCF variant file', is_flag=True)
Expand Down
4 changes: 2 additions & 2 deletions pypgatk/config/cosmic_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ cosmic_data:
output_directory: database_cosmic
cosmic_server:
cosmic_ftp: https://cancer.sanger.ac.uk
mutations_url: cosmic/file_download/GRCh38/cosmic/v88
mutations_url: cosmic/file_download/GRCh38/cosmic/v94
all_cds_genes_file: All_COSMIC_Genes.fasta.gz
mutations_file: CosmicMutantExport.tsv.gz
mutations_cellline_url: cosmic/file_download/GRCh38/cell_lines/v92
mutations_cellline_url: cosmic/file_download/GRCh38/cell_lines/v94
mutations_cellline_file: CosmicCLP_MutantExport.tsv.gz
all_celllines_genes_file: All_CellLines_Genes.fasta.gz
cosmic_user: ''
Expand Down

0 comments on commit 79b399a

Please sign in to comment.