In [1]:
import unittest
%run VCF_annotation_functions.ipynb

In [2]:
# Fixture: a fully populated annotation response (a list holding one variant
# record) for 1:g.1158631A>G on GRCh37.
# test_get_info passes it to get_info and expects
#     (['SDF4'], ['synonymous_variant'], [0.0467])
# Entries whose values match that expected result are marked inline below.
# NOTE(review): the structure looks like an Ensembl VEP REST response, and which
# keys get_info actually reads is not visible in this notebook - confirm both
# against VCF_annotation_functions.ipynb.
testresponse = \
[{'allele_string': 'A/G',
  'assembly_name': 'GRCh37',
  'colocated_variants': [{'allele_string': 'COSMIC_MUTATION',
                          'end': 1158631,
                          'id': 'COSV55420653',
                          'phenotype_or_disease': 1,
                          'seq_region_name': '1',
                          'somatic': 1,
                          'start': 1158631,
                          'strand': 1,
                          'var_synonyms': {'COSMIC': ['COSM3750257']}},
                         {'allele_string': 'A/G/T',
                          'end': 1158631,
                          'frequencies': {'G': {'aa': 0.9789,
                                                'afr': 0.9985,
                                                'amr': 0.9294,
                                                'sas': 0.956}},
                          'id': 'rs6603781',
                          'minor_allele': 'A',
                          'minor_allele_freq': 0.0467,  # matches the expected [0.0467]
                          'seq_region_name': '1',
                          'start': 1158631,
                          'strand': 1}],
  'end': 1158631,
  'id': '1:g.1158631A>G',
  'input': '1:g.1158631A>G',
  'most_severe_consequence': 'synonymous_variant',  # matches the expected ['synonymous_variant']
  'seq_region_name': '1',
  'start': 1158631,
  'strand': 1,
  'transcript_consequences': [{'amino_acids': 'D',
                               'biotype': 'protein_coding',
                               'cdna_end': 863,
                               'cdna_start': 863,
                               'cds_end': 570,
                               'cds_start': 570,
                               'codons': 'gaT/gaC',
                               'consequence_terms': ['synonymous_variant'],
                               'gene_id': 'ENSG00000078808',
                               'gene_symbol': 'SDF4',  # matches the expected ['SDF4']
                               'gene_symbol_source': 'HGNC',
                               'hgnc_id': 24188,
                               'impact': 'LOW',
                               'protein_end': 190,
                               'protein_start': 190,
                               'strand': -1,
                               'transcript_id': 'ENST00000263741',
                               'variant_allele': 'G'},
                              {'amino_acids': 'D',
                               'biotype': 'protein_coding',
                               'cdna_end': 822,
                               'cdna_start': 822,
                               'cds_end': 570,
                               'cds_start': 570,
                               'codons': 'gaT/gaC',
                               'consequence_terms': ['synonymous_variant'],
                               'gene_id': 'ENSG00000078808',
                               'gene_symbol': 'SDF4',
                               'gene_symbol_source': 'HGNC',
                               'hgnc_id': 24188,
                               'impact': 'LOW',
                               'protein_end': 190,
                               'protein_start': 190,
                               'strand': -1,
                               'transcript_id': 'ENST00000545427',
                               'variant_allele': 'G'}]}]

In [3]:
# Fixture: the same variant record as `testresponse`, but with three keys
# deliberately disabled (left as comments so the difference stays visible):
#   - 'minor_allele_freq' in the second colocated variant
#   - 'most_severe_consequence' at the top level
#   - 'gene_symbol' in the first transcript consequence
# test_get_info passes it to get_info and expects the fallback result
#     (['unknown'], ['unknown'], ['unknown'])
# NOTE(review): the second transcript consequence still carries
# 'gene_symbol': 'SDF4', yet the test expects 'unknown' for every field. That
# suggests get_info only inspects the first transcript - confirm against
# VCF_annotation_functions.ipynb.
testresponse2 = \
[{'allele_string': 'A/G',
  'assembly_name': 'GRCh37',
  'colocated_variants': [{'allele_string': 'COSMIC_MUTATION',
                          'end': 1158631,
                          'id': 'COSV55420653',
                          'phenotype_or_disease': 1,
                          'seq_region_name': '1',
                          'somatic': 1,
                          'start': 1158631,
                          'strand': 1,
                          'var_synonyms': {'COSMIC': ['COSM3750257']}},
                         {'allele_string': 'A/G/T',
                          'end': 1158631,
                          'frequencies': {'G': {'aa': 0.9789,
                                                'afr': 0.9985,
                                                'amr': 0.9294,
                                                'sas': 0.956}},
                          'id': 'rs6603781',
                          'minor_allele': 'A',
                          # intentionally removed -> 'minor_allele_freq': 0.0467,
                          'seq_region_name': '1',
                          'start': 1158631,
                          'strand': 1}],
  'end': 1158631,
  'id': '1:g.1158631A>G',
  'input': '1:g.1158631A>G',
  # intentionally removed -> 'most_severe_consequence': 'synonymous_variant',
  'seq_region_name': '1',
  'start': 1158631,
  'strand': 1,
  'transcript_consequences': [{'amino_acids': 'D',
                               'biotype': 'protein_coding',
                               'cdna_end': 863,
                               'cdna_start': 863,
                               'cds_end': 570,
                               'cds_start': 570,
                               'codons': 'gaT/gaC',
                               'consequence_terms': ['synonymous_variant'],
                               'gene_id': 'ENSG00000078808',
                               # intentionally removed -> 'gene_symbol': 'SDF4',
                               'gene_symbol_source': 'HGNC',
                               'hgnc_id': 24188,
                               'impact': 'LOW',
                               'protein_end': 190,
                               'protein_start': 190,
                               'strand': -1,
                               'transcript_id': 'ENST00000263741',
                               'variant_allele': 'G'},
                              {'amino_acids': 'D',
                               'biotype': 'protein_coding',
                               'cdna_end': 822,
                               'cdna_start': 822,
                               'cds_end': 570,
                               'cds_start': 570,
                               'codons': 'gaT/gaC',
                               'consequence_terms': ['synonymous_variant'],
                               'gene_id': 'ENSG00000078808',
                               'gene_symbol': 'SDF4',
                               'gene_symbol_source': 'HGNC',
                               'hgnc_id': 24188,
                               'impact': 'LOW',
                               'protein_end': 190,
                               'protein_start': 190,
                               'strand': -1,
                               'transcript_id': 'ENST00000545427',
                               'variant_allele': 'G'}]}]

In [4]:
# create test class

class testVCF_annotation(unittest.TestCase):
    
    def test_get_variant_type(self):
        self.assertEqual(get_variant_type('A','G'), 'substitution')
        self.assertEqual(get_variant_type('A','AG'), 'insertion')
        self.assertEqual(get_variant_type('ATTT','A'), 'deletion')
        self.assertEqual(get_variant_type('AGCTTC','AGGTTA'), 'unknown')
        
    def test_get_hgvs_notation(self):
        chromosome = '5' 
        position = 1234
        placeholder = '1:g.25362501C>A'  # for unknown variant type
        
        self.assertEqual(get_hgvs_notation(chromosome, position,'A','G','substitution'), '5:g.1234A>G')
        self.assertEqual(get_hgvs_notation(chromosome, position,'A','AG','insertion'), '5:g.1234_1235insG')
        self.assertEqual(get_hgvs_notation(chromosome, position,'ATTT','A','deletion'), '5:g.1234_1237del')
        self.assertEqual(get_hgvs_notation(chromosome, position,'AGCTTC','AGGTTA','unknown'), placeholder)
        
    def test_get_info(self):
        self.assertEqual(get_info(testresponse), (['SDF4'], ['synonymous_variant'], [0.0467]))
        self.assertEqual(get_info(testresponse2), (['unknown'], ['unknown'], ['unknown']))
        
    
# Run every TestCase defined in this notebook from inside the running kernel.
#   argv=['']   - hand unittest a dummy argument list so it does not try to
#                 parse the kernel's own command-line arguments
#   verbosity=2 - print one line per test with its name and result
#   exit=False  - do not call sys.exit() when the run finishes, so the kernel
#                 stays alive
# As the last expression in the cell, the returned TestProgram object is echoed
# as cell output; append ';' to suppress that repr.
unittest.main(argv=[''], verbosity=2, exit=False)

test_get_hgvs_notation (__main__.testVCF_annotation) ... ok
test_get_info (__main__.testVCF_annotation) ... ok
test_get_variant_type (__main__.testVCF_annotation) ... ok

----------------------------------------------------------------------
Ran 3 tests in 0.021s

OK


<unittest.main.TestProgram at 0x21e2eff4880>