In [1]:
from civicpy import civic
import requests
import csv

In [2]:
# Load civicpy's local cache before querying.
# NOTE(review): on_stale='ignore' presumably means an out-of-date cache is used
# as-is rather than refreshed — confirm against the civicpy documentation.
civic.load_cache(on_stale='ignore')
# Every variant known to civicpy; this list feeds the filtering and export cells below.
variants = civic.get_all_variants()

In [3]:
def collapse_civic_coordinates(civic_variant):
    """Render a variant's genomic coordinates as a single string.

    Args:
        civic_variant: Object exposing a ``coordinates`` attribute that carries
            ``chromosome``, ``start``, ``stop`` and the secondary
            ``chromosome2``, ``start2``, ``stop2`` fields.

    Returns:
        ``''`` when no primary chromosome is set; otherwise
        ``'chromosome:start-stop'``, with ``',chromosome2:start2-stop2'``
        appended when a second chromosome is present.
    """
    # Read from the argument. The previous body used a notebook-global `v`,
    # which only worked when the caller's loop variable happened to be named
    # `v` and otherwise raised NameError or described the wrong variant.
    coordinates = civic_variant.coordinates
    if not coordinates.chromosome:
        return ''
    c1 = f'{coordinates.chromosome}:{coordinates.start}-{coordinates.stop}'
    if coordinates.chromosome2:
        c2 = f'{coordinates.chromosome2}:{coordinates.start2}-{coordinates.stop2}'
        return ','.join([c1, c2])
    return c1

# Column names, in output order, for the exported TSV; passed as `fieldnames`
# to csv.DictWriter in the export cell, so each written record must use these keys.
header = [
    'gene_symbol',
    'entrez_id',
    'civic_variant_name',
    'civic_variant_id',
    'civic_aliases',
    'allele_registry_id',
    'genomic_coordinates',
    'genomic_ref',
    'genomic_alt',
    # Disabled columns — presumably planned for a later revision (TODO confirm):
#     'primary_form',
#     'primary_form_vrs',
]

In [4]:
# Sanity check: number of variants returned by civic.get_all_variants().
len(variants)

2565

In [5]:
# Index every variant type object seen across all variants by its name.
# When several variants share a type name, the last one encountered wins.
variant_types = {}
for v in variants:
    variant_types.update({v_type.name: v_type for v_type in v.types})

# Type object used later to exclude fusion variants.
fusion_type = variant_types['transcript_fusion']

In [6]:
# Names of the variant types that may pass the small-variant filter.
allowed_type_names = [
    'missense_variant',
    'frameshift_truncation',
    'stop_gained',
    'frameshift_variant',
    'inframe_deletion',
    'inframe_insertion',
    'splice_donor_variant',
    'synonymous_variant',
    'polymorphic_sequence_variant',
    'stop_lost',
    'start_lost',
    'SNP',
    'frameshift_elongation',
]

# Resolve each name to its type object via `variant_types`; a name that is
# missing from that index raises KeyError here.
allowed_types = {variant_types[type_name] for type_name in allowed_type_names}

In [7]:
def is_small_MNV_or_indel(variant):
    """Decide whether ``variant`` passes the small-variant filter.

    A variant passes only when all of the following hold:

    * its name contains neither ``'-'`` nor ``'ZYGO'``;
    * it has at least one type in the module-level ``allowed_types``;
    * the module-level ``fusion_type`` is not among its types;
    * it has no second chromosome;
    * chromosome, start and stop are all set (truthy);
    * ``stop - start`` is below 25;
    * reference bases or variant bases (or both) are present.

    Returns:
        bool: ``True`` when every condition holds, ``False`` otherwise.
    """
    coords = variant.coordinates
    label = variant.name

    # Name-based exclusions.
    if any(marker in label for marker in ('-', 'ZYGO')):
        return False

    # Type-based exclusions.
    if allowed_types.isdisjoint(variant.types):
        return False
    if fusion_type in variant.types:
        return False

    # Coordinate-based exclusions; the presence check must precede the
    # subtraction so that missing start/stop values never reach it.
    if coords.chromosome2:
        return False
    has_primary_span = coords.chromosome and coords.start and coords.stop
    if not has_primary_span:
        return False
    span = coords.stop - coords.start
    if span >= 25:
        return False

    # At least one allele string has to be recorded.
    return bool(coords.reference_bases or coords.variant_bases)

def is_tier_i_evidence(variant):
    """Report whether any evidence item on ``variant`` has level 'A' or 'B'.

    Args:
        variant: Object whose ``evidence`` attribute is an iterable of items
            that each expose an ``evidence_level`` attribute.

    Returns:
        bool: ``True`` as soon as one item's level is ``'A'`` or ``'B'``;
        ``False`` when none match or the evidence collection is empty.
    """
    # any() stops at the first match instead of scanning every evidence item.
    return any(e.evidence_level in ('A', 'B') for e in variant.evidence)

In [8]:
# Export every variant that passes both filters (small MNV/indel and at least
# one level A/B evidence item) as one tab-separated row per variant.
# newline='' is what the csv module requires of the file object so the writer's
# own line terminator is not translated again (avoids '\r\r\n' on Windows);
# the explicit encoding keeps the output independent of the platform locale.
with open('civic.2020-06-30.tsv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=header, delimiter='\t')
    writer.writeheader()
    for v in variants:
        if not (is_small_MNV_or_indel(v) and is_tier_i_evidence(v)):
            continue
        # Keys must match `header`; DictWriter rejects unknown field names.
        record = {
            'gene_symbol': v.gene.name,
            'entrez_id': v.gene.entrez_id,
            'civic_variant_name': v.name,
            'civic_variant_id': v.id,
            'civic_aliases': ','.join(v.aliases),
            'allele_registry_id': v.allele_registry_id,
            'genomic_coordinates': collapse_civic_coordinates(v),
            'genomic_ref': v.coordinates.reference_bases,
            'genomic_alt': v.coordinates.variant_bases,
#             'primary_form': '',
#             'primary_form_vrs': ''
        }
        writer.writerow(record)