In [None]:
import logging
from enum import Enum
import re
import csv

from dotenv import load_dotenv
import requests

from variation.query import QueryHandler

In [None]:
# Load environment variables from a .env file. They are expected to configure
# the gene-normalizer DynamoDB instance and the UTA DB credentials.
load_dotenv()

In [None]:
# query_handler = QueryHandler()

In [None]:
# Download every feature record from the moalmanac features endpoint.
# The timeout keeps this cell from hanging forever if the server stalls, and
# raise_for_status() surfaces HTTP errors instead of trying to parse an error
# page as JSON.
r = requests.get("https://moalmanac.org/api/features", timeout=30)
r.raise_for_status()
variants_resp = r.json()
variants = list()

In [None]:
# Build a human-readable "feature" label for each record in the API response
# and keep only the records that are worth sending through normalization.
#
# no_feature_found maps a record id to the raw API entry for every record
# for which no label could be built.
no_feature_found = dict()
for v in variants_resp:
    variant_record = {
        "id": v["feature_id"]
    }

    # An entry with a missing or empty attribute list cannot be labelled;
    # record it and move on rather than aborting the whole run.
    attributes = v.get("attributes")
    if not attributes:
        no_feature_found[variant_record["id"]] = v
        continue
    variant_record.update(attributes[0])

    feature_type = variant_record["feature_type"]
    # Only feature types flagged as supported are passed on for analysis.
    supported = False
    if feature_type == "rearrangement":
        gene1 = variant_record.get("gene1")
        gene2 = variant_record.get("gene2")
        rearrangement_type = variant_record.get("rearrangement_type")
        feature = "{}{}{}".format(gene1 if gene1 else "",
                                  f"--{gene2}" if gene2 else "",
                                  f" {rearrangement_type}" if rearrangement_type else "")
    elif feature_type == "somatic_variant":
        gene = variant_record.get("gene")
        protein_change = variant_record.get("protein_change")
        feature = "{}{}".format(gene if gene else "",
                                f" {protein_change}" if protein_change else "")
        supported = True
    elif feature_type == "germline_variant":
        # TODO: Check this
        # NOTE(review): compares against the string "1.0"; confirm the API
        # does not return a numeric value here.
        feature = "{}{}".format(variant_record["gene"], " (Pathogenic)"
                                if variant_record["pathogenic"] == "1.0" else "")
    elif feature_type == "copy_number":
        feature = "{} {}".format(variant_record["gene"], variant_record["direction"])
    elif feature_type == "microsatellite_stability":
        # Fall back to an empty label so a missing status is caught by the
        # "no feature" check below instead of becoming the string "None".
        feature = "{}".format(variant_record.get("status") or "")
    elif feature_type == "mutational_signature":
        csn = variant_record["cosmic_signature_number"]
        feature = "COSMIC Signature {}".format(csn)
    elif feature_type == "mutational_burden":
        clss = variant_record["classification"]
        min_mut = variant_record["minimum_mutations"]
        mut_per_mb = variant_record["mutations_per_mb"]
        # Prefer the absolute mutation threshold; otherwise use the per-Mb one.
        if min_mut:
            threshold = f" (>= {min_mut} mutations)"
        elif mut_per_mb:
            threshold = f" (>= {mut_per_mb} mutations/Mb)"
        else:
            threshold = ""
        feature = "{}{}".format(clss, threshold)
    elif feature_type == "neoantigen_burden":
        feature = "{}".format(variant_record["classification"])
    elif feature_type in ("knockdown", "silencing"):
        feature = "{}{}".format(variant_record["gene"], f" ({variant_record['technique']})"
                                if variant_record["technique"] else "")
    else:
        feature = "{}".format(variant_record["event"])

    if not feature:
        # Nothing to analyse for this record; remember it and skip. Without
        # the skip, an empty label would slip past the one-token filter below
        # (an empty string splits into zero tokens, not one).
        no_feature_found[variant_record["id"]] = v
        continue

    if len(feature.split()) == 1:
        # A lot of these are just Gene names
        continue

    if not supported:
        # Do not even try to normalize
        continue

    variant_record["feature"] = feature
    # TODO: analysis
    # NOTE(review): variant_record is not stored anywhere yet; presumably it
    # should be appended to `variants` once the analysis step exists.


