In [23]:
# Set the process name to be human readable in htop
import setproctitle
setproctitle.setproctitle("Generate_Proteins_Summary")

import pandas as pd
pd.options.display.max_columns = 999

from tqdm import tqdm, tqdm_notebook
from tqdm._tqdm_notebook import tqdm_notebook

import helper as my

from collections import defaultdict

# Paramaters for file locations

In [4]:
# Base directory for whole project
base_dir = "/home/sdw95/3D_SARS2"

# Fetch Info for Human / Viral Proteins

In [5]:
# NOTE: This script based on some hardcoded local resources
#       Protein info pulled from our ECLAIR pipeline for (local) convenience

In [6]:
# Uniprot Info Generated in ECLIAR
uniprot_info = pd.read_csv("/home/adr66/eclair/data/uniprot_info.txt", sep="\t")

# Original Interaction List from Krogan Paper
interactions = pd.read_csv("/home/sdw95/Collaborators/Lab_Member_Requests/Haiyuan/2020_03_27_COVID19_3DInteractome/COVID19_Interactome.txt", sep="\t")
id2gene = interactions.set_index("Preys")["PreyGene"].to_dict() # Map Human UniProt to Prefered Gene Name

# Generate Full Set of Identifiers submitted to ECLIAR
interactions2 = pd.read_csv("/home/sdw95/Collaborators/Eclair_Runs/2020_04_22_COVID19_Human_Interactome/Interactions.txt", sep="\t", names=["P1", "P2"])
all_ids = set(interactions2["P1"].to_list() + interactions2["P2"].to_list())

# Pull out / Reformat Lines in UniProt Info we care about
protein_summary = uniprot_info[uniprot_info["id"].map(lambda x: x in all_ids)][["id", "reviewed", "genes", "protein names", "length", "sequence"]]
protein_summary["Is_Viral"] = protein_summary["id"].map(lambda x: "COVID" in x)
protein_summary["reviewed"] = protein_summary[["id", "reviewed"]].apply(lambda x: True if x[1] == "reviewed" and not "COVID" in x[0] else False, axis=1)
protein_summary["genes"] = protein_summary[["id", "genes"]].apply(lambda x: x[1] if not x[0] in id2gene else id2gene[x[0]], axis=1)

In [8]:
# Save
#protein_summary.sort_values(["Is_Viral", "genes"])[["id", "Is_Viral", "reviewed", "genes", "protein names", "length", "sequence"]]
protein_summary.sort_values(["Is_Viral", "genes"])[["id", "Is_Viral", "genes", "length", "sequence"]].to_csv("{0}/Data/Proteins.txt".format(base_dir), sep="\t", header=["ID", "Is_Viral", "Gene_Name", "Length", "Sequence"], index=None)

# Fetch Human UniProt Domain Annotations

In [20]:
# NOTE: Also pulled from ECLAIR pipeline rather than generated from scratch
#       This data only used for web-display (and not positive the intended function
#       of this data was retained)

In [11]:
# Read ECLAIR Domain Info
pfam_doms = pd.read_csv("/home/adr66/eclair/features/per_feature/pfam_domains.txt", names=["ID", "Is_Domain"], sep="\t")
pfam_doms = pfam_doms[pfam_doms["ID"].map(lambda x: x in all_ids)]

pfam_doms["Is_Viral"] = pfam_doms["ID"].map(lambda x: "COVID" in x)

pfam_doms.sort_values(["Is_Viral", "ID"])[["ID", "Is_Viral", "Is_Domain"]].to_csv("{0}/Data/Protein_Domains.txt".format(base_dir), sep="\t", index=None)

# Add COVID UniProt IDs to Protein Info where Available

In [21]:
# Re-read Protein summary
proteins = pd.read_csv("{0}/Data/Proteins.txt".format(base_dir), sep="\t")

# Read local covid19 uniprot fasta
# I think this was just a download through the UniProt
# COVID19 resource (https://covid-19.uniprot.org/uniprotkb?query=*)
covid_fasta = my.fasta2dict("../uniprot_covid_19.fasta")

covid_fasta = {k.split("|")[1]:v for k, v in covid_fasta.iteritems() if "OX=2697049" in k}

In [24]:
# Select most appropriate UniProt Mapping for each protein
covid2best = defaultdict(lambda: ["None", {"Pident":0}])
for uniA, seqA in tqdm_notebook(proteins[["ID", "Sequence"]].values):
    if(not "COVID19" in uniA):
        continue
    
    for uniB, seqB in covid_fasta.iteritems():
        # For some reason manually selected which UniProt should be used for
        # orf8?
        if((uniA == "orf8") and not uniB == "P0DTC8"):
            continue
        print uniA, uniB
        align = my.NWSeqAlignment(seqB, seqA)
        print align
        if(align["Pident"] > covid2best[uniB][1]["Pident"]):
            covid2best[uniB] = [uniA, align]

HBox(children=(IntProgress(value=0, max=359), HTML(value=u'')))

COVID19E P0DTD2
{'Coverage2': 0.8666666666666667, 'Begin': 0, 'End': 107, 'Coverage1': 0.6701030927835051, 'Pident': 0.21333333333333335, 'Score': -12.5, 'Positives': 0.37333333333333335, 'Align2': 'MYSFVSEETGTL---------------------------IVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV-----', 'Align1': 'MDPKISEMHPALRLVDPQIQLAVTRMENAVGRDQNNVGPKVYPIILRLGSPLSLNMARKTLNSLEDKAFQLTPIAVQMTKLA----------TTEELPDEFVVVTVK', 'Alignment': '|---+||----|                           -|--++|-|---+-|-+----|-+|---|+----+-|-+-|-+          -+--+||--|     '}
COVID19E P0DTC1
{'Coverage2': 1.0, 'Begin': 0, 'End': 4405, 'Coverage1': 0.0170261066969353, 'Pident': 0.5733333333333334, 'Score': -2052.5, 'Positives': 0.8133333333333334, 'Align2': 'M--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 7096, 'Coverage1': 0.010569334836527621, 'Pident': 0.6533333333333333, 'Score': -3383.5, 'Positives': 0.88, 'Align2': 'M----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSFVS------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------EETG-------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 1273, 'Coverage1': 0.0589159465828751, 'Pident': 0.56, 'Score': -527.0, 'Positives': 0.72, 'Align2': 'MYSF-----------------------------------------------------------------VSEETGT-----------------------------------------LIVNS---------------------------------------------------------------------------------------------------------------------VLLFL-------------------AFVV-------FLL-------VTLAILTAL----------------------------------------RLCAY----------------------C----------------------------------------------------------------------------------------------------------------------CNIV----------------------------------------------NVSLVK-----------------------------------------------------------------------------------------------PSFYVYSRVKNL-------------------------------------------------------------------NSSRVP--------------------------------------------------------------------------------------------------------DLL--------------------------------------

{'Coverage2': 0.2747747747747748, 'Begin': 0, 'End': 222, 'Coverage1': 1.0, 'Pident': 0.26229508196721313, 'Score': -51.0, 'Positives': 0.5737704918032787, 'Align2': 'MADSNGTITVEELKKLLEQWNLVIGFLFLTWICLLQFAYANRNRFLYIIKLIFLWLLWPVTLACFVLAAVYRINWITGGIAIAMACLVGLMWLSYFIASFRLFARTRSMWSFNPETNILLNVPLHGTILTRPLLESELVIGAVILRGHLRIAGHHLGRCDIKDLPKEITVATSRTLSYYKLGASQRVAGDSGFAAYSRYRIGNYKLNTDHSSSSDNIALLVQ', 'Align1': 'M---------------------------------------------------FHLVDFQVTIAEILL---------------------------IIMRTFKV-----SIWNLDYIINLI-----------------------------------------IKNLSKSLT-------------------------------------ENKYSQLDEEQPMEID', 'Alignment': '|                                                   |--+-+-||+|--+|                           --+-+|++     |+|+-+---|++                                         ||+|-|-+|                                     ---+|---+---+-+-'}
COVID19M P0DTC5
{'Coverage2': 1.0, 'Begin': 0, 'End': 222, 'Coverage1': 1.0, 'Pident': 1.0, 'Score': 1152.0, 'Positives': 1.0, 'Align

{'Coverage2': 0.972972972972973, 'Begin': 0, 'End': 1279, 'Coverage1': 0.16967792615868027, 'Pident': 0.38288288288288286, 'Score': -388.0, 'Positives': 0.5675675675675675, 'Align2': 'M---------------------------------------------------------------------ADSNGTITVEELKKL---------------LEQWNLVIGFLFLTW--------------------ICLLQF-------------------------AYANRNRFLYI--------------------------------------IKLI------------------------FLWLL---------------WPV-----------------------------------------TLACFVL---------------------------------------AAVYRIN---------------------------------------------------------------------------------W------------------------------------------------------------------------------------------------------------------------------------------------------IT----GGIAI------------------------------------------------AMACLVGL----------------MWLSY-----------FIASFRLFARTRSMWSFNP----------ETNILLNV-----PL--------------------------HGTILTR------------------------------------------------

333/|/ 93%|| 333/359 [00:20<00:01, 16.63it/s]{'Coverage2': 0.9928400954653938, 'Begin': 0, 'End': 4408, 'Coverage1': 0.09443813847900114, 'Pident': 0.4701670644391408, 'Score': -1625.0, 'Positives': 0.6372315035799523, 'Align2': 'M------------------------------------------------------------------------SD--------------------------------------------------NG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PQN----------------------------------QRNAPRITFGG-------------------------------PSDSTGSNQN--------------------------------------------------------------------------GERSGARS----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------KQRRPQGL-----------------------------------------

{'Coverage2': 0.28878281622911695, 'Begin': 0, 'End': 419, 'Coverage1': 1.0, 'Pident': 0.30578512396694213, 'Score': -130.5, 'Positives': 0.5206611570247934, 'Align2': 'MSDNGPQNQRNAPRITFGGPSDSTGSNQNGERSGARSKQRRPQGLPNNTASWFTALTQHGKEDLKFPRGQGVPINTNSSPDDQIGYYRRATRRIRGGDGKMKDLSPRWYFYYLGTGPEAGLPYGANKDGIIWVATEGALNTPKDHIGTRNPANNAAIVLQLPQGTTLPKGFYAEGSRGGSQASSRSSSRSRNSSRNSTPGSSRGTSPARMAGNGGDAALALLLLDRLNQLESKMSGKGQQQQGQTVTKKSAAEASKKPRQKRTATKAYNVTQAFGRRGPEQTQGNFGDQELIRQGTDYKHWPQIAQFAPSASAFFGMSRIGMEVTPSGTWLTYTGAIKLDDKDPNFKDQVILLNKHIDAYKTFPPTEPKKDKKKKADETQALPQRQKKQQTVTLLPAADLDDFSKQLQQSMSSADSTQA', 'Align1': 'M------------KIIL-----------------------------------FLAL---------------ITLAT-----CELYHYQEC---VRGTTVLLKE--PCSSGTYEGNSPFH--PLADNKFALTCFSTQFAFACPD---GVKH-------VYQLRARSVSPKLFIRQ--------------------------------------------------EEVQELYSPI------------------------------------------------------------------------FLIVAAIVF--------IT-----LCFT-------------------------------------LKRKTE---------------------

{'Coverage2': 0.9474940334128878, 'Begin': 0, 'End': 1295, 'Coverage1': 0.3118617439120189, 'Pident': 0.31026252983293556, 'Score': -271.5, 'Positives': 0.45346062052505964, 'Align2': 'M----------------------------------------------------------------------SDNGPQ-----------------------------------------------------------------------------------------------------------NQRNAPRITFGG------------------------------------------------------PSDSTGS----------------------NQNG-------------------------ERSGARSKQRRPQ------GLPN--------------------------------------NTASWFT----------------------ALTQHGKEDLKFPRGQ----------------GVPINTNSS-----------------------------------------------------------PDDQIGY--YR-------------------RATRRIRG-----------GDGKMKD--------------------------------------------------------------------------LSPRWYFYYLGT-----------------------------------------------------------GPEAGLPYGANKDGI-----IWVATE-------------------------------GA----LNTPKDHIGTRNPANNAAIVLQLPQGTTLP--KGFYAEGSRGGSQASSRSSSR

{'Coverage2': 0.07619795758051846, 'Begin': 0, 'End': 1273, 'Coverage1': 1.0, 'Pident': 0.4536082474226804, 'Score': -498.0, 'Positives': 0.6597938144329897, 'Align2': 'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADA

{'Coverage2': 0.05734485467399843, 'Begin': 0, 'End': 1273, 'Coverage1': 1.0, 'Pident': 0.5205479452054794, 'Score': -523.5, 'Positives': 0.6712328767123288, 'Align2': 'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADA

{'Coverage2': 0.047918303220738416, 'Begin': 0, 'End': 1273, 'Coverage1': 1.0, 'Pident': 0.5245901639344263, 'Score': -539.0, 'Positives': 0.7377049180327869, 'Align2': 'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLAD

{'Coverage2': 0.206598586017282, 'Begin': 0, 'End': 1285, 'Coverage1': 0.9563636363636364, 'Pident': 0.4218181818181818, 'Score': -315.5, 'Positives': 0.5527272727272727, 'Align2': 'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPL-VDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLK---SFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVI-RGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVN---CTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPS

{'Coverage2': 0.3118617439120189, 'Begin': 0, 'End': 1295, 'Coverage1': 0.9474940334128878, 'Pident': 0.31026252983293556, 'Score': -271.5, 'Positives': 0.45346062052505964, 'Align2': 'MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHVSGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPFLGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPINLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYNENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFLPFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLTPTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLGAENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGIAVEQDKNTQEVFAQVKQIYKTPPIKDF------GGFNFSQILPDP

{'Coverage2': 1.0, 'Begin': 0, 'End': 4405, 'Coverage1': 0.04086265607264472, 'Pident': 1.0, 'Score': -1185.0, 'Positives': 1.0, 'Align2': 'MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGG--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 0.7944444444444444, 'Begin': 0, 'End': 259, 'Coverage1': 0.6441441441441441, 'Pident': 0.24444444444444444, 'Score': -29.5, 'Positives': 0.37777777777777777, 'Align2': 'ME------------------SLVPGFNEKTHV----------------------QLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIK--RSDARTAP----------------HGHVMVELVAELEGIQYGRSGETLGVLV---------PHVG---------EIPVAYRKVL--LRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTR-ELMRELNGG', 'Align1': 'MADSNGTITVEELKKLLEQWNLVIGFLFLTWICLLQFAYANRNRFLYIIKLIFLWLLWPVTLACFVL--------------AAVYRINWITGGIAIAMACLVGLMWLSYFIASFRLFARTRSMWSFNPETNILLNVPLHGTILTRPLLESELV--------IGAVILRGHLRIAGHHLGRCDIKDLPKEITVATSRTLSYYKLGASQRVAGDSGFAAYSRYRIGN------YK-----LNTDHSSSSDNIALLVQ----', 'Alignment': '|-                  +||-||---|-+                      -|--||-----||              |--+--+---|-+-+----|--|-----||-  |--|||--                ||-++---+-|-|-+        +|-++         -|+|         ||-||--+-|  -+---++---|-|--|----+-+|+      |+     -||-|||---- -|+-+    '}
COVID1

{'Coverage2': 0.9944444444444445, 'Begin': 0, 'End': 1274, 'Coverage1': 0.1406127258444619, 'Pident': 0.43333333333333335, 'Score': -425.5, 'Positives': 0.6, 'Align2': 'M---------------------------------------------------ESLVPGFNEKT-----HV-------QLSLPVLQVRD----------VLVRG--FG---DSVEEVL-------------------------------------SEARQHLKDGTCGLVEVEKGVLPQLE---------QPYVFIKRSD--------------ARTAPHGHVMVELVAELE-GIQYGR------------------SGETLGVLVPHVGEI------------------------PVA------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YRKVLL--------------RK--------------NGNKGAG----------------------------------------------------------------------------------------------------------GHSYGADL------------------------------------------------------------------------------------------------------------------------------------------------------KSF--DL---------

{'Coverage2': 1.0, 'Begin': 0, 'End': 4405, 'Coverage1': 0.031555051078320094, 'Pident': 1.0, 'Score': -1372.0, 'Positives': 1.0, 'Align2': '-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 0.8489208633093526, 'Begin': 0, 'End': 243, 'Coverage1': 0.5315315315315315, 'Pident': 0.2517985611510791, 'Score': -35.0, 'Positives': 0.4172661870503597, 'Align2': 'AGNATEVPANSTV---------------------------------------------------LSFCAFAVDAAKAYKDYLASGGQPITNCVKMLC---------------THTGTGQAITVTPEAN----------------MDQESFGGASCCLYCRCHI----DHPNPKGFCDLKG--KYVQIPTT-------------CANDPVGFTLKNTVCTVCGMWK---GYGCSCDQLREPMLQ', 'Align1': 'MAD-----SNGTITVEELKKLLEQWNLVIGFLFLTWICLLQFAYANRNRFLYIIKLIFLWLLWPVTLACFVL--AAVYRINWITGGIAIA----MACLVGLMWLSYFIASFRLFARTRSMWSFNPETNILLNVPLHGTILTRPLLESELVIGA---VILRGHLRIAGHH---LGRCDIKDLPKEITVATSRTLSYYKLGASQRVAGDS-GFAAYSRY--RIGNYKLNTDHSSSSDNIAL-LVQ', 'Alignment': '--+     +|-|+                                                   ++---|-+  |--|+----+||--|-    |-|               ----|----+--||-|                ++-|---||   +--|-|+    -|   -|-||+|-  |-+-+-|+             -|-|- ||---+--  --|-+|   -+--|-|-+-- ++|'}
COVID19nsp10 P0DTD1
{'Coverage2': 1.0, 'Begin': 0, 'End'

{'Coverage2': 1.0, 'Begin': 0, 'End': 1273, 'Coverage1': 0.10919088766692851, 'Pident': 0.4460431654676259, 'Score': -427.0, 'Positives': 0.5683453237410072, 'Align2': '-----------------------------------------------------------------------------------------------------------------------AGNATEVPANSTVLSFC-------------------------------------AFAVD-----------------------------------------------------------------------------------AAKAYKDYL-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ASGGQPI------------------------------TNCVKMLCTH------TGT----------------GQAITVTPEANMDQE----------SFGGAS------------CCLY------------------------------------CRCHIDHPNPKGFCDL-------------------------------------------KGKYVQIPTT-------------------------------CAN--------------------------------------------------------DP---------------------V

{'Coverage2': 1.0, 'Begin': 0, 'End': 7096, 'Coverage1': 0.0018320180383314543, 'Pident': 0.9230769230769231, 'Score': -3513.0, 'Positives': 0.9230769230769231, 'Align2': '------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 1273, 'Coverage1': 0.01021209740769835, 'Pident': 0.6923076923076923, 'Score': -627.0, 'Positives': 0.7692307692307693, 'Align2': '-----------------------------------------------------------------------------------------------------------------------------------------------------------------SAD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AQSF------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LNGFAV-----------------------------------------------------------

{'Coverage2': 0.9871244635193133, 'Begin': 0, 'End': 4417, 'Coverage1': 0.20885357548240635, 'Pident': 0.37875536480686695, 'Score': -1087.5, 'Positives': 0.5579399141630901, 'Align2': '---------------------------------------SADAQSFLNRVCGV--------------------SAARLTPCG--------------TGTSTD-------------VVYR----------------------AFDI--------YND---------------------KVAGFAKFLKTN----------------------------------------CCR--------FQEKDEDDNLIDSYFVVK--------------------------------------------------------------------------------------------------------------------------RHTFSNYQHEE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TIYNLLKD------------------------------CPAV-------AK------HDFFK------------------------------------FR-----------------------IDGDMVP------------------------------------------------------------------

{'Coverage2': 0.07725321888412018, 'Begin': 0, 'End': 935, 'Coverage1': 0.96, 'Pident': 0.4533333333333333, 'Score': -370.5, 'Positives': 0.7066666666666667, 'Align2': 'SADAQSFLNRVCGVSAARLTPCGTGT---STDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVY

{'Coverage2': 0.2371244635193133, 'Begin': 0, 'End': 933, 'Coverage1': 0.9954954954954955, 'Pident': 0.36036036036036034, 'Score': -232.0, 'Positives': 0.581081081081081, 'Align2': 'SADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCD-RAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHT

{'Coverage2': 0.2929184549356223, 'Begin': 0, 'End': 934, 'Coverage1': 0.9927272727272727, 'Pident': 0.3381818181818182, 'Score': -179.0, 'Positives': 0.5527272727272727, 'Align2': 'SADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGF--PFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQH

{'Coverage2': 0.434549356223176, 'Begin': 0, 'End': 946, 'Coverage1': 0.9665871121718377, 'Pident': 0.28162291169451076, 'Score': -127.0, 'Positives': 0.4606205250596659, 'Align2': 'SADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDED--DNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLT-------KGP

{'Coverage2': 0.12312811980033278, 'Begin': 0, 'End': 602, 'Coverage1': 0.9866666666666667, 'Pident': 0.3466666666666667, 'Score': -228.0, 'Positives': 0.56, 'Align2': 'AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQ-FTSLEIPRRNVATLQ', 'Align1': 'M----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSFVSEETG---------

{'Coverage2': 0.415973377703827, 'Begin': 0, 'End': 626, 'Coverage1': 0.9090909090909091, 'Pident': 0.27636363636363637, 'Score': -107.0, 'Positives': 0.44, 'Align2': '--------AVGACVLCNSQTS-------LRCGACI----RRPF--LCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLG-GMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPL-NRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTS--HTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ', 'Align1': 'MDLFMRIFTIGTVTLKQGEIKDATPSDFVRATATIPIQASLPFGWLIVGVALLAVFQSASKIITLKKRW-------------QLALSKGVHFVC----------------------------------NLL-----------LLFVTVYSHLLLVAAGLEAPFLYLYALVYFLQSINFVRIIMRLWLCWKCR

{'Coverage2': 0.6139767054908486, 'Begin': 0, 'End': 651, 'Coverage1': 0.8806682577565632, 'Pident': 0.24821002386634844, 'Score': -74.0, 'Positives': 0.39618138424821003, 'Align2': 'AVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGL--YKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYV---FTGYRVTKNSKVQIGE----------YTFEKG-------DYG---DAVVYRGT----TTYKLNVGDYFVLTSHT-VMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRI---------------IPARARVECFDKFKVNSTLEQYVFCTVNAL----PETTADIVVFDE-ISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQ', 'Align1': '-----MSDNGPQNQR-----------------------------------NAP--------RITFGGPSDSTGSNQ---------NGERSGARSKQRRPQGLPN-------NTASWFTA------------LTQHGKEDLK-----FPRGQG

{'Coverage2': 0.14231499051233396, 'Begin': 0, 'End': 527, 'Coverage1': 1.0, 'Pident': 0.37333333333333335, 'Score': -190.0, 'Positives': 0.56, 'Align2': 'AENVTGLFKDCSKVITGLHPTQAPTHLSVDTKFKTEGLCVDIPGIPKDMTYRRLISMMGFKMNYQVNGYPNMFITREEAIRHVRAWIGFDVEGCHATREAVGTNLPLQLGFSTGVNLVAVPTGYVDTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSMKYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYVYNPFMIDVQQWGFTGNLQSNHDLYCQVHGNAHVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGDELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQ', 'Align1': 'MYS---------------------------------------------------------------------FVSEE-------------------TGTLIVNSVLLFLAF---------------------------------------------------------------------VVFLLVTLAI-LTAL---------RLCAYC----------------------------------------------------------------------------------------------

{'Coverage2': 0.48956356736242884, 'Begin': 0, 'End': 544, 'Coverage1': 0.9381818181818182, 'Pident': 0.28, 'Score': -53.5, 'Positives': 0.4509090909090909, 'Align2': 'AENVTGLFKDCSKVITGLHPTQAPTHLSVDTKFKTEGLCVDIPGIPKDMTYRRLISMMGFKMNYQVNGYPNMFITREEAIRHVRAWI--GFDVEGC------HATREAVGTNLPLQLGFSTGV----NLVAVPTGYVDTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSMKYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYV--YNPF--MIDVQQWGFTGNLQSNHDLYCQVHGNAHVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGDELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEEL-FYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQ', 'Align1': 'MDLFMRIF--------------------------TIGTVTLKQGEIKDAT-------------------PSDFV-RATATIPIQASLPFGWLIVGVALLAVFQSASKIITLKKRWQLALSKGVHFVCNLLLL---FVTVYSHLLLVAAGLEAP----FLYLYALVYFLQSINFVRI-----------------IMRLW-------------------LCWKCRSKNPLLYDAN-YFLCWHTNC-YDYCIPYNSVTSSIVITSGDGTTSPISEHD

{'Coverage2': 0.6489563567362429, 'Begin': 0, 'End': 604, 'Coverage1': 0.8162291169451074, 'Pident': 0.2243436754176611, 'Score': -77.0, 'Positives': 0.3532219570405728, 'Align2': '-----------AENVT---------------------------GLFKDCSKVITGLHPTQAPTHLSVDTKF-KTEGLCVDIPGIPKDMT--YRRLISMM----GFKMNYQVNGYPNMFITREEAIRHVRA------WIGFDVEGC-HATREAVGTNLP-------LQLGFSTGVNLVAVPTGYV---------DTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSMKYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYVYNPFMIDVQQWGFTGNLQSNHDLYCQVHGNAHVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGD-ELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKH--AFHT-----PAFDKSAFVNLKQ-LPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQ', 'Align1': 'MSDNGPQNQRNAPRITFGGPSDSTGSNQNGERSGARSKQRRPQGLPNNTASWFTAL--TQ---HGKEDLKFPRGQGVPINTNSSPDDQIGYYRRATRRIRGGDGKMKDLSPRWYFYYLGTGPEAGLPYGANKDGIIWVA--TEGALNTPKDHIGTRNPANNAAIVLQLPQGT-----TLPKGFYAEGSRGGSQASSRSSSR

{'Coverage2': 0.34971098265895956, 'Begin': 0, 'End': 346, 'Coverage1': 1.0, 'Pident': 0.2975206611570248, 'Score': -70.5, 'Positives': 0.512396694214876, 'Align2': 'SLENVAFNVVNKGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIGVCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYTEISFMLWCKDGHVETFYPKLQ', 'Align1': '--------------------------------------------------------------------MKIILFLALITLATCELYHYQE---------CVRGTTVLLKEP----CS------SGTYEGNSPFHPLADNKFALT--------------------------CFSTQFAFACP-DGV----KHVYQLRARSV---SPK-------LFIRQEEVQELY-----------------------------------SPIFL-----------------------------------------------IVAAIVFITLCF---------------TLKRKTE', 'Alignment': '                                                                    +||+--|-+---|---++-|+-         -|---|-+-|+|

{'Coverage2': 0.9710982658959537, 'Begin': 0, 'End': 1283, 'Coverage1': 0.26394344069128045, 'Pident': 0.34971098265895956, 'Score': -260.5, 'Positives': 0.5317919075144508, 'Align2': 'S------LENVAFNVVN----------------KGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKT-------------------TLPVN--VAFELWAKRNI-----------KPVPEVKILNN------------------LGVDIAANTVIW--------------------------------DYK--------------------------RDAPAHIST--------IGV---------------------------------------------------------CSMTDIAK----------------------KPTETI---------C-----------------------------------APLTVF---------------------------------------------------------------FDGRVDGQVD----LFRNAR--------------------NGVLITEG-------SVKGLQPS------------------------VGPKQA-----------SLNGVT------------------------------------------------------------------------------------------------------LIG-------------------EAVKTQFNYYKKVDGVVQQ-----------------------LPETYFT------------------------------------------------------QSRNLQE--------FK---------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 4405, 'Coverage1': 0.14483541430192962, 'Pident': 1.0, 'Score': 1450.5, 'Positives': 1.0, 'Align2': '------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGG-------------------------------------------

{'Coverage2': 0.11442006269592477, 'Begin': 0, 'End': 640, 'Coverage1': 0.9733333333333334, 'Pident': 0.4266666666666667, 'Score': -238.0, 'Positives': 0.6533333333333333, 'Align2': 'AYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCA--CEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGG', 'Align1': 'MY-----------------------------------------------------------------------------------------SFV-------------------------------------------------------SEETGTLI------

{'Coverage2': 1.0, 'Begin': 0, 'End': 7096, 'Coverage1': 0.0899098083427283, 'Pident': 1.0, 'Score': 105.0, 'Positives': 1.0, 'Align2': '------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGG---------------------------------------------

{'Coverage2': 0.3981191222570533, 'Begin': 0, 'End': 659, 'Coverage1': 0.9236363636363636, 'Pident': 0.2581818181818182, 'Score': -116.0, 'Positives': 0.46545454545454545, 'Align2': 'AYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSAS---TSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSIL----------SPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVE--------FLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGG', 'Align1': '---------------------MDLFMR------------------------------------------------------------------IFTIGTV--TLK-----------QGEIKDATP-------------------

{'Coverage2': 0.06739811912225706, 'Begin': 0, 'End': 638, 'Coverage1': 1.0, 'Pident': 0.4186046511627907, 'Score': -279.0, 'Positives': 0.627906976744186, 'Align2': 'AYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGG', 'Align1': 'MI-----------------------------------ELSLID----FYLC----------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 4405, 'Coverage1': 0.11350737797956867, 'Pident': 1.0, 'Score': 667.5, 'Positives': 1.0, 'Align2': '----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 0.23, 'Begin': 0, 'End': 506, 'Coverage1': 0.9504132231404959, 'Pident': 0.33884297520661155, 'Score': -130.0, 'Positives': 0.5371900826446281, 'Align2': '-KIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIGYKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGDFLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAVNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTPVYSFL--PGVYSVIYLYLTFYLTNDVS---FLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQ', 'Align1': 'MKII--------------LFLALI-----------------------------------TLATC-----------------------------------------------------ELYHY--------------------------------QEC-----------------------------------------------------VRGTTVLLKEPCSSGTYEG---------------NSPF---------------------------------------------------------------

{'Coverage2': 0.97, 'Begin': 0, 'End': 1288, 'Coverage1': 0.3809897879025923, 'Pident': 0.302, 'Score': -189.0, 'Positives': 0.478, 'Align2': 'KIVNNWLKQLIKVTLVFL-------------FVAAIFY---------------LITP----------VHV-------------------------------------------------------------------------------------------------------------------------------------MSKHT--------------------------------------------DFSS--------EIIGY--------KAIDGGVTRDIAS-------------------------------------------------------------------------------------------------TDTCFAN----------------------KHADFD------------TWFSQR-----GGSY-------------------------TNDKAC--------------------------PLIAAVITRE----------------------VGFVVPGLPGT-ILRTTNGDFLHF-------------------------LPRVFSAVGNICYTPSK------LIEYTD----------------------------FATSA-CVLAA-------ECTIFKDASGKPVPYCYDTNV----LEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTF--------------------------DSEYC-----RHGT-C---ERSEAGVCV----STSGRWVLNNDYYRSLP------------------------------------------------GVFCGV

{'Coverage2': 0.3137254901960784, 'Begin': 0, 'End': 307, 'Coverage1': 0.9896907216494846, 'Pident': 0.31958762886597936, 'Score': -57.5, 'Positives': 0.5360824742268041, 'Align2': 'SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVIN-GDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ', 'Align1': '-------------------------------------------------MDPKISEM--------------HPALRLVDPQIQLAVTRME-----------------------------------------------------------------------------------------------------NAVGRDQN---NVGPKVYPIILRLGSPLSLNMARKTLNSL-------------EDKAFQLTPIAVQ-----MTKLATTEE----------------LPDEF----VV-----VTVK', 'Alignment': '                                                 ++|---++              +--||++---+|--|-+++                                                                                   

{'Coverage2': 0.3888888888888889, 'Begin': 0, 'End': 308, 'Coverage1': 0.9834710743801653, 'Pident': 0.2809917355371901, 'Score': -59.5, 'Positives': 0.45454545454545453, 'Align2': 'SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGP--FVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ', 'Align1': 'M----------KIILFLALITLATCEL----------------------------------------------------YHYQECVRGTTVLLKEP--------------------CSSGTYEG-----NSPFHPLADNKFALTCFSTQF--AFAC-----------PDGVKHVYQLRARSVSPKLFIRQEEVQE--------------LYSPI-----------------FLIVA-----------------------AIVFITLCFTLKR--------KT-------------------------E', 'Alignment': '-          |+---+--+|--|--|                                                    +--|-||----|----|                    |-+|+--|     --|---+--+----+|-|--|  -+-|           |-||

{'Coverage2': 0.9575163398692811, 'Begin': 0, 'End': 1286, 'Coverage1': 0.23016496465043204, 'Pident': 0.369281045751634, 'Score': -302.0, 'Positives': 0.545751633986928, 'Align2': '-----------------------------SGFRKMAFPSGKV-----------------------EGCMVQVTCGT-------------------------------TTL--------------------------------------NGLWLDDV--VYCPRHVICTSEDMLNP----------NYEDL----------------------LIRKSNHNFLVQAGNVQLRV-----------------------------------IGH---------------------------SMQNCVLK-LKVDTANPKTPKYKFVRIQPGQ------------------------------------------------TFSVLACYNGSP--------SGVYQCAMRPNFTIKG--------------------------------------SFLNGS-------------------------------CGSV-GFN---------------IDYD-------------------------------CVSFCY-----------------------------------------------------------------------MHHMELPTGVHA------------GTD------------------------------------------------------------------------LEGNF-------------------------------------YGPFV--------------DRQTAQA-----------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 4405, 'Coverage1': 0.06946651532349603, 'Pident': 1.0, 'Score': -422.5, 'Positives': 1.0, 'Align2': '---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 0.3888888888888889, 'Begin': 0, 'End': 308, 'Coverage1': 0.9834710743801653, 'Pident': 0.2809917355371901, 'Score': -59.5, 'Positives': 0.45454545454545453, 'Align2': 'SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGP--FVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQ', 'Align1': 'M----------KIILFLALITLATCEL----------------------------------------------------YHYQECVRGTTVLLKEP--------------------CSSGTYEG-----NSPFHPLADNKFALTCFSTQF--AFAC-----------PDGVKHVYQLRARSVSPKLFIRQEEVQE--------------LYSPI-----------------FLIVA-----------------------AIVFITLCFTLKR--------KT-------------------------E', 'Alignment': '-          |+---+--+|--|--|                                                    +--|-||----|----|                    |-+|+--|     --|---+--+----+|-|--|  -+-|           |-||

{'Coverage2': 0.9575163398692811, 'Begin': 0, 'End': 1286, 'Coverage1': 0.23016496465043204, 'Pident': 0.369281045751634, 'Score': -302.0, 'Positives': 0.545751633986928, 'Align2': '-----------------------------SGFRKMAFPSGKV-----------------------EGCMVQVTCGT-------------------------------TTL--------------------------------------NGLWLDDV--VYCPRHVICTSEDMLNP----------NYEDL----------------------LIRKSNHNFLVQAGNVQLRV-----------------------------------IGH---------------------------SMQNCVLK-LKVDTANPKTPKYKFVRIQPGQ------------------------------------------------TFSVLACYNGSP--------SGVYQCAMRPNFTIKG--------------------------------------SFLNGS-------------------------------CGSV-GFN---------------IDYD-------------------------------CVSFCY-----------------------------------------------------------------------MHHMELPTGVHA------------GTD------------------------------------------------------------------------LEGNF-------------------------------------YGPFV--------------DRQTAQA-----------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 4405, 'Coverage1': 0.06583427922814983, 'Pident': 1.0, 'Score': -557.5, 'Positives': 1.0, 'Align2': '---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 0.35517241379310344, 'Begin': 0, 'End': 308, 'Coverage1': 0.8512396694214877, 'Pident': 0.2727272727272727, 'Score': -49.5, 'Positives': 0.4132231404958678, 'Align2': 'SAVKRTIKGTHHWLLLTILTSLLVLVQSTQWSLFFFLYE----------NAFLPFAMGIIAMSAFAMMFVKHKHAFLCLFLLPSLATVAYFNMVYMPASWVMRIMTWLDMVDTSLSGFKLKDCVMYASAVVLLILMTARTVYDDGARRVWTLMNVLTLVYKVYYGNALDQAISMWALIISVTSNYSGVVTTVMFLARGI---VFMCVE-----YCPIFFITGNTLQCIMLVYCFLGYFCTCYFGLFCLLNRYFRLTLGVYDYLVSTQEFRYMNSQGLLPPKNSIDAFKLNIKLLGVGGKPCIKVATVQ', 'Align1': 'MKIILFL------ALITLATCELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALTCFSTQF-----AFAC----------------------------------------------------------------PDGVKHVYQLR------------------------------------------ARSVSPKLFIRQEEVQELYSPIFLIVA---AIVFITLCF------------------------------------------------------------------TLKRKT-E', 'Alignment': '--+---+      -|+|+-|--|---|---------|-|          +-|-|-|----|++-|+--|     ||-|                                                                -||-+-|+-|-                  

{'Coverage2': 0.9689655172413794, 'Begin': 0, 'End': 1282, 'Coverage1': 0.22073841319717202, 'Pident': 0.3137931034482759, 'Score': -340.5, 'Positives': 0.5137931034482759, 'Align2': 'SAV----------------KRT-----------------------------------------------IKGTH----------------------------HWLLLTIL----TSLLVLVQST--------------------------QWSLFFF---------LYENAFLPFAMGI-----------------------------------------------------IAMSAFAMMFVKHKHAFLCLFLLP-------SLATVAYFNMVYMPASWVMRIM---TWLDMVDTSLSGFKLKDCVMYASAVVLLILMTAR----------------------------------------------------------------------------TVYDD--------------------------------GARRVWTLMNV-------LTLVYKVY--------------------------------------YG-----------------------------------------------NAL--------------------------------------------------------------------------------DQAISMWAL--------------------------------------------------------IISVTSNYSGVVTTVMFLARGI-------VFMCVEYCPI----------FFITGNTLQC--IMLVYCFLGYFCT--------------------------CY-------FG---------

{'Coverage2': 1.0, 'Begin': 0, 'End': 4405, 'Coverage1': 0.0188422247446084, 'Pident': 1.0, 'Score': -1775.0, 'Positives': 1.0, 'Align2': '---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 7096, 'Coverage1': 0.0116967305524239, 'Pident': 1.0, 'Score': -3120.5, 'Positives': 1.0, 'Align2': '---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 1273, 'Coverage1': 0.06520031421838178, 'Pident': 0.43373493975903615, 'Score': -532.5, 'Positives': 0.7108433734939759, 'Align2': '---------------------------------------------------------------------------------------------------------------SK--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------MSDVKCT--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------SVVLLS-----------------------------------------------------------------------------------------------VLQQ-------------------------------------------------------------------------------------LRVESS---------------------------------------------SKLWAQ----CVQLH------------------------------------------------------------------

{'Coverage2': 0.7777777777777778, 'Begin': 0, 'End': 266, 'Coverage1': 0.6936936936936937, 'Pident': 0.20707070707070707, 'Score': -37.0, 'Positives': 0.36363636363636365, 'Align2': 'AIASEFSSLPSYAAFATAQEAYEQAVANGDSEVVLKKLKKSLN-------------VAKSEFDRDAAMQR-------KL--------EKMADQAMTQMYKQARSEDKRAKVTSAMQTM--------------LFTMLRKL--DNDALNNIINNARDGCV---PLNIIPLTTAAKLMVVIPDY----NTYKNTCD----------GTTFTYASALWEIQQVVDADSKIVQLS-------EISMDNSPNLAWPLIVTALRANSAVKLQ', 'Align1': '-------------------------MADSNGTITVEELKKLLEQWNLVIGFLFLTWICLLQF---AYANRNRFLYIIKLIFLWLLWPVTLACFVLAAVY---RINWITGGIAIAMACLVGLMWLSYFIASFRLFARTRSMWSFNPETNILLNVPLHGTILTRPLLESELVIGA---VILRGHLRIAGHHLGRCDIKDLPKEITVATSRTLSYYKLGASQRVAGDSGFAAYSRYRIGNYKLNTDHSSS----------SDNIALLVQ', 'Alignment': '                         +|+-+--+-+++|||-|-             +---+|   |---|       ||        --+|---+--+|   |-------+--||--+              ||---|-+  -|---|-++|----|-+   ||----|---|   |++--+    --+---||          -|+-|-+-------|-|--||-----|       +++-|+|-+       

{'Coverage2': 0.9696969696969697, 'Begin': 0, 'End': 1279, 'Coverage1': 0.15082482325216026, 'Pident': 0.3838383838383838, 'Score': -403.0, 'Positives': 0.6111111111111112, 'Align2': '---------AIASEFSSL-------PSYA---------------------------------------------------------------------------AFATAQEAYEQA--VANGDSEVVLK----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------KL--------------------------------------------------------------------------------------------------------------------------------------------KKSLNVAKS---------------------------EFDRDAA-------MQRKLEKM----------------------------------------ADQ----------------------------------------AMTQMYKQARSEDKRAKVTSAMQTMLFTMLRKLDNDAL--NNIINNARDGCVPLN--------IIPL-----------------TTAAKLMV-----------------VIPDYNT----------YK--------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 4405, 'Coverage1': 0.02565266742338252, 'Pident': 1.0, 'Score': -1572.0, 'Positives': 1.0, 'Align2': '--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 7096, 'Coverage1': 0.01592446448703495, 'Pident': 1.0, 'Score': -2917.5, 'Positives': 1.0, 'Align2': '--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 0.9646017699115044, 'Begin': 0, 'End': 1277, 'Coverage1': 0.08562450903377848, 'Pident': 0.4778761061946903, 'Score': -484.5, 'Positives': 0.7079646017699115, 'Align2': '---------------------------------------------------------------------------------------------------------------------------------------------------NNE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LSPVAL--------------------RQMS--------------------CA-----------------------------------------AGTT-----------------------------------------------------------------------------------------------------QTA----------------------------------CTD----------------------------------------------------------------------------------DNALAYYN-------------TTK------------------------------------------------------------------------GG-----------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 7096, 'Coverage1': 0.005355129650507328, 'Pident': 0.7105263157894737, 'Score': -3465.0, 'Positives': 0.9473684210526315, 'Align2': 'M------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 1273, 'Coverage1': 0.029850746268656716, 'Pident': 0.5789473684210527, 'Score': -569.0, 'Positives': 0.7368421052631579, 'Align2': 'M--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GYI---------------------------------------------------------------------------------------------------------------------------NVFAFPFTI----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSLLL--------------------------------------CRM------------------------------------NSRNYIAQV---------------------------------------------

{'Coverage2': 0.22181818181818183, 'Begin': 0, 'End': 275, 'Coverage1': 1.0, 'Pident': 0.36065573770491804, 'Score': -87.0, 'Positives': 0.6065573770491803, 'Align2': 'MDLFMRIFTIGTVTLKQGEIKDATPSDFVRATATIPIQASLPFGWLIVGVALLAVFQSASKIITLKKRWQLALSKGVHFVCNLLLLFVTVYSHLLLVAAGLEAPFLYLYALVYFLQSINFVRIIMRLWLCWKCRSKNPLLYDANYFLCWHTNCYDYCIPYNSVTSSIVITSGDGTTSPISEHDYQIGGYTEKWESGVKDCVVLHSYFTSDYYQLYSTQLSTDTGVEHVTFFIYNKIVDEPEEHVQIHTIDGSSGVVNPVMEPIYDEPTTTTSVPL', 'Align1': 'M--FHLV-------------------DFQVTIAEI----------------LLIIMRTFKVSI-----WNL------DYIINLII--------------------------------------------------KN-------------------------------------LSKSLTENKY--------------------------------SQLDEEQPME----------------------ID-------------------------', 'Alignment': '|  |--+                   ||----|-|                ||-+-++----|     |-|      -++-||++                                                  ||                                     -+--++|+-|                                +||--+--+|                      || 

{'Coverage2': 0.9563636363636364, 'Begin': 0, 'End': 1285, 'Coverage1': 0.206598586017282, 'Pident': 0.4218181818181818, 'Score': -315.5, 'Positives': 0.5527272727272727, 'Align2': 'M---------------------------------------------------DLFMRIF-------------TIGT-------------------------------VTL-----------------------------------------------------------------------KQGEIKDA------------------TPSDFVR-----ATATIPIQASLPFGWLIVGV-ALLAVFQS-------------------------------------------------ASKIITLKKRWQLALSKGVHFVCNLL----------------------------------------------------------------------LLFVTVYSHLLLVAAGLE----AP------------------------------------FLYLYAL--------------------------------VYF-LQSINF----------VRIIMRLW--------LCWKCRSKN----------------------------------------------------------------------------PLLY-DANYFLC---------------W---------------------HTN-------------CYDY--------------------------------------CIPYN---SVTSSIVITS-------------GDGT-------------------------------------------TSPISEH-------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 4405, 'Coverage1': 0.012939841089670829, 'Pident': 0.631578947368421, 'Score': -2075.5, 'Positives': 0.7368421052631579, 'Align2': 'M----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AYCWR-------------------------------------------------------------------------------CTSCCFS----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ERFQNHN---------------------------------------------------------------------------------------------------------------------

{'Coverage2': 0.9824561403508771, 'Begin': 0, 'End': 7097, 'Coverage1': 0.007891770011273957, 'Pident': 0.6491228070175439, 'Score': -3418.5, 'Positives': 0.7894736842105263, 'Align2': 'M---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 1273, 'Coverage1': 0.04477611940298507, 'Pident': 0.45614035087719296, 'Score': -545.5, 'Positives': 0.6140350877192983, 'Align2': 'M------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AYCW------------------------RC--------------------------------------------------TSC----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CFSERFQNHNPQK----------------------------------------------------------------EMATSTLQGCSLCLQL-------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 7096, 'Coverage1': 0.008596392333709131, 'Pident': 0.5901639344262295, 'Score': -3420.0, 'Positives': 0.8032786885245902, 'Align2': 'M------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 1273, 'Coverage1': 0.047918303220738416, 'Pident': 0.5245901639344263, 'Score': -539.0, 'Positives': 0.7377049180327869, 'Align2': 'MF----------------------------------------------H--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LVD---------FQVTIA-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------EILLIIM-----------------------------------------------------------------------------------RTF-------KVSI--

{'Coverage2': 1.0, 'Begin': 0, 'End': 7096, 'Coverage1': 0.01705186020293123, 'Pident': 0.5950413223140496, 'Score': -3307.5, 'Positives': 0.7851239669421488, 'Align2': 'M----------------------------------------------------------------------------------------------------------------------KIIL----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------FLALITL------------------ATCEL-------------------------------------------YH----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YQECVR---------------------------------------------------------GTTV-----------------------------------------------

{'Coverage2': 0.9834710743801653, 'Begin': 0, 'End': 1275, 'Coverage1': 0.09347996857816182, 'Pident': 0.45454545454545453, 'Score': -462.5, 'Positives': 0.6776859504132231, 'Align2': 'MKIILFLALITLATCE---------------------------------LYHYQE----------------------------------------------CVRG--------------------TTVLLK----------------------------------------------------------------------------------------------EP----------------------------CSSGTYEGNSPFH----------------------------PLADNKFALTCF-------------------------------------STQFA----------------------------FAC----------------------------------------------PD--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GVKHV-----------------YQL------RARSVSPKLFI------------------------------------------------------------------------------------RQE---EVQELYSP----------------------

{'Coverage2': 0.9008264462809917, 'Begin': 0, 'End': 234, 'Coverage1': 0.49099099099099097, 'Pident': 0.2396694214876033, 'Score': -58.5, 'Positives': 0.4132231404958678, 'Align2': 'M------------------------------------------KF-----LVFLGII--TTVAAF-----------------HQEC---------------------SLQSCTQHQPYVVDDPCPIHFYSKWYIRVGARKSAPLI--ELCVDEAGSKSPIQYI--DIGNYTVSCSPFTINCQEP------KLGSLVVRCSFYEDFLEYHDVR----------------VVLDFI', 'Align1': 'MADSNGTITVEELKKLLEQWNLVIGFLFLTWICLLQFAYANRNRFLYIIKLIFLWLLWPVTLACFVLAAVYRINWITGGIAIAMACLVGLMWLSYFIASFRLFARTRSMWSFNPETNILLN--VPLH---------GTILTRPLLESELVIGAVILRGHLRIAGHHLGRCDIKDLPKEITVATSRTLSYYKLGASQ-RVAGDSGFAAYSRYRIGNYKLNTDHSSSSDNIALLVQ', 'Alignment': '|                                          +|     |+||-++  -|+|-|                 ---|                     |+-|-------+++  -|+|         |---+-||+  ||-+-----+--++--  -+|---+---|--|-----      |||+-- |-+----|--|---|                +-|---'}
COVID19orf8 P0DTD1
{'Coverage2': 1.0, 'Begin': 0, 'End': 7096, 'Coverage1': 0.0170

{'Coverage2': 1.0, 'Begin': 0, 'End': 1273, 'Coverage1': 0.09505106048703849, 'Pident': 0.4793388429752066, 'Score': -461.0, 'Positives': 0.6363636363636364, 'Align2': 'M-KFLV------------------------------------------------FLGIITTVAAFH--------------------------------------------------------------QECSLQSCTQ---------------------------------HQPYVVD-------------------------------------------------------------------------------------------------------------------DP----------------------------------------CP------------------------IHFYSKWY--------------------------------IR-------------------------------------------------------------------------------------------------------------------------VGARKSAPLIE-LCVD---------------------------------------------------------------------------------------------------------EAG----------------------------------------SKSPIQY-IDIG----------------NYTVS---------------------------CS---------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 7096, 'Coverage1': 0.01366967305524239, 'Pident': 0.5773195876288659, 'Score': -3365.0, 'Positives': 0.8762886597938144, 'Align2': 'MD-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PKI---------------------------------------------------------------------------------------SEMHP----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ALRLVD--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 1273, 'Coverage1': 0.07619795758051846, 'Pident': 0.4536082474226804, 'Score': -498.0, 'Positives': 0.6597938144329897, 'Align2': 'M----------------------------------------------------------------------------------------------------------------------------------------DP--------------------------------------------------------------KISEMHPALRLV----------DPQIQLAV----------------------------------------------------------------------------------------------------------------------------TRMENAVG------------------------------------------------------------------------------------------------------RD--------------------------------QNNVGPKVYPIIL----RLGSPLSLNMARKTLNSLEDK----------------------------------------------------------------------------------------AFQLTP-----------------------------------------------------------------------------------------------IAVQMTK--------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 7096, 'Coverage1': 0.010287485907553552, 'Pident': 0.6575342465753424, 'Score': -3386.5, 'Positives': 0.8356164383561644, 'Align2': 'M---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LQSCYNF-----------------------------------------------------------------------------------------------------------------------------------------------------------------LKE------------------------------------------------------------------------------------------------------------------------------------------------------------

{'Coverage2': 1.0, 'Begin': 0, 'End': 1273, 'Coverage1': 0.05734485467399843, 'Pident': 0.5205479452054794, 'Score': -523.5, 'Positives': 0.6712328767123288, 'Align2': 'M----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LQS-------------------------------------------CYNF----------LKEQH------------------------------------------------------------------------------------------------------------------CQKASTQK---------------------GAE--------------------------------------------------------------------------------AAVK---------------------------------------------

In [25]:
proteins[proteins["ID"].map(lambda x: "orf7" in x.lower())]

Unnamed: 0,ID,Is_Viral,Gene_Name,Length,Sequence
355,COVID19orf7a,True,orf7a,121,MKIILFLALITLATCELYHYQECVRGTTVLLKEPCSSGTYEGNSPF...


In [29]:
blacklist = ["P0DTD8", "P0DTC1", "P0DTD1"]
krogan2uni = defaultdict(str)
for uni, v in covid2best.iteritems():
    uniB, align = v
    print uni, uniB, len(align["Alignment"].replace("|", ""))
    if(not uni in blacklist):
        krogan2uni[uniB] = uni
    
    #if(align["Pident"] < 1):
    #    print uni
    #    my.alignPrint(align)

proteins["UniProt"] = proteins["ID"].map(lambda x: krogan2uni[x] if "COVID" in x else x)

P0DTC7 COVID19orf7a 0
A0A663DJA2 COVID19orf10 0
P0DTC6 COVID19orf6 0
P0DTC9 COVID19N 0
P0DTD2 COVID19orf9b 0
P0DTD3 COVID19orf9c 0
P0DTC5 COVID19M 0
P0DTC4 COVID19E 0
P0DTC3 COVID19orf3a 0
P0DTC2 COVID19Spike 0
P0DTC1 COVID19nsp1 4225
P0DTD8 COVID19Spike 1252
P0DTD1 COVID19nsp1 6916
P0DTC8 COVID19orf8 1


In [30]:
from jfb_tools import batchUniProtAPI
s = proteins["UniProt"].to_list()
tmp = dict(zip(s, batchUniProtAPI(s, source_id="ACC", target_id="GENENAME")))
proteins["Gene Name"] = proteins[["ID", "UniProt"]].apply(lambda x: tmp[x[1]] if not "COVID" in x[0] else x[0].replace("COVID19", "").replace("orf9c", "orf14").replace("Spike", "S").replace("C145A", "").upper(), axis=1)

proteins.to_csv("{0}/Data/Proteins.txt".format(base_dir), sep="\t", index=None)