In [1]:
import os
import time
import pandas as pd
import numpy as np
import pathlib
from io import StringIO
from Bio import SeqIO
from Bio.Cluster import distancematrix
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML
import subprocess
from matplotlib import pyplot as plt

In [27]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

MagicBlast:

- raw reads to nt
- contigs to nt
- raw reads to gsa_virus
- contigs to gsa_virus

In [2]:
PRJ='PRJCA002202'
DATA_PATH=f'/mnt/1TB_0/Data/Assembly/{PRJ}/'


In [3]:
def clean_string(s):
    s = s.replace(',', '').strip()
    return s

In [4]:
def get_contigs_ascessions(contigs_file):
    contigs=[]
    accessions=[]
    with open(contigs_file, 'r') as temp_f:
        for line in temp_f:
            if len(line.strip())>0:
                vals=line.split("\t")
                contigs.append(clean_string(vals[0]))
                accessions.append(clean_string(vals[2]))
    return  contigs, accessions


In [5]:
def get_raw_sam_ascessions(reads_file, machine_id='v300043428'):
    accessions=[]
    with open(reads_file, 'r') as temp_f:
        for line in temp_f:
            if len(line.strip())>0:
                if line.startswith(machine_id):
                    vals=line.split("\t")
                    accessions.append(clean_string(vals[2]))
    return accessions

In [6]:
def get_val_count(accessions):
    values, counts = np.unique(accessions, return_counts=True)
    idx = np.argsort(counts)[::-1]
    values = np.array(values)[idx]
    counts = np.array(counts)[idx]
    return values, counts, idx

In [7]:
def get_titles(df, values):
    titles=[]
    for v in values:
        l=df.loc[df.accession==v, 'title'].tolist()
        titles.append(l[0])
    return titles

In [8]:
def get_accession_dat(values, dbname='nt'):
    vdats=[]
    for v in values:
        try:
            vdat=!blastdbcmd -db nt -entry $v
        except Exception as e:
            if dbname!='nt':
                vdat=!blastdbcmd -db 'nt' -entry $v
        if vdat is not None:
            vdats.append(vdat[0].split(',')[0])
    return vdats

## Raw Reads

### All CRR

In [9]:
sra_list=['CRR125934','CRR125935','CRR125936','CRR125937','CRR125938','CRR125939','CRR125940']

In [12]:
sra_results={}
for sra in sra_list:
    print(f'SRA: {sra}')
    reads_file=f'{sra}_raw_magicBLAST_gsa_bsl_nt_db.sam'
    reads_file=DATA_PATH+f'{sra}/magic_blast/'+reads_file
    accessions=get_raw_sam_ascessions(reads_file, machine_id='M04403')
    values, counts, idx = get_val_count(accessions)
    titles=get_accession_dat(values, dbname='gsa_bsl_nt_db')
    for t, v,c in zip(titles, values, counts):
        print(f'{t}, {v}, {c}')
    sra_results[sra]=[titles, values, counts]
        

SRA: CRR125934
Error: [blastdbcmd] Entry not found: CNA0013710, CNA0013710, 72786
>MF164268.1 Homo sapiens clone BAC JH12 genomic sequence, MF164268.1, 5816
>MT241668.1 Leopoldamys sabanus voucher MZF1958 mitochondrion, MT241668.1, 4334
>X51700.1 Bos taurus mRNA for bone Gla protein, 7, 3028
>MK280367.1 Homo sapiens lncAB371.6 lncRNA gene, MK280367.1, 890
>MK279923.1 Homo sapiens lncAB599.3 lncRNA gene, MK279923.1, 814
Error: [blastdbcmd] Entry not found: 2, 2, 744
>AJ289709.1 Human endogenous retrovirus H HERV-H/env62 proviral copy, AJ289709.1, 652
Error: [blastdbcmd] Entry not found: MW635910.1, MW635910.1, 618
>MW206626.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-1805/2020 ORF1ab polyprotein (ORF1ab) and ORF1a polyprotein (ORF1ab) genes, MW206626.1, 618
Error: [blastdbcmd] Entry not found: MW637224.1, MW637224.1, 618
>AC123074.2 Mus musculus BAC clone RP23-168H19 from 10, AC123074.2, 556
>MT520543.1 Severe acute respiratory syndrome coronav

Error: [blastdbcmd] Entry not found: MW505431.1, MW505431.1, 20
Error: [blastdbcmd] Entry not found: MW643347.1, MW643347.1, 20
Error: [blastdbcmd] Entry not found: MW505814.1, MW505814.1, 20
Error: [blastdbcmd] Entry not found: MW668901.1, MW668901.1, 20
>MW206526.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-1856/2020 ORF1ab polyprotein (ORF1ab) and ORF1a polyprotein (ORF1ab) genes, MW206526.1, 20
Error: [blastdbcmd] Entry not found: MW626204.1, MW626204.1, 20
>LR824567.1 Severe acute respiratory syndrome coronavirus 2 isolate 140089 genome assembly, LR861099.1, 20
Error: [blastdbcmd] Entry not found: MW495854.1, MW495854.1, 20
>LR824135.1 Severe acute respiratory syndrome coronavirus 2 isolate 120011 genome assembly, LR860690.1, 20
Error: [blastdbcmd] Entry not found: MW495137.1, MW495137.1, 20
>MW184296.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/AUS/VIC15041/2020 ORF1ab polyprotein (ORF1ab) and ORF1a polyprote

Error: [blastdbcmd] Entry not found: MW640818.1, MW640818.1, 12
Error: [blastdbcmd] Entry not found: MW637442.1, MW637442.1, 12
Error: [blastdbcmd] Entry not found: MW556180.1, MW556180.1, 12
Error: [blastdbcmd] Entry not found: MW637457.1, MW637457.1, 12
Error: [blastdbcmd] Entry not found: MW637458.1, MW637458.1, 12
Error: [blastdbcmd] Entry not found: MW637477.1, MW637477.1, 12
Error: [blastdbcmd] Entry not found: MW555917.1, MW555917.1, 12
Error: [blastdbcmd] Entry not found: MW637260.1, MW637260.1, 12
Error: [blastdbcmd] Entry not found: MW634865.1, MW634865.1, 12
Error: [blastdbcmd] Entry not found: MW566839.1, MW566839.1, 12
Error: [blastdbcmd] Entry not found: MW564995.1, MW564995.1, 12
Error: [blastdbcmd] Entry not found: MW565821.1, MW565821.1, 12
>MW449465.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/UT-UPHL-2012959648/2020 ORF1ab polyprotein (ORF1ab) gene, MW449465.1, 12
Error: [blastdbcmd] Entry not found: MW639514.1, MW639514.1, 12
Error:

Error: [blastdbcmd] Entry not found: MW513651.1, MW513651.1, 8
Error: [blastdbcmd] Entry not found: MW513646.1, MW513646.1, 8
Error: [blastdbcmd] Entry not found: MW555320.1, MW555320.1, 8
>MT834670.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-0512/2020 ORF1ab polyprotein (ORF1ab), MT834670.1, 8
>MT834669.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-0511/2020 ORF1ab polyprotein (ORF1ab), MT834669.1, 8
>MT834662.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-0552/2020, MT834662.1, 8
>MT834661.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-0551/2020, MT834661.1, 8
>MT834657.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-0547/2020, MT834657.1, 8
Error: [blastdbcmd] Entry not found: MW555909.1, MW555909.1, 8
>MT834194.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/hum

Error: [blastdbcmd] Entry not found: MW565741.1, MW565741.1, 6
>MW030220.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/PER/covper084/2020, MW030220.1, 6
>MW030236.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/PER/covper005/2020, MW030236.1, 6
Error: [blastdbcmd] Entry not found: MW565734.1, MW565734.1, 6
>MW276469.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/CA-CZB-10590/2020, MW276469.1, 6
Error: [blastdbcmd] Entry not found: MW565728.1, MW565728.1, 6
Error: [blastdbcmd] Entry not found: MW641687.1, MW641687.1, 6
Error: [blastdbcmd] Entry not found: MW643881.1, MW643881.1, 6
>MW030254.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/PER/covper023/2020, MW030254.1, 6
Error: [blastdbcmd] Entry not found: MW565452.1, MW565452.1, 6
>MW031071.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/WI-UW-1504/2020, MW031071.1, 6
>MW031073.1 Severe acut

>MW206454.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-1764/2020 ORF1ab polyprotein (ORF1ab), MW206454.1, 4
>MW273844.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/POL/PL_MCB_62/2020, MW273844.1, 4
>MW206499.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-1940/2020 ORF1ab polyprotein (ORF1ab) and ORF1a polyprotein (ORF1ab) genes, MW206499.1, 4
>MW245985.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/TX-DSHS-1214/2020 ORF1ab polyprotein (ORF1ab) gene, MW245985.1, 4
>MW276477.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/CA-CZB-165/2020, MW276477.1, 4
Error: [blastdbcmd] Entry not found: MW642492.1, MW642492.1, 4
>MW206567.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-1909/2020 ORF1ab polyprotein (ORF1ab), MW206567.1, 4
>MW190121.1 Severe acute respiratory syndrome coronaviru

>LR814024.3 Severe acute respiratory syndrome coronavirus 2 isolate NORW-EC741 genome assembly, LR814024.3, 4
>LR814045.2 Severe acute respiratory syndrome coronavirus 2 isolate NORW-EC51A genome assembly, LR814045.2, 4
Error: [blastdbcmd] Entry not found: MW671637.1, MW671637.1, 4
>LR814119.1 Severe acute respiratory syndrome coronavirus 2 isolate NORW-EAFF0 genome assembly, LR814119.1, 4
>LR814169.1 Severe acute respiratory syndrome coronavirus 2 isolate NORW-EB645 genome assembly, LR814169.1, 4
Error: [blastdbcmd] Entry not found: MW669828.1, MW669828.1, 4
>LR814202.1 Severe acute respiratory syndrome coronavirus 2 isolate NORW-EB9E2 genome assembly, LR814202.1, 4
>LR814205.3 Severe acute respiratory syndrome coronavirus 2 isolate NORW-EC8A8 genome assembly, LR814205.3, 4
>LR814219.2 Severe acute respiratory syndrome coronavirus 2 isolate NORW-EB1DB genome assembly, LR814219.2, 4
>LR814222.1 Severe acute respiratory syndrome coronavirus 2 isolate NORW-EAD6F genome assembly, LR814222

Error: [blastdbcmd] Entry not found: MW636408.1, MW636408.1, 4
Error: [blastdbcmd] Entry not found: MW523837.1, MW523837.1, 4
Error: [blastdbcmd] Entry not found: MW636416.1, MW636416.1, 4
Error: [blastdbcmd] Entry not found: MW535176.1, MW535176.1, 4
Error: [blastdbcmd] Entry not found: MW555949.1, MW555949.1, 4
Error: [blastdbcmd] Entry not found: MW523820.1, MW523820.1, 4
Error: [blastdbcmd] Entry not found: MW555953.1, MW555953.1, 4
Error: [blastdbcmd] Entry not found: MW555954.1, MW555954.1, 4
Error: [blastdbcmd] Entry not found: MW550668.1, MW550668.1, 4
Error: [blastdbcmd] Entry not found: MW555914.1, MW555914.1, 4
Error: [blastdbcmd] Entry not found: MW555913.1, MW555913.1, 4
Error: [blastdbcmd] Entry not found: MW523885.1, MW523885.1, 4
Error: [blastdbcmd] Entry not found: MW638471.1, MW638471.1, 4
Error: [blastdbcmd] Entry not found: MW555891.1, MW555891.1, 4
Error: [blastdbcmd] Entry not found: MW617628.1, MW617628.1, 4
Error: [blastdbcmd] Entry not found: MW533286.1, MW5332

Error: [blastdbcmd] Entry not found: MW636763.1, MW636763.1, 2
Error: [blastdbcmd] Entry not found: MW577926.1, MW577926.1, 2
>MT952633.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/VA-DCLS-0748/2020, MT952633.1, 2
>MT952635.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/VA-DCLS-0750/2020, MT952635.1, 2
Error: [blastdbcmd] Entry not found: MW577905.1, MW577905.1, 2
>MT952673.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/VA-DCLS-0997/2020, MT952673.1, 2
>MT955114.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IND/GBRC-395a/2020, MT955114.1, 2
Error: [blastdbcmd] Entry not found: MW577858.1, MW577858.1, 2
Error: [blastdbcmd] Entry not found: MW577859.1, MW577859.1, 2
Error: [blastdbcmd] Entry not found: MW636793.1, MW636793.1, 2
>MT954072.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IND/GBRC-390b/2020, MT954072.1, 2
Error: [blastdbcmd

Error: [blastdbcmd] Entry not found: MW626620.1, MW626620.1, 2
Error: [blastdbcmd] Entry not found: MW626621.1, MW626621.1, 2
Error: [blastdbcmd] Entry not found: MW626622.1, MW626622.1, 2
Error: [blastdbcmd] Entry not found: MW626624.1, MW626624.1, 2
>LR882392.1 Severe acute respiratory syndrome coronavirus 2 isolate 260032 genome assembly, LR882392.1, 2
Error: [blastdbcmd] Entry not found: MW667413.1, MW667413.1, 2
Error: [blastdbcmd] Entry not found: MW626597.1, MW626597.1, 2
>LR882435.1 Severe acute respiratory syndrome coronavirus 2 isolate 260115 genome assembly, LR882435.1, 2
Error: [blastdbcmd] Entry not found: MW667414.1, MW667414.1, 2
Error: [blastdbcmd] Entry not found: MW635046.1, MW635046.1, 2
Error: [blastdbcmd] Entry not found: MW667357.1, MW667357.1, 2
>LR882773.1 Severe acute respiratory syndrome coronavirus 2 isolate 280092 genome assembly, LR882773.1, 2
Error: [blastdbcmd] Entry not found: MW626556.1, MW626556.1, 2
Error: [blastdbcmd] Entry not found: MW667359.1, MW6

Error: [blastdbcmd] Entry not found: MW617527.1, MW617527.1, 2
>MT358671.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/WA-UW-4335/2020, MT358671, 2
Error: [blastdbcmd] Entry not found: MW617530.1, MW617530.1, 2
Error: [blastdbcmd] Entry not found: MW617548.1, MW617548.1, 2
>MT358654.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/CT-UW-4251/2020, MT358654, 2
>MT350253.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/VA-DCLS-0064/2020, MT350253, 2
Error: [blastdbcmd] Entry not found: MW617551.1, MW617551.1, 2
Error: [blastdbcmd] Entry not found: MW635417.1, MW635417.1, 2
>MT358637.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IND/GBRC1/2020, MT358637, 2
>MT350277.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/WA-CDC-0441/2020, MT350277, 2
>MT350275.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/WA-CDC-0

Error: [blastdbcmd] Entry not found: MW496055.1, MW496055.1, 2
Error: [blastdbcmd] Entry not found: MW639644.1, MW639644.1, 2
Error: [blastdbcmd] Entry not found: MW639648.1, MW639648.1, 2
Error: [blastdbcmd] Entry not found: MW496060.1, MW496060.1, 2
>MW485904.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/CA-LACPHL-AF00172/2021, MW485904.1, 2
>MW485903.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/CA-LACPHL-AF00171/2021 ORF1ab polyprotein (ORF1ab), MW485903.1, 2
>MW485901.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/CA-LACPHL-AF00162/2021 ORF1ab polyprotein (ORF1ab), MW485901.1, 2
>MW486212.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/WI-UW-2431/2020, MW486212.1, 2
>MW486213.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/WI-UW-2432/2020, MW486213.1, 2
>MW486215.1 Severe acute respiratory syndrome coronavirus 2 isolate SAR

Error: [blastdbcmd] Entry not found: MW643745.1, MW643745.1, 2
Error: [blastdbcmd] Entry not found: MW565122.1, MW565122.1, 2
Error: [blastdbcmd] Entry not found: MW643744.1, MW643744.1, 2
>MW064882.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/CA-QDX-939/2020, MW064882.1, 2
>MW065038.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/MN-QDX-1014/2020, MW065038.1, 2
>MW065005.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/CA-QDX-1042/2020, MW065005.1, 2
Error: [blastdbcmd] Entry not found: MW565104.1, MW565104.1, 2
Error: [blastdbcmd] Entry not found: MW565108.1, MW565108.1, 2
Error: [blastdbcmd] Entry not found: MW643732.1, MW643732.1, 2
>MW064990.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/AZ-QDX-1054/2020, MW064990.1, 2
Error: [blastdbcmd] Entry not found: MW637181.1, MW637181.1, 2
>MW064873.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV

>MW223133.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/TX-DSHS-0884/2020 ORF1ab polyprotein (ORF1ab), MW223133.1, 2
Error: [blastdbcmd] Entry not found: MW549246.1, MW549246.1, 2
Error: [blastdbcmd] Entry not found: MW549242.1, MW549242.1, 2
>MW228191.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/TX-DSHS-1055/2020 ORF1ab polyprotein (ORF1ab), MW228191.1, 2
Error: [blastdbcmd] Entry not found: MW549209.1, MW549209.1, 2
>MW228216.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/TX-DSHS-1041/2020 ORF1ab polyprotein (ORF1ab), MW228216.1, 2
Error: [blastdbcmd] Entry not found: MW549221.1, MW549221.1, 2
Error: [blastdbcmd] Entry not found: MW642489.1, MW642489.1, 2
Error: [blastdbcmd] Entry not found: MW637851.1, MW637851.1, 2
>MW228201.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/TX-DSHS-1050/2020 ORF1ab polyprotein (ORF1ab), MW228201.1, 2
>MW228192.1 Severe acu

>MT241668.1 Leopoldamys sabanus voucher MZF1958 mitochondrion, MT241668.1, 1618
>MF164268.1 Homo sapiens clone BAC JH12 genomic sequence, MF164268.1, 434
Error: [blastdbcmd] Entry not found: CNA0013710, CNA0013710, 272
>X51700.1 Bos taurus mRNA for bone Gla protein, 7, 140
>AC123074.2 Mus musculus BAC clone RP23-168H19 from 10, AC123074.2, 42
>MK280367.1 Homo sapiens lncAB371.6 lncRNA gene, MK280367.1, 34
>MK279923.1 Homo sapiens lncAB599.3 lncRNA gene, MK279923.1, 26
Error: [blastdbcmd] Entry not found: 2, 2, 14
>AC105600.5 Rattus norvegicus 4 BAC CH230-209E1 (Children's Hospital Oakland Research Institute) complete sequence, AC105600.5, 14
>MK280359.1 Homo sapiens lncAB370.3 lncRNA gene, MK280359.1, 8
>MW332979.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/WI-UW-1863/2020, MW332979.1, 2
>MW134275.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/CA-CZB-3871/2020 ORF1ab polyprotein (ORF1ab) and ORF1a polyprotein (ORF1ab) ge

>X51700.1 Bos taurus mRNA for bone Gla protein, 7, 9702
Error: [blastdbcmd] Entry not found: CNA0013710, CNA0013710, 4040
>AC123074.2 Mus musculus BAC clone RP23-168H19 from 10, AC123074.2, 3806
>MF164268.1 Homo sapiens clone BAC JH12 genomic sequence, MF164268.1, 1594
>MT241668.1 Leopoldamys sabanus voucher MZF1958 mitochondrion, MT241668.1, 994
>AC105600.5 Rattus norvegicus 4 BAC CH230-209E1 (Children's Hospital Oakland Research Institute) complete sequence, AC105600.5, 788
Error: [blastdbcmd] Entry not found: 2, 2, 638
>MT451205.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/AUS/VIC313/2020 ORF1ab polyprotein (ORF1ab), MT451205.1, 320
Error: [blastdbcmd] Entry not found: MW638235.1, MW638235.1, 280
>MW490886.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-2293/2020, MW490886.1, 280
>MW056121.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-1496/2020 ORF1ab polyprotein (ORF1ab), M

>MT952701.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/VA-DCLS-1047/2020, MT952701.1, 2
>MT953980.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IND/GBRC-383a/2020, MT953980.1, 2
Error: [blastdbcmd] Entry not found: MW638593.1, MW638593.1, 2
>MT953991.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IND/GBRC-384b/2020, MT953991.1, 2
>MT954922.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IND/GBRC-380b/2020 ORF1ab polyprotein (ORF1ab), MT954922.1, 2
>MT955114.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IND/GBRC-395a/2020, MT955114.1, 2
>MT955326.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/IND/GBRC-395b/2020, MT955326.1, 2
Error: [blastdbcmd] Entry not found: MW638586.1, MW638586.1, 2
>MT956771.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/FL-BPHL-1161/2020 ORF1ab polyprotein (OR

SRA: CRR125938
SRA: CRR125939
SRA: CRR125940


In [30]:
for ke, va in sra_results.items():
    print(f'{ke}:')
    for t,v,c in zip(va[0], va[1], va[2]):
        if not t.startswith('Error') and not 'Severe acute respiratory syndrome coronavirus 2' in t:
            print(f'{t}, {v}, {c}')
    print(f'\n')
    

CRR125934:
>MF164268.1 Homo sapiens clone BAC JH12 genomic sequence, MF164268.1, 5816
>MT241668.1 Leopoldamys sabanus voucher MZF1958 mitochondrion, MT241668.1, 4334
>X51700.1 Bos taurus mRNA for bone Gla protein, 7, 3028
>MK280367.1 Homo sapiens lncAB371.6 lncRNA gene, MK280367.1, 890
>MK279923.1 Homo sapiens lncAB599.3 lncRNA gene, MK279923.1, 814
>AJ289709.1 Human endogenous retrovirus H HERV-H/env62 proviral copy, AJ289709.1, 652
>AC123074.2 Mus musculus BAC clone RP23-168H19 from 10, AC123074.2, 556
>AC105600.5 Rattus norvegicus 4 BAC CH230-209E1 (Children's Hospital Oakland Research Institute) complete sequence, AC105600.5, 132
>X68321.1 B.taurus mRNA for cyclin A, 9, 96
>U13856.1 pGEX-5X1 cloning vector, U13856.1, 76
>KY926444.1 Cloning vector pET-hsp12, KY926444.1, 60
>MN996532.2 Bat coronavirus RaTG13, MN996532.2, 56
>MK280359.1 Homo sapiens lncAB370.3 lncRNA gene, MK280359.1, 46
>MN996867.1 Cloning vector pcDNA3.1_+, MN996867.1, 10
>X17276.1 Giant Panda satellite 1 DNA, 4, 8


In [11]:
#flu_titles = [i for i in titles if 'influenza' in i.lower()]
#flu_titles