In [1]:
# Dependencies
from modules.blast import Blast
import pandas as pd
import time

In [2]:
# Create the BLAST client object that every cell below calls.
# NOTE(review): Blast is imported from the project-local modules.blast; the job
# id printed further down ('ncbiblast-...') suggests it talks to the EMBL-EBI
# NCBI BLAST web service — confirm against the module.
b = Blast()

In [3]:
# Ask the client for its parameters. The call returns three values: a success
# flag, the payload (a list of parameter names, as the output shows) and a
# third value that this notebook never uses.
status, params, _ = b.get_parameters()

# Show the success flag followed by the parameter names
print(status, params)

True ['program', 'task', 'matrix', 'alignments', 'scores', 'exp', 'dropoff', 'match_scores', 'gapopen', 'gapext', 'filter', 'seqrange', 'gapalign', 'wordsize', 'taxids', 'compstats', 'align', 'transltable', 'stype', 'sequence', 'database']


In [4]:
# Submit the query sequence as a new BLAST job.
# The keys passed in `params` are among the names returned by get_parameters().
# NOTE(review): 'exp' looks like the expectation (E-value) cut-off and
# 'alignments' like the maximum number of hits returned — confirm against the
# service documentation.
# NOTE(review): the contact e-mail is hard-coded; consider reading it from
# configuration before sharing the notebook.
status, job_id, _ = b.run_job(
    email='damiano.clementel@studenti.unipd.it',
    sequence='VPSGWKAVFDDEYQTWYYVDLSTNSSQWEPP',
    params=dict(
        database='uniref90',
        matrix='BLOSUM62',
        alignments=1000,
        gapalign=True,
        exp='1e-3',
    ),
)

# Report the success flag and the identifier of the submitted job
print(status, job_id)

True ncbiblast-R20200204-162639-0262-17487124-p2m


In [5]:
# Poll the BLAST job until it reaches a final state, then fetch its results.
#
# Outcome after this cell:
#   status     -> True only if the job finished and its results were fetched
#   job_result -> the fetched results, or None if anything went wrong

# Polling configuration
POLL_INTERVAL = 10        # seconds to wait between two status requests
MAX_WAIT = 30 * 60        # stop polling after this many seconds
# Statuses after which the job can no longer reach 'FINISHED'.
# NOTE(review): assumes get_job_status passes through the raw status strings of
# the EMBL-EBI job API ('RUNNING' and 'FINISHED' do match) — confirm.
FAILED_STATUSES = {'ERROR', 'FAILURE', 'NOT_FOUND'}

# Define results container
job_result = None

# Retrieve BLAST job results
deadline = time.monotonic() + MAX_WAIT
while True:
    # Make request for job status
    status, job_status, _ = b.get_job_status(job_id)
    # Check error BEFORE logging: what job_status holds after a failed request
    # is not visible here, and '{:s}'.format() raises TypeError on a non-string
    if not status:
        print('Job status request failed')  # LOG
        break
    print('Job status: {:s}'.format(job_status))  # LOG
    # Check if job has finished running
    if job_status == 'FINISHED':
        # Retrieve results
        status, job_result, _ = b.get_job_result(job_id)
        print('Job exited with status: {:s}'.format(job_status))  # LOG
        break  # Exit loop
    # Check if job ended without producing results: polling further would
    # loop forever, so flag the failure and stop
    if job_status in FAILED_STATUSES:
        status = False
        print('Job exited with status: {:s}'.format(job_status))  # LOG
        break
    # Check if the job has been pending for too long
    if time.monotonic() >= deadline:
        status = False
        print('Job did not finish within {:d} seconds'.format(MAX_WAIT))  # LOG
        break
    # Wait before making another call
    time.sleep(POLL_INTERVAL)

# Check output
print(status, job_result)

Job status: RUNNING
Job status: RUNNING
Job status: RUNNING
Job status: RUNNING
Job status: RUNNING
Job status: FINISHED
Job exited with status: FINISHED
True [{'score': 182, 'bits': 74.7146, 'expectation': 1e-15, 'identity': 100.0, 'positives': 100.0, 'gaps': 0, 'strand': 'none/none', 'pattern_seq': 'VPSGWKAVFDDEYQTWYYVDLSTNSSQWEPP', 'match_seq': 'VPSGWKAVFDDEYQTWYYVDLSTNSSQWEPP', 'match_start': 11, 'match_end': 41, 'database': 'UR90', 'id': 'UniRef90_P43582', 'ac': 'WW', 'description': 'WW domain-containing protein WWM1 n=9 Tax=Saccharomyces TaxID=4930 RepID=WWM1_YEAST'}, {'score': 175, 'bits': 72.0182, 'expectation': 1.1e-14, 'identity': 93.5, 'positives': 100.0, 'gaps': 0, 'strand': 'none/none', 'pattern_seq': 'VPSGWKAVFDDEYQTW+YVDLSTN+SQWEPP', 'match_seq': 'VPSGWKAVFDDEYQTWFYVDLSTNNSQWEPP', 'match_start': 11, 'match_end': 41, 'database': 'UR90', 'id': 'UniRef90_J8Q8J2', 'ac': 'Wwm1p', 'description': 'Wwm1p n=1 Tax=Saccharomyces arboricola (strain H-6 / AS 2.3317 / CBS 10644) TaxID

In [6]:
# Turn job result into pandas DataFrame object: job_result is a list holding
# one dict per hit (see the output printed by the previous cell), so every hit
# becomes a row and every dict key a column.
# NOTE(review): job_result is still None when the previous cell did not fetch
# any results, in which case this yields an empty frame.
matches = pd.DataFrame(job_result)
# Bare last expression so the notebook renders the frame as a table
matches

Unnamed: 0,score,bits,expectation,identity,positives,gaps,strand,pattern_seq,match_seq,match_start,match_end,database,id,ac,description
0,182,74.7146,1.000000e-15,100.0,100.0,0,none/none,VPSGWKAVFDDEYQTWYYVDLSTNSSQWEPP,VPSGWKAVFDDEYQTWYYVDLSTNSSQWEPP,11,41,UR90,UniRef90_P43582,WW,WW domain-containing protein WWM1 n=9 Tax=Sacc...
1,175,72.0182,1.100000e-14,93.5,100.0,0,none/none,VPSGWKAVFDDEYQTW+YVDLSTN+SQWEPP,VPSGWKAVFDDEYQTWFYVDLSTNNSQWEPP,11,41,UR90,UniRef90_J8Q8J2,Wwm1p,Wwm1p n=1 Tax=Saccharomyces arboricola (strain...
2,173,71.2478,1.800000e-14,93.5,100.0,0,none/none,VPSGWKAVFDDEYQTW+YV+LSTNSSQWEPP,VPSGWKAVFDDEYQTWFYVNLSTNSSQWEPP,11,41,UR90,UniRef90_J5RH20,WWM1-like,WWM1-like protein n=2 Tax=Saccharomyces TaxID=...
3,170,70.0922,4.700000e-14,93.5,96.8,0,none/none,VPSGWKAVFDDEYQTW+YVDLSTNSSQWE P,VPSGWKAVFDDEYQTWFYVDLSTNSSQWEAP,11,41,UR90,UniRef90_A0A0L8RJY2,WWM1-like,WWM1-like protein n=1 Tax=Saccharomyces eubaya...
4,158,65.4698,2.800000e-12,80.6,93.5,0,none/none,VP GWKAVFDDEY+TW+YV+L+TN SQWEPP,VPKGWKAVFDDEYKTWFYVNLATNQSQWEPP,11,41,UR90,UniRef90_A0A212M9M4,WW,WW domain-containing protein n=4 Tax=Zygosacch...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,100,43.1282,8.400000e-04,48.4,71.0,0,none/none,VP GW A ++D+Y+ W+YV+ T SQW+ P,VPEGWVARWNDQYKEWFYVNTFTKKSQWDKP,15,45,UR90,UniRef90_A0A136JIJ7,WW,WW domain-containing protein n=1 Tax=Microdoch...
447,100,43.1282,8.400000e-04,48.4,74.2,0,none/none,VP GW A ++++Y+ W+YV+L T SQW+ P,VPPGWIARWNEQYKEWFYVNLHTKQSQWDKP,19,49,UR90,UniRef90_A0A1S1W2W2,WW,WW domain-containing protein n=7 Tax=Colletotr...
448,100,43.1282,8.400000e-04,45.2,77.4,0,none/none,VP+GW A ++++Y+ W+YV++ T SQW+ P,VPAGWVARWNEQYKEWFYVNIYTKKSQWDKP,18,48,UR90,UniRef90_A0A2H2Z4R7,WW,WW domain-containing protein n=3 Tax=Trichoder...
449,100,43.1282,8.400000e-04,48.4,67.7,0,none/none,VP GW ++D+YQ W+YV+ T SQW+ P,VPEGWVVRWNDQYQEWFYVNTYTKKSQWDKP,15,45,UR90,UniRef90_A0A2T2ZXY2,WW,WW domain-containing protein n=1 Tax=Coniella ...


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the hits table
matches.describe()

Unnamed: 0,score,bits,expectation,identity,positives,gaps,match_start,match_end
count,451.0,451.0,451.0,451.0,451.0,451.0,451.0,451.0
mean,114.656319,48.773814,0.0001327664,57.154989,75.067849,0.019956,69.691796,99.667406
std,13.14684,5.064163,0.0002124278,8.240145,5.696292,0.244133,462.679213,462.680783
min,96.0,41.5874,1e-15,45.2,61.3,0.0,3.0,33.0
25%,104.0,44.669,6.1e-07,51.6,71.0,0.0,14.0,44.0
50%,111.0,47.3654,2.6e-05,54.8,74.2,0.0,15.0,45.0
75%,122.5,51.7952,0.00017,61.3,77.4,0.0,22.0,52.0
max,182.0,74.7146,0.00084,100.0,100.0,3.0,9492.0,9522.0
