In [3]:
import matplotlib
# Standard library packages
import io

# Import Seaborn for graphics and plotting
import seaborn as sns

# Import bioservices module, to run remote UniProt queries
from bioservices import UniProt

# Import Pandas, so we can use dataframes
import pandas as pd

from Bio import SeqIO


# Show plots as part of the notebook
%matplotlib inline


## Connecting to UniProt using bioservices:


In [4]:
# Create the bioservices UniProt client; the searches in the cells below all go through this object.
service = UniProt()  # a meaningful variable name keeps the later cells readable


## Constructing a query:

In [3]:
# Earlier experiments, kept for reference:
#   query = "Q9AJE3"  # searches every field for Q9AJE3
#   query = "comment(SUBCELLULAR LOCATION): organism:human reviewed:yes"

# The three human queries differ only in their GO term, so build them from a
# single template; the "organism:human reviewed:yes" filter is written once.
membrane_query, secreted_query, cytoplasm_query = (
    "go:{} organism:human reviewed:yes".format(go_id)
    for go_id in ("0016020", "0005576", "0005737")
)


# taxonomy:"Mammalia [40674]" AND locations:(location:"Secreted [SL-0243]") AND reviewed:yes

## Performing the query:

### Getting results and saving as fasta for Human Secreted proteins:

In [10]:
# Run secreted_query against UniProt, asking for the hits in FASTA format.
secreted_result = service.search(secreted_query, frmt="fasta")

# Open in write mode ('w') so the cell is idempotent: re-running it replaces
# the file, whereas append mode would add a second copy of every record.
secreted_outfile = 'human_secreted.fasta'
with open(secreted_outfile, 'w') as ofh:
    ofh.write(secreted_result)

### Getting results and saving as fasta for Human Membrane proteins:

In [11]:
# Run membrane_query against UniProt, asking for the hits in FASTA format.
membrane_result = service.search(membrane_query, frmt="fasta")

# Open in write mode ('w') so the cell is idempotent: re-running it replaces
# the file, whereas append mode would add a second copy of every record.
membrane_outfile = 'human_membrane.fasta'
with open(membrane_outfile, 'w') as ofh:
    ofh.write(membrane_result)

### Getting results and saving as fasta for Human Cytoplasmic proteins:

In [12]:
# Run cytoplasm_query against UniProt, asking for the hits in FASTA format.
cytoplasm_result = service.search(cytoplasm_query, frmt="fasta")

# Open in write mode ('w') so the cell is idempotent: re-running it replaces
# the file, whereas append mode would add a second copy of every record.
cytoplasm_outfile = 'human_cytoplasm.fasta'
with open(cytoplasm_outfile, 'w') as ofh:
    ofh.write(cytoplasm_result)

# Repeating for bacteria to get more data

### bacteria membrane:

In [5]:
# Bacterial version of the membrane search: GO term 0016020 with a taxonomy filter.
# NOTE: this rebinds membrane_result, which other cells in this notebook also assign.
prokaryote_membrane_query = "go:0016020 taxonomy:bacteria reviewed:yes"
membrane_result = service.search(
    prokaryote_membrane_query,
    frmt="fasta",
)

In [6]:
# Save whatever membrane_result currently holds as the bacterial membrane FASTA file.
# Write mode ('w') makes the cell idempotent: re-running it replaces the file,
# whereas append mode would add a second copy of every record.
membrane_outfile = 'bacteria_membrane.fasta'
with open(membrane_outfile, 'w') as ofh:
    ofh.write(membrane_result)

### bacteria secreted:

In [8]:
# Bacterial version of the secreted search: GO term 0005576 with a taxonomy filter.
# NOTE: this rebinds secreted_result, which other cells in this notebook also assign.
prokaryote_secreted_query = "go:0005576 taxonomy:bacteria reviewed:yes"
secreted_result = service.search(
    prokaryote_secreted_query,
    frmt="fasta",
)

In [9]:
# Save whatever secreted_result currently holds as the bacterial secreted FASTA file.
# Write mode ('w') makes the cell idempotent: re-running it replaces the file,
# whereas append mode would add a second copy of every record.
secreted_outfile = 'bacteria_secreted.fasta'
with open(secreted_outfile, 'w') as ofh:
    ofh.write(secreted_result)

### bacteria cytoplasm:

In [10]:
# Bacterial version of the cytoplasm search: GO term 0005737 with a taxonomy filter.
# NOTE: this rebinds cytoplasm_result, which other cells in this notebook also assign.
prokaryote_cytoplasm_query = "go:0005737 taxonomy:bacteria reviewed:yes"
cytoplasm_result = service.search(
    prokaryote_cytoplasm_query,
    frmt="fasta",
)

In [11]:
# Save whatever cytoplasm_result currently holds as the bacterial cytoplasm FASTA file.
# Write mode ('w') makes the cell idempotent: re-running it replaces the file,
# whereas append mode would add a second copy of every record.
cytoplasm_outfile = 'bacteria_cytoplasm.fasta'
with open(cytoplasm_outfile, 'w') as ofh:
    ofh.write(cytoplasm_result)

# Repeating for all mammals to get more data

### mammal membrane:

In [12]:
# Mammalian version of the membrane search: GO term 0016020 with a taxonomy filter.
# NOTE: this rebinds membrane_result, which other cells in this notebook also assign.
mammal_membrane_query = "go:0016020 taxonomy:mammalia reviewed:yes"
membrane_result = service.search(
    mammal_membrane_query,
    frmt="fasta",
)

In [13]:
# Save whatever membrane_result currently holds as the mammalian membrane FASTA file.
# Write mode ('w') makes the cell idempotent: re-running it replaces the file,
# whereas append mode would add a second copy of every record.
membrane_outfile = 'mammal_membrane.fasta'
with open(membrane_outfile, 'w') as ofh:
    ofh.write(membrane_result)

### mammal secreted:

In [14]:
# Mammalian version of the secreted search: GO term 0005576 with a taxonomy filter.
# NOTE: this rebinds secreted_result, which other cells in this notebook also assign.
mammal_secreted_query = "go:0005576 taxonomy:mammalia reviewed:yes"
secreted_result = service.search(
    mammal_secreted_query,
    frmt="fasta",
)

In [15]:
# Save whatever secreted_result currently holds as the mammalian secreted FASTA file.
# Write mode ('w') makes the cell idempotent: re-running it replaces the file,
# whereas append mode would add a second copy of every record.
secreted_outfile = 'mammal_secreted.fasta'
with open(secreted_outfile, 'w') as ofh:
    ofh.write(secreted_result)

### mammal cytoplasm:

In [16]:
# Mammalian version of the cytoplasm search: GO term 0005737 with a taxonomy filter.
# NOTE: this rebinds cytoplasm_result, which other cells in this notebook also assign.
mammal_cytoplasm_query = "go:0005737 taxonomy:mammalia reviewed:yes"
cytoplasm_result = service.search(
    mammal_cytoplasm_query,
    frmt="fasta",
)

In [17]:
# Save whatever cytoplasm_result currently holds as the mammalian cytoplasm FASTA file.
# Write mode ('w') makes the cell idempotent: re-running it replaces the file,
# whereas append mode would add a second copy of every record.
cytoplasm_outfile = 'mammal_cytoplasm.fasta'
with open(cytoplasm_outfile, 'w') as ofh:
    ofh.write(cytoplasm_result)

# Repeating for yeast to get more data

### yeast membrane:

In [18]:
# "Yeast" version of the membrane search: GO term 0016020 with a taxonomy filter.
# NOTE(review): 'saccharomyceta' looks broader than yeast alone - confirm the intended scope.
# NOTE: this rebinds membrane_result, which other cells in this notebook also assign.
yeast_membrane_query = "go:0016020 taxonomy:saccharomyceta reviewed:yes"
membrane_result = service.search(
    yeast_membrane_query,
    frmt="fasta",
)

In [19]:
# Save whatever membrane_result currently holds as the yeast membrane FASTA file.
# Write mode ('w') makes the cell idempotent: re-running it replaces the file,
# whereas append mode would add a second copy of every record.
membrane_outfile = 'yeast_membrane.fasta'
with open(membrane_outfile, 'w') as ofh:
    ofh.write(membrane_result)

### yeast secreted:

In [20]:
# "Yeast" version of the secreted search: GO term 0005576 with a taxonomy filter.
# NOTE(review): 'saccharomyceta' looks broader than yeast alone - confirm the intended scope.
# NOTE: this rebinds secreted_result, which other cells in this notebook also assign.
yeast_secreted_query = "go:0005576 taxonomy:saccharomyceta reviewed:yes"
secreted_result = service.search(
    yeast_secreted_query,
    frmt="fasta",
)

In [21]:
# Save whatever secreted_result currently holds as the yeast secreted FASTA file.
# Write mode ('w') makes the cell idempotent: re-running it replaces the file,
# whereas append mode would add a second copy of every record.
secreted_outfile = 'yeast_secreted.fasta'
with open(secreted_outfile, 'w') as ofh:
    ofh.write(secreted_result)

### yeast cytoplasm:

In [22]:
# "Yeast" version of the cytoplasm search: GO term 0005737 with a taxonomy filter.
# NOTE(review): 'saccharomyceta' looks broader than yeast alone - confirm the intended scope.
# NOTE: this rebinds cytoplasm_result, which other cells in this notebook also assign.
yeast_cytoplasm_query = "go:0005737 taxonomy:saccharomyceta reviewed:yes"
cytoplasm_result = service.search(
    yeast_cytoplasm_query,
    frmt="fasta",
)

In [23]:
# Save whatever cytoplasm_result currently holds as the yeast cytoplasm FASTA file.
# Write mode ('w') makes the cell idempotent: re-running it replaces the file,
# whereas append mode would add a second copy of every record.
cytoplasm_outfile = 'yeast_cytoplasm.fasta'
with open(cytoplasm_outfile, 'w') as ofh:
    ofh.write(cytoplasm_result)