In [1]:
import csv
from pathlib import Path

import numpy as np
from rcsbapi.data import DataQuery as Query
from rcsbapi.search import TextQuery
from rcsbapi.search import search_attributes as attrs

In [63]:
# Search parameters.
# `resolution` is the upper bound applied to both resolution attributes below.
resolution = 2.8
# Explicit switch between the two searches. Previously both queries were
# assigned to `query` back to back, so the membrane query was silently
# overwritten and never ran. False keeps that effective behaviour (all proteins).
membrane_only = False

# Membrane proteins: one annotation-type filter per membrane-protein resource.
q1 = attrs.rcsb_polymer_entity_annotation.type == 'PDBTM'
q2 = attrs.rcsb_polymer_entity_annotation.type == 'MemProtMD'
q3 = attrs.rcsb_polymer_entity_annotation.type == 'OPM'
q4 = attrs.rcsb_polymer_entity_annotation.type == 'mpstruc'
# Date: deposited on or after 2000-01-01.
q5 = attrs.rcsb_accession_info.deposit_date >= '2000-01-01'
# Resolution x-ray
q6 = attrs.rcsb_entry_info.diffrn_resolution_high.value <= resolution
# Resolution cryo-EM
q7 = attrs.em_3d_reconstruction.resolution <= resolution
# Proteins
q8 = attrs.entity_poly.rcsb_entity_polymer_type == "Protein"

# Membrane search: any membrane annotation AND date AND either resolution AND protein.
membrane_query = (q1 | q2 | q3 | q4) & q5 & (q6 | q7) & q8
# All protein search: same filters without the membrane annotation requirement.
all_protein_query = q5 & (q6 | q7) & q8

query = membrane_query if membrane_only else all_protein_query

# Calling the query object runs the search; the results are collected into a list.
# NOTE(review): presumably these are PDB entry IDs, since the next cell passes
# them as input_ids with input_type="entries" — confirm the search return type.
ids_list = list(query())

In [64]:
# Fields requested from the Data API for every entry: the entry ID plus, for
# its polymer entities, the chain (asym) IDs, the canonical one-letter
# sequence and the polymer type.
return_fields = [
    'rcsb_id',
    'polymer_entities.rcsb_polymer_entity_container_identifiers.asym_ids',
    'polymer_entities.entity_poly.pdbx_seq_one_letter_code_can',
    'polymer_entities.entity_poly.type',
]

# Build the data query over the IDs found by the search cell, then execute it.
query = Query(input_type="entries", input_ids=ids_list, return_data_list=return_fields)
result_dict = query.exec()



In [65]:
# Flatten the Data API response into CSV-ready rows of
# [pdb_id, chains, sequence, mol_type], one row per polymer entity.
# Previously only polymer_entities[0] was read, so every additional entity of
# a multi-entity entry was dropped from the output.
# NOTE(review): rows for non-protein entities of an entry are now included too;
# filter on the mol_type column downstream if only proteins are wanted.
search_list = []
for entry in result_dict['data']['entries']:
    pdb_id = entry['rcsb_id']
    # `or []` / `or {}` tolerate a missing or null field instead of raising.
    for entity in entry.get('polymer_entities') or []:
        identifiers = entity.get('rcsb_polymer_entity_container_identifiers') or {}
        entity_poly = entity.get('entity_poly') or {}
        # Chain IDs are joined into a single comma-separated cell.
        chains = ', '.join(identifiers.get('asym_ids') or [])
        sequence = entity_poly.get('pdbx_seq_one_letter_code_can')
        mol_type = entity_poly.get('type')
        search_list.append([pdb_id, chains, sequence, mol_type])

In [5]:
# Write the collected rows to CSV (no header row; one inner list per line).
# NOTE(review): the file name says "membrane", but the search cell may have run
# the all-protein query — confirm the name matches the query that built the rows.
path = "./data/high_resolution_2dot8_membrane_protein_sequences_after_2000.csv"
# Create the output directory if needed so the cell works on a fresh checkout.
Path(path).parent.mkdir(parents=True, exist_ok=True)
# newline='' lets the csv module control line endings; the encoding is pinned
# so the output does not depend on the platform default.
with open(path, "w", newline='', encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerows(search_list)   # writes all the inner lists as rows