# Search RCSB with PDB IDs
## Goal
Given a series of DALI hits with PDB IDs, I wish to retrieve each hit's "Classification" attribute, which is where keywords such as cell adhesion are recorded.

In [1]:
from pypdb import *
import csv

Get a general description of the entry's metadata

In [2]:
# Fetch the metadata for one example entry (PDB ID 2ODL) and list the
# top-level sections it contains.
exmp = get_info("2ODL")
exmp.keys()

dict_keys(['audit_author', 'cell', 'citation', 'diffrn', 'diffrn_detector', 'diffrn_radiation', 'diffrn_source', 'entry', 'exptl', 'exptl_crystal', 'exptl_crystal_grow', 'pdbx_audit_revision_details', 'pdbx_audit_revision_group', 'pdbx_audit_revision_history', 'pdbx_database_status', 'pdbx_vrpt_summary', 'rcsb_accession_info', 'rcsb_entry_container_identifiers', 'rcsb_entry_info', 'rcsb_primary_citation', 'refine', 'refine_hist', 'refine_ls_restr', 'reflns', 'reflns_shell', 'software', 'struct', 'struct_keywords', 'symmetry', 'rcsb_id'])

The relevant information is in the 'struct_keywords' section of the entry.

In [3]:
# Show the 'struct_keywords' section of the example entry.
exmp.get('struct_keywords')

{'pdbx_keywords': 'CELL ADHESION',
 'text': 'HMW1, secretion domain, beta helix, CELL ADHESION'}

Now let's check all the entries one-by-one. First, we will load the CSV file containing all the hits.

In [70]:
# Append the RCSB classification keyword to every DALI hit.
#
# Reads the parsed hit table from IN, looks up each hit's PDB entry with
# get_info(), and writes the same rows to OUT with an extra 'keyword'
# column holding the entry's struct_keywords.pdbx_keywords value
# ('NA' when the entry or the keyword is unavailable).
IN = "CaurHil1-PF11765-PDB50-parsed.csv"
OUT = "CaurHil1-PF11765-PDB50-appended.csv"
fnames = ['No', 'Chain', 'Z', 'rmsd', 'lali', 'nres',
          '%id PDB', 'Description', 'keyword']

# PDB ID -> keyword for entries that were retrieved successfully. Several
# rows may share one PDB ID (the part of 'Chain' before the '-'), so this
# avoids repeating the same request. Failed retrievals are not cached and
# are therefore retried on the next row with the same ID.
keyword_cache = {}

# newline='' is what the csv module documents for files handed to
# reader/writer objects; without it the writer can emit blank rows on
# platforms that translate '\n' to '\r\n'.
with open(IN, 'r', newline='') as infile, \
        open(OUT, 'w', newline='') as outfile:
    reader = csv.DictReader(infile)
    writer = csv.DictWriter(outfile, fieldnames=fnames)

    writer.writeheader()

    row_count = 0

    for row in reader:
        # 'Chain' looks like '<pdb id>-<chain>'; keep only the PDB ID.
        pdb_id = row['Chain'].split('-')[0]

        if pdb_id not in keyword_cache:
            info = get_info(pdb_id)
            if info is not None:
                # An entry may lack the 'struct_keywords' section
                # altogether; treat that the same as a missing keyword
                # instead of failing on None.get(...).
                struct_keywords = info.get('struct_keywords') or {}
                keyword = struct_keywords.get('pdbx_keywords')
                keyword_cache[pdb_id] = 'NA' if keyword is None else keyword

        row['keyword'] = keyword_cache.get(pdb_id, 'NA')
        writer.writerow(row)
        row_count += 1