# Download PDB files used for Identifying Local Epitope Residues

Run the cells below to download PDB files used within the analysis.

## Import required packages

In [5]:
import os
import sys
import pandas as pd
import urllib.request
import Bio.PDB

## Import PDB file information

In [6]:
pdbids = pd.read_csv("additional_data/pdb_identifiers.csv")
pdbids

Unnamed: 0,Antibody,Short_Name,PDB_id,Primary_Chains,Secondary_Chains
0,Casirivimab,CoV_binder_1,6XDG,e,bd
1,Imdevimab,CoV_binder_2,7ZJL,abc,ghijkl
2,Bamlanivimab,CoV_binder_3,7KMG,cf,abde
3,Regdanvimab,CoV_binder_4,7CM4,a,hi
4,Tixagevimab,CoV_binder_6,7L7D,e,hl
5,Cilgavimab,CoV_binder_7,8SUO,a,im
6,Etesevimab,CoV_binder_8,7F7E,e,cl
7,COR-101,CoV_binder_9,7B3O,e,hl
8,CC12.1,CoV_binder_10,8CWV,a,hl
9,Fab-52,CoV_binder_11,7K9Z,e,hl


## Function to download PDB file from source

In [7]:
def download_pdb_file(pdb_id, data_dir, base_url = "https://files.rcsb.org/download/"):
    """
    Download PDB files to data directory.
    Inputs:
      - pdb_ids: string identifier for PDB file.
      - data_dir: string path to download PDB file.
      - base_url: string url path to download PDB files (source).
    Outputs:
      - Print statements indicating downloads or errors.
      - File saved to local path given by 'data_dir' input.
    """
    pdb_file_name = pdb_id + ".pdb"
    source_path = base_url + pdb_file_name
    output_path = os.path.join(data_dir, pdb_file_name)
    print(f"Request {pdb_id} file from {source_path} to be downloaded to {output_path}.")
    try:
        urllib.request.urlretrieve(source_path, output_path)
        return(output_path)
    except Exception as err:
        print(str(err), file = sys.stderr)
        return None

## Create PDB file directory if it doesn't exist

In [8]:
data_dir = "./pdb_files/"
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

## Download PDB files

In [9]:
for i in range(pdbids.shape[0]):
    download_pdb_file(pdbids.PDB_id[i], data_dir)

Request 6XDG file from https://files.rcsb.org/download/6XDG.pdb to be downloaded to ./pdb_files/6XDG.pdb.
Request 7ZJL file from https://files.rcsb.org/download/7ZJL.pdb to be downloaded to ./pdb_files/7ZJL.pdb.
Request 7KMG file from https://files.rcsb.org/download/7KMG.pdb to be downloaded to ./pdb_files/7KMG.pdb.
Request 7CM4 file from https://files.rcsb.org/download/7CM4.pdb to be downloaded to ./pdb_files/7CM4.pdb.
Request 7L7D file from https://files.rcsb.org/download/7L7D.pdb to be downloaded to ./pdb_files/7L7D.pdb.
Request 8SUO file from https://files.rcsb.org/download/8SUO.pdb to be downloaded to ./pdb_files/8SUO.pdb.
Request 7F7E file from https://files.rcsb.org/download/7F7E.pdb to be downloaded to ./pdb_files/7F7E.pdb.
Request 7B3O file from https://files.rcsb.org/download/7B3O.pdb to be downloaded to ./pdb_files/7B3O.pdb.
Request 8CWV file from https://files.rcsb.org/download/8CWV.pdb to be downloaded to ./pdb_files/8CWV.pdb.
Request 7K9Z file from https://files.rcsb.org/

In [10]:
print("Operation has completed.")

Operation has completed.


## End of notebook