# SARS-CoV-2 Crystal $M_{pro}$ Structure Analysis

In [2]:
import os
import glob
import requests
from Bio.PDB import PDBList, PDBParser

## Functions

In [3]:
# Find all pdb files in a directory and return them as a list
def find_pdb_files(directory):
    """
    Finds all .pdb files in the specified directory and returns them as a list.

    Parameters:
    - directory: The path to the directory where .pdb files should be searched.

    Returns:
    - List of paths to .pdb files found in the directory.
    """
    # Use glob to find all .pdb files in the directory
    pdb_files = glob.glob(os.path.join(directory, '*.pdb'))

    # Optional: Print the list of found .pdb files
    print("Found PDB files:")
    for file in pdb_files:
        print(file)

    # Return the list of PDB files
    return pdb_files

In [None]:
# Function to download PDB files
PDB_BASE_URL = "https://files.rcsb.org/download/"

def download_pdbs(pdb_codes_file, output_dir):
    """
    Download PDB structures from a text file containing PDB codes.

    Parameters:
    - pdb_codes_file: Path to the text file containing PDB codes (one per line)
    - output_dir: Directory to save downloaded PDB files
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Read PDB codes from the file
    with open(pdb_codes_file, 'r') as f:
        pdb_codes = [line.strip().upper() for line in f if line.strip()]

    # Download each PDB structure
    for pdb_code in pdb_codes:
        try:
            pdb_url = f"{PDB_BASE_URL}{pdb_code}.pdb"
            response = requests.get(pdb_url)
            response.raise_for_status()  # Check for request errors

            pdb_file_path = os.path.join(output_dir, f"{pdb_code}.pdb")
            with open(pdb_file_path, 'wb') as pdb_file:
                pdb_file.write(response.content)

            print(f"Successfully downloaded: {pdb_code}")
        except Exception as e:
            print(f"Failed to download {pdb_code}: {e}")

    print(f"Download complete. Files are saved in: {output_dir}")

    return pdb_codes

In [1]:
# Functions to get resolutions of crystal structures from pdb files
def get_pdb_resolution(pdb_code):
    """
    Fetches the resolution of a PDB structure from the downloaded file.

    Parameters:
    - pdb_code: The 4-character PDB code

    Returns:
    - Resolution (float) if available, otherwise None
    """
    # Initialize PDBList to download the structure
    pdbl = PDBList()

    # Download the PDB file (in .ent format by default)
    pdb_file = pdbl.retrieve_pdb_file(pdb_code, pdir=".", file_format="pdb")

    # Parse the downloaded PDB file
    parser = PDBParser(QUIET=True)

    try:
        structure = parser.get_structure(pdb_code, pdb_file)
        # Extract resolution from the header
        resolution = structure.header.get('resolution')
        return resolution
    except Exception as e:
        print(f"Error processing {pdb_code}: {e}")
        return None

def fetch_resolutions_from_file(pdb_codes_file):
    """
    Reads a list of PDB codes from a text file and fetches their resolutions.

    Parameters:
    - pdb_codes_file: Path to the text file containing PDB codes (one per line)

    Returns:
    - A dictionary with PDB codes as keys and their resolutions as values
    """
    resolutions = {}

    # Read PDB codes from the file
    with open(pdb_codes_file, 'r') as f:
        pdb_codes = [line.strip().upper() for line in f if line.strip()]

    # Fetch resolution for each PDB code
    for pdb_code in pdb_codes:
        resolution = get_pdb_resolution(pdb_code)
        resolutions[pdb_code] = resolution
        print(f"PDB Code: {pdb_code}, Resolution: {resolution}")

    return resolutions


# Scripts

In [None]:
# Download PDB files into Directory
download_pdbs("covid_moonshot_pdb_ids.txt","./Moonshot_crystal_structures/")

In [4]:
# Find all .pdb files in the directory
pdb_files = find_pdb_files("./Moonshot_crystal_structures/")

Found PDB files:
./Moonshot_crystal_structures/7GKJ.pdb
./Moonshot_crystal_structures/7GH0.pdb
./Moonshot_crystal_structures/7GHQ.pdb
./Moonshot_crystal_structures/7GI4.pdb
./Moonshot_crystal_structures/7GJN.pdb
./Moonshot_crystal_structures/7GIU.pdb
./Moonshot_crystal_structures/7GHF.pdb
./Moonshot_crystal_structures/7GJ8.pdb
./Moonshot_crystal_structures/7GIB.pdb
./Moonshot_crystal_structures/7GJY.pdb
./Moonshot_crystal_structures/7GDR.pdb
./Moonshot_crystal_structures/7GGI.pdb
./Moonshot_crystal_structures/7GD3.pdb
./Moonshot_crystal_structures/7GEV.pdb
./Moonshot_crystal_structures/7GE7.pdb
./Moonshot_crystal_structures/7GFM.pdb
./Moonshot_crystal_structures/7GDE.pdb
./Moonshot_crystal_structures/7GFZ.pdb
./Moonshot_crystal_structures/7GEA.pdb
./Moonshot_crystal_structures/7GDD.pdb
./Moonshot_crystal_structures/7GFL.pdb
./Moonshot_crystal_structures/7GE6.pdb
./Moonshot_crystal_structures/7GEW.pdb
./Moonshot_crystal_structures/7GD2.pdb
./Moonshot_crystal_structures/7GGH.pdb
./Moonsh

In [None]:
# Get the PDB ids from the text file
with open("covid_moonshot_pdb_ids.txt", "r") as f:
    pdb_codes = [line.strip() for line in f]