In [2]:
import numpy as np
from Bio.PDB import PDBParser
import os
import matplotlib.pyplot as plt





In [3]:
# Let's parse out contact maps, or graphical representations of interactions between individual atoms in amino acid side chains
# The distance cutoff is 8.0 angstroms, or 10^-10 meters. This is the typical threshold for a meaningful proximity between atoms.

def parse_contact_map(pdb_file, distance_cutoff=8.0):
    parser = PDBParser()
    structure = parser.get_structure('protein', pdb_file)
    
    # Initialize contact map matrix
    num_atoms = sum(1 for _ in structure.get_atoms())
    contact_map = np.zeros((num_atoms, num_atoms), dtype=bool)
    
    # Calculate distances and populate contact map
    for atom1 in structure.get_atoms():
        for atom2 in structure.get_atoms():
            distance = atom1 - atom2
            if distance <= distance_cutoff:
                contact_map[atom1.get_id() - 1, atom2.get_id() - 1] = True
    
    return contact_map



In [4]:
def generate_contact_map(pdb_file, output_dir, distance_cutoff=8.0):
    # Parse PDB file and generate contact map
    contact_map = parse_contact_map(pdb_file, distance_cutoff)
    
    # Create output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Save contact map to output directory
    pdb_name = os.path.splitext(os.path.basename(pdb_file))[0]
    output_file = os.path.join(output_dir, f'{pdb_name}_contact_map.npy')
    np.save(output_file, contact_map)

In [8]:
generate_contact_map("output_dir/A7MB80.cif", "contact_map_dir")

ValueError: Empty file.

In [None]:
#Generate loop so these functions pass through every PDB file inside the output_dir directory.
input_dir = 'output_dir'
output_dir = 'contact_map_dir'
#Use .cif files to generate contact maps, not .pdb
for filename in os.listdir(input_dir):
    if filename.endswith('.cif'):
        pdb_file = os.path.join(input_dir, filename)
        generate_contact_map(pdb_file, output_dir)

In [6]:

#Purge empty contact maps and keep the informative ones. Keeping empty contact maps clogs up the directory and hinders downstream analyses.
        
def purge_empty_contact_maps(contact_map_dir):
    """
    Purge empty contact maps from the specified directory.
    
    Parameters:
        contact_map_dir (str): Path to the directory containing contact map files.
    """
    for filename in os.listdir(contact_map_dir):
        if filename.endswith('.npy'):
            contact_map_path = os.path.join(contact_map_dir, filename)
            contact_map = np.load(contact_map_path)
            if contact_map.size == 0 or contact_map.max() == 0:
                os.remove(contact_map_path)
                print(f"Empty contact map '{filename}' removed.")

purge_empty_contact_maps('contact_map_dir')
        


Empty contact map 'Q9UNZ2_contact_map.npy' removed.
Empty contact map 'Q99PW4_contact_map.npy' removed.
Empty contact map 'Q14188_contact_map.npy' removed.
Empty contact map 'P58195_contact_map.npy' removed.
Empty contact map 'Q9EQE1_contact_map.npy' removed.
Empty contact map 'Q9C0D0_contact_map.npy' removed.
Empty contact map 'Q96IF1_contact_map.npy' removed.
Empty contact map 'Q9WUU8_contact_map.npy' removed.
Empty contact map 'Q9ESK9_contact_map.npy' removed.
Empty contact map 'Q1RMS5_contact_map.npy' removed.
Empty contact map 'P42681_contact_map.npy' removed.
Empty contact map 'Q8C9B9_contact_map.npy' removed.
Empty contact map 'P15533_contact_map.npy' removed.
Empty contact map 'Q66HD3_contact_map.npy' removed.
Empty contact map 'P35583_contact_map.npy' removed.
Empty contact map 'O35618_contact_map.npy' removed.
Empty contact map 'Q309Z6_contact_map.npy' removed.
Empty contact map 'O14776_contact_map.npy' removed.
Empty contact map 'P55263_contact_map.npy' removed.
Empty contac

In [None]:
#Visualize contact maps generated inside contact_map_dir by using matplotlib inside a function and putting that function in a for loop
#Due to lack of meaningful interactions, some contact maps may appear as blank, so we'll have to remove them for later analysis and saving space.

def plot_contact_map(contact_map, output_filename):
    if contact_map.size == 0:
        print("Empty contact map, skipping plot.")
        return
    
    plt.imshow(contact_map, cmap='binary', origin='lower')
    plt.xlabel('Residue Index')
    plt.ylabel('Residue Index')
    plt.title('Contact Map')
    plt.colorbar(label='Contact')
    plt.savefig(output_filename)
    plt.close()

In [None]:
# First define output directory (at least in the context of this particular notebook)
output_dir = 'contact_map_dir'
for filename in os.listdir(output_dir):
    if filename.endswith('.npy'):
        contact_map = np.load(os.path.join(output_dir, filename))
        output_filename = os.path.join(output_dir, os.path.splitext(filename)[0] + ".png")
        plot_contact_map(contact_map, output_filename)