# GASS-PPI

## Section 0: Libraries, Global Variables, Utility Functions

In [35]:
import subprocess
import re
import numpy as np
from Bio.PDB import *

# Directory handed to the preprocessing script as its first argument (see generate_lha_file).
# NOTE(review): this is a hard-coded, machine-specific absolute path; the notebook will not
# run on another machine until it is made configurable (relative path or environment variable).
current_directory = "/Users/albertdarmawan/Documents/gass-ppi/core/"
# Shared PDB parser instance; PDBParser is presumably supplied by the wildcard
# `from Bio.PDB import *` above.
pdb_parser = PDBParser()

In [36]:
# A residue is represented by its Last Heavy Atom (LHA)
# A residue is a "gene" in genetic-algorithm terms
# An individual is a list of residues
# Fields: 2 residue attributes, 1 chain attribute, 2 atom (LHA) attributes
class Residue:
    """Plain data holder for one residue, identified by its Last Heavy Atom (LHA).

    The constructor stores its five arguments unchanged and performs no
    validation: two describe the residue, one the chain, and two the LHA.
    """

    def __init__(self, residue_name, residue_sequence_position, chain_name, lha_name, lha_coordinates):
        # Residue-level information.
        self.residue_name, self.residue_sequence_position = (
            residue_name,
            residue_sequence_position,
        )
        # Chain-level information.
        self.chain_name = chain_name
        # Atom-level (LHA) information.
        # presumably lha_coordinates is a 3D coordinate — TODO confirm against callers
        self.lha_name, self.lha_coordinates = lha_name, lha_coordinates

In [37]:
# Euclidean Distance
# Given two 3D coordinates (1D NumPy arrays), calculate their Euclidean distance
def euclidean_distance(coordinate_1, coordinate_2):
    """Return the Euclidean distance between two 3D points as a Python float.

    Only indices 0, 1 and 2 of each argument are read, so any indexable
    sequence holding at least three numbers can be passed.
    """
    # Per-axis differences between the two points.
    delta_x = coordinate_1[0] - coordinate_2[0]
    delta_y = coordinate_1[1] - coordinate_2[1]
    delta_z = coordinate_1[2] - coordinate_2[2]

    squared_length = delta_x ** 2 + delta_y ** 2 + delta_z ** 2
    # np.sqrt yields a NumPy scalar; convert it to a built-in float.
    return float(np.sqrt(squared_length))

In [38]:
# Using Sandro's script, generate a new PDB file with only its LHA
def generate_lha_file(pdb_id):
    """Run the PDB preprocessing script for one structure and echo its output.

    Executes ``python pdb-preprocessing.py <current_directory> <pdb_id>.pdb``
    and prints the script's stdout. Per the comment on the original cell, the
    script is expected to generate a new PDB file containing only the LHA.

    The script path is relative, so it is resolved against the notebook's
    working directory.

    Args:
        pdb_id: PDB identifier; ".pdb" is appended to build the file name.

    Returns:
        None. If the script exits with a non-zero code, its exit code and
        stderr are printed as well instead of being silently discarded.
    """
    pdb_file_name = pdb_id + ".pdb"
    completed_process = subprocess.run(
        ["python", "pdb-preprocessing.py", current_directory, pdb_file_name],
        capture_output=True,
        text=True,
    )
    print(completed_process.stdout)

    # stderr is captured above, so without this a failing script would leave
    # no trace in the notebook output.
    if completed_process.returncode != 0:
        print(
            "pdb-preprocessing.py exited with code "
            + str(completed_process.returncode)
            + ":\n"
            + completed_process.stderr
        )

In [39]:
# TMAlign Structural Alignment (https://zhanggroup.org/TM-score/)
# Compares the structural similarity of two PDB structures (regardless of their rotation)
# Returns the TM-score as a float
# The TM-score lies in (0, 1]: 1 indicates a perfect match, >0.5 means the structures are similar enough, <0.17 means two unrelated structures.
def tmalign_structural_alignment(pdb_id_1, pdb_id_2):
    """Run TMalign on two PDB files and return the highest TM-score it reports.

    Each PDB id is lower-cased and suffixed with ".pdb" to form a file name.
    Both the file names and the ``./TMalign`` executable are relative, so they
    are resolved against the notebook's working directory.

    Args:
        pdb_id_1: PDB identifier of the first structure.
        pdb_id_2: PDB identifier of the second structure.

    Returns:
        The maximum of all ``TM-score=`` values found in TMalign's stdout,
        as a float.

    Raises:
        ValueError: if TMalign's stdout contains no ``TM-score=`` value (for
            example because TMalign failed); the message includes the exit
            code and stderr to make the failure diagnosable.
    """
    pdb_id_1_filename = pdb_id_1.lower() + ".pdb"
    pdb_id_2_filename = pdb_id_2.lower() + ".pdb"

    # Execute TMalign and capture its textual report.
    tmalign_result = subprocess.run(
        ["./TMalign", pdb_id_1_filename, pdb_id_2_filename],
        capture_output=True,
        text=True,
    )

    # Raw string so that \s and \. reach the regex engine as written; the
    # capture group returns just the number, and the escaped dot only matches
    # a literal decimal point.
    tmscore_list = [
        float(score_text)
        for score_text in re.findall(r"TM-score=\s([0-9]+\.[0-9]+)", tmalign_result.stdout)
    ]

    if not tmscore_list:
        # Same exception type max() would raise on an empty list, but with
        # enough context to see why TMalign produced no score.
        raise ValueError(
            "No TM-score found in TMalign output for "
            + pdb_id_1_filename
            + " vs "
            + pdb_id_2_filename
            + " (exit code "
            + str(tmalign_result.returncode)
            + "): "
            + (tmalign_result.stderr or "").strip()
        )

    return max(tmscore_list)

# Example run: align 1pme against 4n4s and print the resulting TM-score.
# This executes ./TMalign on 1pme.pdb and 4n4s.pdb, so the binary and both
# files must be reachable from the notebook's working directory.
tmscore = tmalign_structural_alignment("1pme", "4n4s")
print(tmscore)

0.94456


## Section 1: Proof-of-Concept (with Antigen-Antibody in DBD5)

In [22]:
# Main Program
# PDB identifier of the structure to analyze (presumably the query protein for
# the steps below — TODO confirm once those steps are implemented).
input_pdb_id = "3nos"

# Step 1: Find structural similarities with other proteins

# Step 2: 

# Step 3: 