In [1]:
# Some utility classes to represent a PDB structure

class Atom:
    """
    A single atom of a PDB structure.

    Attributes:
        type: the atom name passed to the constructor (e.g. "CA", "OD1").
        coords: an (x, y, z) tuple; the origin unless coordinates are given.
    """

    def __init__(self, type, coords=(0.0, 0.0, 0.0)):
        # `type` shadows the builtin, but the parameter name is kept so that
        # existing callers (positional or keyword) continue to work.
        self.type = type
        # Stored as a tuple so a caller-supplied list is not shared with us.
        self.coords = tuple(coords)

    # Overload the __repr__ operator to make printing simpler.
    def __repr__(self):
        return self.type

class Residue:
    """
    One amino acid residue of an active site.

    Attributes:
        type: the residue type given at construction (e.g. "ASP").
        number: the residue number given at construction (e.g. 165).
        atoms: a list that starts empty; callers append the atoms that
            make up this residue.
    """

    def __init__(self, type, number):
        self.atoms = []
        self.number = number
        self.type = type

    def __repr__(self):
        # Rendered as "<type> <number>" (e.g. "ASP 165") so that printed
        # lists of residues stay short and readable.
        return "{0.type} {0.number}".format(self)

class ActiveSite:
    """
    Container for a single active site.

    Attributes:
        name: identifier given at construction; it is also what repr()
            returns.
        residues: a list that starts empty; callers append the residues
            belonging to this active site, each of which can be inspected
            further.
    """

    def __init__(self, name):
        self.residues = []
        self.name = name

    def __repr__(self):
        # Printing an ActiveSite (or a list of them) shows only its name.
        return self.name

In [2]:
import glob
import os
# The import from utils was removed: Atom, Residue and ActiveSite are defined in the cell above.


def read_active_sites(dir):
    """
    Read in all of the active sites from the given directory.

    Every file matching "*.pdb" directly inside `dir` is parsed with
    read_active_site(); sub-directories are not searched.

    Input: directory
    Output: list of ActiveSite instances, in the order glob yields the
            files (no sorting is applied here)
    """
    # Scan the directory once, lazily; os.path.join copes with `dir` being
    # given with or without a trailing separator.
    pattern = os.path.join(dir, "*.pdb")
    active_sites = [read_active_site(filepath) for filepath in glob.iglob(pattern)]

    print("Read in %d active sites"%len(active_sites))

    return active_sites


def read_active_site(filepath):
    """
    Read in a single active site given a PDB file.

    Only ATOM/HETATM records contribute atoms. TER records close the residue
    currently being built, and every other line (REMARK, END, blank lines,
    ...) is ignored. Fields are read from the fixed PDB columns (1-based):
    atom name 13-16, residue name 18-20, chain 22, residue number 23-26,
    insertion code 27, and x / y / z in 31-38 / 39-46 / 47-54.

    A new residue is started whenever the (chain, residue number, insertion
    code) of an atom differs from that of the previous atom, or after a TER
    record. A finished residue is stored when the next one starts, on TER,
    and at end of file, so residues are kept even if TER records are missing.

    Input: PDB file path
    Output: ActiveSite instance named after the file (without extension)
    Raises: IOError if the file name does not end in ".pdb"; ValueError if
            a coordinate or residue-number field cannot be parsed
    """
    basename = os.path.basename(filepath)
    stem, extension = os.path.splitext(basename)

    if extension != ".pdb":
        raise IOError("%s is not a PDB file"%filepath)

    active_site = ActiveSite(stem)

    residue = None      # residue currently being filled, if any
    residue_key = None  # (chain, number, insertion code) of that residue

    # open pdb file
    with open(filepath, "r") as f:
        # iterate over each line in the file
        for line in f:
            if line.startswith("TER"):
                # I've reached a TER card: store the open residue (if any)
                # exactly once and force the next atom to start a new one.
                if residue is not None:
                    active_site.residues.append(residue)
                residue = None
                residue_key = None
                continue

            if line[0:6].strip() not in ("ATOM", "HETATM"):
                # Not a coordinate record; nothing to parse.
                continue

            # Python slices are 0-based and end-exclusive, so PDB columns
            # a-b correspond to line[a-1:b].
            atom = Atom(line[12:16].strip())
            atom.coords = (
                float(line[30:38]),
                float(line[38:46]),
                float(line[46:54]),
            )

            residue_type = line[17:20]
            residue_number = int(line[22:26])
            key = (line[21:22], residue_number, line[26:27])

            # make a new residue if needed
            if residue is None or key != residue_key:
                if residue is not None:
                    # The previous residue was not closed by a TER record.
                    active_site.residues.append(residue)
                residue = Residue(residue_type, residue_number)
                residue_key = key

            # add the atom to the residue
            residue.atoms.append(atom)

    # Keep the final residue when the file does not end with a TER record.
    if residue is not None:
        active_site.residues.append(residue)

    return active_site


def write_clustering(filename, clusters):
    """
    Write the clustered ActiveSite instances out to a file.

    For each cluster the file gets a blank line, a "Cluster <index>" header,
    a dashed rule, and then one member per line (formatted with %s). Any
    existing file with that name is overwritten.

    Input: a filename and a clustering of ActiveSite instances (an iterable
           of clusters, each an iterable of members)
    Output: none
    """
    # `with` closes the file even if formatting or writing a member fails.
    with open(filename, 'w') as out:
        for index, cluster in enumerate(clusters):
            out.write("\nCluster %d\n--------------\n" % index)
            for member in cluster:
                # Wrap in a 1-tuple so that a tuple-valued member is printed
                # whole instead of being unpacked by the % operator.
                out.write("%s\n" % (member,))


def write_mult_clusterings(filename, clusterings):
    """
    Write a series of clusterings of ActiveSite instances out to a file.

    The clusterings are written one after another with no separator between
    them; cluster numbering restarts at 0 for every clustering. For each
    cluster the file gets a blank line, a "Cluster <index>" header, a dashed
    rule, and then one member per line (formatted with %s). Any existing
    file with that name is overwritten.

    Input: a filename and a list of clusterings of ActiveSite instances
    Output: none
    """
    # `with` closes the file even if formatting or writing a member fails.
    with open(filename, 'w') as out:
        for clusters in clusterings:
            for index, cluster in enumerate(clusters):
                out.write("\nCluster %d\n------------\n" % index)
                for member in cluster:
                    # Wrap in a 1-tuple so that a tuple-valued member is
                    # printed whole instead of being unpacked by %.
                    out.write("%s\n" % (member,))

In [3]:
# Directory holding the active-site .pdb files, stored in a variable so it
# can be reused in later cells. Set the PDB_DATA_DIR environment variable to
# point at another copy of the data; the original location is the fallback,
# so existing runs are unchanged.
pdb_data_path = os.environ.get("PDB_DATA_DIR", "/Users/lcech/BMI203/hw2-skeleton/data/")

In [4]:
active_site_list = read_active_sites(pdb_data_path)
# active_site_list is a list of the active sites read from the .pdb files in the data directory

Read in 136 active sites


In [5]:
len(active_site_list)
#there are 136 active sites!

136

In [6]:
first_active_site = active_site_list[0]
first_active_site
# first_active_site is the first entry of active_site_list.
# Its printed form is just its name, which is not very informative on its own!

46495

In [24]:
first_active_site.residues


[ASP 165, ASP 167, SER 211, ARG 213, ASP 254, LYS 258, ASP 278]

In [7]:
first_active_site.name
#still not informative

'46495'

In [8]:
first_active_site.residues
#now we can reference information contained in the variable

[ASP 165, ASP 167, SER 211, ARG 213, ASP 254, LYS 258, ASP 278]

In [9]:
type(first_active_site.residues)
# residues is a list, so individual residues can be accessed by index (starting at 0) later on

list

In [10]:
first_active_site.residues[0].atoms
#we now reference the first residue that makes up the first active site, and we can see which atoms make it up

[N, CA, C, O, CB, CG, OD1, OD2]

In [11]:
type(first_active_site.residues[0].atoms)
# atoms is also a list, so individual atoms can be accessed by index in the same way

list

In [12]:
first_active_site.residues[0].atoms[0].coords
#for that first atom, N, we can determine its coordinates

(41.692, 10.964, 19.961)

In [13]:
# let's check out the second active site
second_active_site = active_site_list[1]
second_active_site.residues


[SER 39, GLU 211, ASP 246, GLU 295, ASP 320, LYS 345]

In [14]:
len(second_active_site.residues)

6

In [15]:
# Let's check out the next I/O function, read_active_site, which reads a single active-site PDB file

In [16]:
# Build the path from the data directory defined earlier instead of repeating
# the directory literal, so the two cannot drift apart.
pdb_file_path = os.path.join(pdb_data_path, "46495.pdb")

In [17]:
active_site_46495 = read_active_site(pdb_file_path)

In [18]:
type(active_site_46495)
#out : __main__.ActiveSite

__main__.ActiveSite

In [19]:
active_site_46495.name


'46495'

In [20]:
active_site_46495.residues


[ASP 165, ASP 167, SER 211, ARG 213, ASP 254, LYS 258, ASP 278]

In [21]:
active_site_46495.residues[0].type

'ASP'

In [22]:
active_site_46495.residues[0].atoms

[N, CA, C, O, CB, CG, OD1, OD2]

In [23]:
active_site_46495.residues[0].atoms[0].coords

(41.692, 10.964, 19.961)

In [None]:
# read_active_site on the single file gives the same information as the entry returned by read_active_sites