Q1. Write a program to read the content of the PDB file into the dict data type in the following form:

x[RESID][ATOMNAME]=[Xcord,Ycord,Zcord]

x[RESID]['resname']=RESNAME



For instance, x[5]['resname'] should return the residue name and x[5]['CA'] should return

a list with the coordinates of that atom.

In [None]:
# Read the PDB file and parse its ATOM records into a nested dict:
#   pdb_data[resid]['resname'] -> residue name
#   pdb_data[resid][atomname]  -> [x, y, z]
with open("/content/atom_cord.txt", "r") as pdb_file:
    lines = pdb_file.readlines()


pdb_data = {}

# PDB is a fixed-column format, so every field is taken by slicing the line
# (0-based, end-exclusive slices of the 1-based columns in the PDB format).
for line in lines:
    if not line.startswith('ATOM'):
        continue

    resid = int(line[22:26])          # columns 23-26: residue sequence number
    resname = line[17:20].strip()     # columns 18-20: residue name
    atomname = line[12:16].strip()    # columns 13-16: atom name

    # Each coordinate field is 8 characters wide (columns 31-38, 39-46, 47-54).
    # The slice must start at the field's first column; starting one column
    # later drops the sign or leading digit of full-width values such as
    # "-100.000".
    x = float(line[30:38])
    y = float(line[38:46])
    z = float(line[46:54])

    # The first atom seen for a residue creates its entry with the name.
    # NOTE: residues are keyed by number only, so records that share a
    # residue number (e.g. in different chains) end up in the same entry.
    if resid not in pdb_data:
        pdb_data[resid] = {'resname': resname}

    pdb_data[resid][atomname] = [x, y, z]

# Dump every residue: its id, its name, then each atom with its coordinates.
for resid in pdb_data:
    entry = pdb_data[resid]
    print(f"RESID: {resid}")
    print(f"RESNAME: {entry['resname']}")
    for key, value in entry.items():
        # 'resname' is metadata stored next to the atoms, not an atom itself.
        if key == 'resname':
            continue
        print(f"{key}: {value}")
    print("************************************")


 a. Find the composition of the residues. (occurrence of amino acids in the PDB file).

In [None]:
# Tally how many residues of each type occur in the parsed structure.
composition = {}

for entry in pdb_data.values():
    name = entry['resname']
    if name in composition:
        composition[name] += 1
    else:
        # First residue of this type.
        composition[name] = 1

# Report the tally, one residue type per line, in first-seen order.
print("Composition of residues:")
for name in composition:
    print(f"{name}: {composition[name]}")


Composition of residues:
LEU: 27
SER: 17
ALA: 32
GLU: 28
ASP: 26
LYS: 30
VAL: 19
ARG: 18
MET: 8
ILE: 26
ASN: 15
GLY: 17
THR: 20
GLN: 13
HIS: 6
TYR: 13
CYS: 9
PHE: 19
TRP: 3
PRO: 4


b. Find whether every occurrence of a given residue type has the same number of atoms. Find the average number of atoms per residue.

In [None]:
def countss(pdb_data):
    """Count the atoms stored for every residue.

    Args:
        pdb_data: Mapping of residue id to a dict that holds a 'resname'
            entry plus one entry per atom.

    Returns:
        A dict mapping ``(resid, resname)`` to the number of atom entries
        of that residue.
    """
    # Every key except 'resname' is an atom, hence the "- 1".
    return {
        (resid, atoms['resname']): len(atoms) - 1
        for resid, atoms in pdb_data.items()
    }

# Atom count of every residue in pdb_data, keyed by (resid, resname).
atom_counts_id = countss(pdb_data)
# Optional debug listing of the per-residue counts (left disabled):
#print("Residue ID : Residue Name : Number of Atoms")
#for (resid, resname), count in atom_counts_id.items():
#    print(f"{resid} : {resname} : {count}")



def uniq(countss):
    """Find residue names whose atom count is not the same everywhere.

    Args:
        countss: Mapping of ``(resid, resname)`` to an atom count.

    Returns:
        A dict mapping each residue name that occurs with more than one
        distinct atom count to the set of counts observed for it.
    """
    # Gather the distinct atom counts observed for each residue name.
    counts_by_name = {}
    for (_, resname), count in countss.items():
        counts_by_name.setdefault(resname, set()).add(count)

    # Keep only the names that were seen with at least two different counts.
    return {
        name: observed
        for name, observed in counts_by_name.items()
        if len(observed) > 1
    }


# Residue names that occur with more than one distinct atom count.
residues_with_varying_counts = uniq(atom_counts_id)

print("Residue Name : Varying Atom Counts")
for name in residues_with_varying_counts:
    observed = residues_with_varying_counts[name]
    print(f"{name} : {observed}")


Residue Name : Varying Atom Counts
LEU : {8, 4}
PHE : {10, 11}


In [None]:
# Hand-written conclusion: the residue names are hard-coded in this string,
# not taken from residues_with_varying_counts at run time.
print("For LEU and PHE, the number of atoms differs")

For LEU and PHE, the number of atoms differs


In [None]:
def calculate(atom_counts_with_resid):
    """Return the mean atom count over all residues.

    Args:
        atom_counts_with_resid: Mapping of ``(resid, resname)`` to the
            number of atoms of that residue.

    Returns:
        The total number of atoms divided by the number of residues, or 0
        when the mapping is empty.
    """
    # Guard against dividing by zero when there are no residues.
    if not atom_counts_with_resid:
        return 0

    per_residue = atom_counts_with_resid.values()
    return sum(per_residue) / len(per_residue)

# Calculate average number of atoms.
# The per-residue counts live in `atom_counts_id`; `atom_counts_with_resid`
# is only the parameter name inside calculate() and is not defined at module
# level, so passing it here raises NameError on a fresh run.
average_atom_counts = calculate(atom_counts_id)

print("Average number of atoms per residue:", average_atom_counts)


Average number of atoms per residue: 8.0


c. Find the average distance between the CA atoms of two consecutive residues using the formula:

       v1=[x1,x2,x3] and v2=[y1,y2,y3]

       vdiff=v2-v1 (vector difference)

       d=sqrt(vdiff·vdiff) (square root of the dot product of vdiff with itself)

In [None]:
import math

def calculate_distance(v1, v2):
    """Return the Euclidean distance between two 3-D points.

    Args:
        v1: Sequence whose first three items are the x, y, z of one point.
        v2: Sequence whose first three items are the x, y, z of the other.

    Returns:
        The square root of the dot product of ``v2 - v1`` with itself.
    """
    squared_components = []
    for axis in (0, 1, 2):
        delta = v2[axis] - v1[axis]
        squared_components.append(delta ** 2)
    return math.sqrt(sum(squared_components))


# Collect the CA coordinates in ascending residue-id order; residues that
# have no CA atom are left out.
residue_ids = sorted(pdb_data.keys())
ca_trace = [pdb_data[resid]['CA'] for resid in residue_ids if 'CA' in pdb_data[resid]]
ca_count = len(ca_trace)

# Add up the distance between each CA and the next one in the trace.
total_distance = 0
for current_ca, next_ca in zip(ca_trace, ca_trace[1:]):
    total_distance += calculate_distance(current_ca, next_ca)

# ca_count atoms form ca_count - 1 neighbouring pairs; with fewer than two
# CA atoms there is no pair and nothing is printed.
if ca_count > 1:
    average_distance = total_distance / (ca_count - 1)
    print("The average distance between consecutive CA residues is: ",average_distance)



The average distance between consecutive CA residues is:  3.8001963748470757
