In [1]:

import pandas as pd
import Bio.PDB
from Bio.PDB.PDBIO import PDBIO


# upload data:

#### Upload entropy difference values:

In [2]:
# Read the table of per-position entropy differences into a dataframe.
dS_df = pd.read_csv(filepath_or_buffer='./dS_SH3_functional_positions.csv')
dS_df.head()  # preview the first rows and the column names

Unnamed: 0,position59_map_to_2VKN,entropy_diff_hull,entropy_diff_func
0,2,1.460799,1.155686
1,3,2.26877,1.9251
2,4,1.146782,1.035175
3,5,2.798589,2.954512
4,6,2.087943,2.317778


#### Upload Sh3-sho1 pdb: 2VKN


In [3]:
# Parse the pdb file into a Biopython structure object.

path = './2vkn.pdb' # your file path here
p = Bio.PDB.PDBParser()
# the first argument is only a label for the parsed structure
structure = p.get_structure(id='myStructureName', file=path)



In [4]:
"""
 The B-factor is a per-atom field in the pdb file that we can use to annotate
 the structure. Here we record the original values; they are replaced later
 with the entropy differences loaded above.
"""
# one original b-factor per atom, in structure.get_atoms() order
bfactor = [atom.get_bfactor() for atom in structure.get_atoms()]

In [5]:
"""
 Collect the id (name) of every atom in the pdb file, in the same order as the b-factors.
"""
# one atom id (e.g. 'N', 'CA') per atom, in structure.get_atoms() order
atom_name = [atom.get_id() for atom in structure.get_atoms()]

In [6]:
# Keep the atom ids and their original b-factors together in one dictionary.
sh3_atom_bfactors = dict(atom=atom_name, bfactor=bfactor)

### Swap b-factor values with entropy difference values
Here we overwrite each atom's b-factor with the entropy difference between all sequences
and functional sequences (`entropy_diff_func`). Atoms of residues with no matching position in the dataframe are set to 0.

In [7]:
# Build the replacement b-factor list: one value per atom, appended residue by
# residue so that it lines up with the order of structure.get_atoms().
#
# Both columns are read positionally (.values) so that row jj of the positions
# and row jj of the entropy differences always refer to the same dataframe row.
mapped_positions = dS_df.position59_map_to_2VKN.values
func_entropy_diff = dS_df.entropy_diff_func.values

new_bfactor = []

jj = 0 # index of the next dataframe row waiting to be matched
for ii, res in enumerate(structure.get_residues()):

    # python index starts at 0, so we will reindex it to match sequence
    # NOTE(review): this assumes position59_map_to_2VKN counts residues in file
    # order starting at 1, rather than using the pdb residue numbers -- confirm.
    seq_pos = ii + 1

    if jj >= len(mapped_positions):
        # Every dataframe row has been used already, so the remaining residues
        # get 0. This is checked explicitly (instead of catching the IndexError
        # with a bare except) so that unrelated errors are not hidden.
        for atom in res:
            new_bfactor.append(0)

    elif seq_pos == mapped_positions[jj]:
        # this residue has an entropy difference: assign it to all of its atoms
        for atom in res:
            print('Add dS:', atom.get_id())
            new_bfactor.append(func_entropy_diff[jj])

        jj = jj + 1

    else:
        # no entropy difference for this residue
        for atom in res:
            print('Add 0:', atom.get_id())
            new_bfactor.append(0)

# Rows are consumed strictly in order, so a position that is out of order or
# beyond the last residue blocks every row after it. Make that visible.
if jj < len(mapped_positions):
    print('Warning:', len(mapped_positions) - jj,
          'dataframe rows were never matched to a residue')
      
            

Add 0: N
Add 0: CA
Add 0: C
Add 0: O
Add 0: CB
Add 0: CG
Add 0: OD1
Add 0: OD2
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD1
Add dS: CD2
Add dS: CE1
Add dS: CE2
Add dS: CZ
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG1
Add dS: CG2
Add dS: CD1
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD1
Add dS: CD2
Add dS: CE1
Add dS: CE2
Add dS: CZ
Add dS: OH
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD
Add dS: CE
Add dS: NZ
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD
Add dS: CE
Add dS: NZ
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD1
Add dS: CD2
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD1
Add dS: CD2
Add dS: CE1
Add dS: CE2
Add dS: CZ
Add dS: OH
Add dS: N


In [8]:
# This cell edits the actual pdb structure object in place.

for atom_idx, atom in enumerate(structure.get_atoms()):

    # show the value being replaced ...
    print(atom_idx, atom, atom.get_bfactor())
    replacement = new_bfactor[atom_idx]
    atom.set_bfactor(replacement)
    # ... and the value now stored on the atom
    print('new_bfactor:', atom.get_bfactor())

0 <Atom N> 42.26
new_bfactor: 0
1 <Atom CA> 41.17
new_bfactor: 0
2 <Atom C> 40.43
new_bfactor: 0
3 <Atom O> 40.36
new_bfactor: 0
4 <Atom CB> 42.77
new_bfactor: 0
5 <Atom CG> 43.41
new_bfactor: 0
6 <Atom OD1> 50.08
new_bfactor: 0
7 <Atom OD2> 50.71
new_bfactor: 0
8 <Atom N> 38.86
new_bfactor: 1.1556855023857886
9 <Atom CA> 36.94
new_bfactor: 1.1556855023857886
10 <Atom C> 36.08
new_bfactor: 1.1556855023857886
11 <Atom O> 37.77
new_bfactor: 1.1556855023857886
12 <Atom CB> 36.98
new_bfactor: 1.1556855023857886
13 <Atom N> 32.99
new_bfactor: 1.925099791951915
14 <Atom CA> 30.78
new_bfactor: 1.925099791951915
15 <Atom C> 30.08
new_bfactor: 1.925099791951915
16 <Atom O> 28.31
new_bfactor: 1.925099791951915
17 <Atom CB> 28.41
new_bfactor: 1.925099791951915
18 <Atom CG> 24.03
new_bfactor: 1.925099791951915
19 <Atom CD1> 28.42
new_bfactor: 1.925099791951915
20 <Atom CD2> 21.08
new_bfactor: 1.925099791951915
21 <Atom CE1> 23.73
new_bfactor: 1.925099791951915
22 <Atom CE2> 25.11
new_bfactor: 1.92

new_bfactor: 0.6509243815208869
183 <Atom CA> 35.94
new_bfactor: 0.6509243815208869
184 <Atom C> 35.2
new_bfactor: 0.6509243815208869
185 <Atom O> 35.26
new_bfactor: 0.6509243815208869
186 <Atom CB> 36.17
new_bfactor: 0.6509243815208869
187 <Atom CG1> 35.07
new_bfactor: 0.6509243815208869
188 <Atom CG2> 36.71
new_bfactor: 0.6509243815208869
189 <Atom CD1> 34.3
new_bfactor: 0.6509243815208869
190 <Atom N> 33.91
new_bfactor: 2.5769578493125462
191 <Atom CA> 33.14
new_bfactor: 2.5769578493125462
192 <Atom C> 33.93
new_bfactor: 2.5769578493125462
193 <Atom O> 35.51
new_bfactor: 2.5769578493125462
194 <Atom CB> 32.28
new_bfactor: 2.5769578493125462
195 <Atom OG> 33.0
new_bfactor: 2.5769578493125462
196 <Atom N> 32.4
new_bfactor: 1.83510772993008
197 <Atom CA> 29.56
new_bfactor: 1.83510772993008
198 <Atom C> 28.05
new_bfactor: 1.83510772993008
199 <Atom O> 27.65
new_bfactor: 1.83510772993008
200 <Atom CB> 28.4
new_bfactor: 1.83510772993008
201 <Atom CG> 27.09
new_bfactor: 1.83510772993008
20

382 <Atom NH1> 31.62
new_bfactor: 2.4262875244147453
383 <Atom NH2> 26.75
new_bfactor: 2.4262875244147453
384 <Atom N> 27.49
new_bfactor: 0
385 <Atom CA> 26.93
new_bfactor: 0
386 <Atom C> 26.98
new_bfactor: 0
387 <Atom O> 26.64
new_bfactor: 0
388 <Atom CB> 23.81
new_bfactor: 0
389 <Atom N> 28.16
new_bfactor: 1.8912919787444784
390 <Atom CA> 28.74
new_bfactor: 1.8912919787444784
391 <Atom C> 27.7
new_bfactor: 1.8912919787444784
392 <Atom O> 27.24
new_bfactor: 1.8912919787444784
393 <Atom CB> 28.71
new_bfactor: 1.8912919787444784
394 <Atom CG> 27.13
new_bfactor: 1.8912919787444784
395 <Atom OD1> 29.09
new_bfactor: 1.8912919787444784
396 <Atom ND2> 32.06
new_bfactor: 1.8912919787444784
397 <Atom N> 28.04
new_bfactor: 2.0060896797495213
398 <Atom CA> 29.14
new_bfactor: 2.0060896797495213
399 <Atom C> 28.87
new_bfactor: 2.0060896797495213
400 <Atom O> 30.4
new_bfactor: 2.0060896797495213
401 <Atom N> 30.63
new_bfactor: 1.1143814552813525
402 <Atom CA> 31.87
new_bfactor: 1.1143814552813525
4

582 <Atom CB> 53.01
new_bfactor: 0
583 <Atom CG> 53.63
new_bfactor: 0
584 <Atom CD> 53.8
new_bfactor: 0
585 <Atom CE> 53.38
new_bfactor: 0
586 <Atom NZ> 50.66
new_bfactor: 0
587 <Atom N> 53.14
new_bfactor: 0
588 <Atom CA> 52.01
new_bfactor: 0
589 <Atom C> 51.51
new_bfactor: 0
590 <Atom O> 52.39
new_bfactor: 0
591 <Atom CB> 52.2
new_bfactor: 0
592 <Atom CG> 53.61
new_bfactor: 0
593 <Atom CD> 53.27
new_bfactor: 0
594 <Atom N> 50.74
new_bfactor: 0
595 <Atom CA> 49.36
new_bfactor: 0
596 <Atom C> 49.27
new_bfactor: 0
597 <Atom O> 47.66
new_bfactor: 0
598 <Atom CB> 48.05
new_bfactor: 0
599 <Atom CG> 44.0
new_bfactor: 0
600 <Atom CD1> 43.92
new_bfactor: 0
601 <Atom CD2> 36.3
new_bfactor: 0
602 <Atom N> 49.78
new_bfactor: 0
603 <Atom CA> 49.66
new_bfactor: 0
604 <Atom C> 50.29
new_bfactor: 0
605 <Atom O> 49.76
new_bfactor: 0
606 <Atom CB> 48.21
new_bfactor: 0
607 <Atom CG> 50.02
new_bfactor: 0
608 <Atom CD> 49.6
new_bfactor: 0
609 <Atom N> 49.54
new_bfactor: 0
610 <Atom CA> 49.18
new_bfactor: 

In [9]:
# Write the edited structure (b-factors now hold entropy_diff_func) to a new pdb file.
io = PDBIO()
io.set_structure(structure)
io.save(file="entropy_diff_func_bfactor-2vkn.pdb")

### Swap b-factor values with entropy difference values
Here we overwrite each atom's b-factor with the entropy difference between all sequences and hull sequences (`entropy_diff_hull`). Atoms of residues with no matching position in the dataframe are set to 0.

In [10]:
# Work on an independent copy: a plain assignment would only create a second
# name for the same object, so writing the hull values below would also
# overwrite the b-factors stored on `structure`.
structure_2 = structure.copy()

In [11]:
# Build the replacement b-factor list for the hull comparison: one value per
# atom, appended residue by residue so that it lines up with the order of
# structure_2.get_atoms().
#
# Both columns are read positionally (.values) so that row jj of the positions
# and row jj of the entropy differences always refer to the same dataframe row.
hull_positions = dS_df.position59_map_to_2VKN.values
hull_entropy_diff = dS_df.entropy_diff_hull.values

new_bfactor_dff_hull = []

jj = 0 # index of the next dataframe row waiting to be matched
for ii, res in enumerate(structure_2.get_residues()):

    # python index starts at 0, so we will reindex it to match sequence
    # NOTE(review): this assumes position59_map_to_2VKN counts residues in file
    # order starting at 1, rather than using the pdb residue numbers -- confirm.
    seq_pos = ii + 1

    if jj >= len(hull_positions):
        # Every dataframe row has been used already, so the remaining residues
        # get 0. This is checked explicitly (instead of catching the IndexError
        # with a bare except) so that unrelated errors are not hidden.
        for atom in res:
            new_bfactor_dff_hull.append(0)

    elif seq_pos == hull_positions[jj]:
        # this residue has an entropy difference: assign it to all of its atoms
        for atom in res:
            print('Add dS:', atom.get_id())
            new_bfactor_dff_hull.append(hull_entropy_diff[jj])

        jj = jj + 1

    else:
        # no entropy difference for this residue
        for atom in res:
            print('Add 0:', atom.get_id())
            new_bfactor_dff_hull.append(0)

# Rows are consumed strictly in order, so a position that is out of order or
# beyond the last residue blocks every row after it. Make that visible.
if jj < len(hull_positions):
    print('Warning:', len(hull_positions) - jj,
          'dataframe rows were never matched to a residue')
      
            

Add 0: N
Add 0: CA
Add 0: C
Add 0: O
Add 0: CB
Add 0: CG
Add 0: OD1
Add 0: OD2
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD1
Add dS: CD2
Add dS: CE1
Add dS: CE2
Add dS: CZ
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG1
Add dS: CG2
Add dS: CD1
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD1
Add dS: CD2
Add dS: CE1
Add dS: CE2
Add dS: CZ
Add dS: OH
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD
Add dS: CE
Add dS: NZ
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD
Add dS: CE
Add dS: NZ
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD1
Add dS: CD2
Add dS: N
Add dS: CA
Add dS: C
Add dS: O
Add dS: CB
Add dS: CG
Add dS: CD1
Add dS: CD2
Add dS: CE1
Add dS: CE2
Add dS: CZ
Add dS: OH
Add dS: N


In [12]:
# Write the hull entropy differences onto the pdb structure, atom by atom.

for atom_idx, atom in enumerate(structure_2.get_atoms()):

    # show the value being replaced ...
    print(atom_idx, atom, atom.get_bfactor())
    replacement = new_bfactor_dff_hull[atom_idx]
    atom.set_bfactor(replacement)
    # ... and the value now stored on the atom
    print('new_bfactor:', atom.get_bfactor())

0 <Atom N> 0
new_bfactor: 0
1 <Atom CA> 0
new_bfactor: 0
2 <Atom C> 0
new_bfactor: 0
3 <Atom O> 0
new_bfactor: 0
4 <Atom CB> 0
new_bfactor: 0
5 <Atom CG> 0
new_bfactor: 0
6 <Atom OD1> 0
new_bfactor: 0
7 <Atom OD2> 0
new_bfactor: 0
8 <Atom N> 1.1556855023857886
new_bfactor: 1.4607987494629064
9 <Atom CA> 1.1556855023857886
new_bfactor: 1.4607987494629064
10 <Atom C> 1.1556855023857886
new_bfactor: 1.4607987494629064
11 <Atom O> 1.1556855023857886
new_bfactor: 1.4607987494629064
12 <Atom CB> 1.1556855023857886
new_bfactor: 1.4607987494629064
13 <Atom N> 1.925099791951915
new_bfactor: 2.268770084279315
14 <Atom CA> 1.925099791951915
new_bfactor: 2.268770084279315
15 <Atom C> 1.925099791951915
new_bfactor: 2.268770084279315
16 <Atom O> 1.925099791951915
new_bfactor: 2.268770084279315
17 <Atom CB> 1.925099791951915
new_bfactor: 2.268770084279315
18 <Atom CG> 1.925099791951915
new_bfactor: 2.268770084279315
19 <Atom CD1> 1.925099791951915
new_bfactor: 2.268770084279315
20 <Atom CD2> 1.925099

214 <Atom OE1> 0.8900679994739518
new_bfactor: 0.864605650561808
215 <Atom OE2> 0.8900679994739518
new_bfactor: 0.864605650561808
216 <Atom N> 2.0743503211574046
new_bfactor: 2.2609803820910312
217 <Atom CA> 2.0743503211574046
new_bfactor: 2.2609803820910312
218 <Atom C> 2.0743503211574046
new_bfactor: 2.2609803820910312
219 <Atom O> 2.0743503211574046
new_bfactor: 2.2609803820910312
220 <Atom CB> 2.0743503211574046
new_bfactor: 2.2609803820910312
221 <Atom CG> 2.0743503211574046
new_bfactor: 2.2609803820910312
222 <Atom CD> 2.0743503211574046
new_bfactor: 2.2609803820910312
223 <Atom OE1> 2.0743503211574046
new_bfactor: 2.2609803820910312
224 <Atom NE2> 2.0743503211574046
new_bfactor: 2.2609803820910312
225 <Atom N> 0.050533281811318
new_bfactor: -0.12020732216014918
226 <Atom CA> 0.050533281811318
new_bfactor: -0.12020732216014918
227 <Atom C> 0.050533281811318
new_bfactor: -0.12020732216014918
228 <Atom O> 0.050533281811318
new_bfactor: -0.12020732216014918
229 <Atom CB> 0.050533281

414 <Atom CB> 1.925337529662148
new_bfactor: 1.88409828772306
415 <Atom OG1> 1.925337529662148
new_bfactor: 1.88409828772306
416 <Atom CG2> 1.925337529662148
new_bfactor: 1.88409828772306
417 <Atom N> 0.7231601095548112
new_bfactor: 0.6884127107446016
418 <Atom CA> 0.7231601095548112
new_bfactor: 0.6884127107446016
419 <Atom C> 0.7231601095548112
new_bfactor: 0.6884127107446016
420 <Atom O> 0.7231601095548112
new_bfactor: 0.6884127107446016
421 <Atom N> 2.9022433881037006
new_bfactor: 2.74344768812393
422 <Atom CA> 2.9022433881037006
new_bfactor: 2.74344768812393
423 <Atom C> 2.9022433881037006
new_bfactor: 2.74344768812393
424 <Atom O> 2.9022433881037006
new_bfactor: 2.74344768812393
425 <Atom CB> 2.9022433881037006
new_bfactor: 2.74344768812393
426 <Atom CG1> 2.9022433881037006
new_bfactor: 2.74344768812393
427 <Atom CG2> 2.9022433881037006
new_bfactor: 2.74344768812393
428 <Atom CD1> 2.9022433881037006
new_bfactor: 2.74344768812393
429 <Atom N> 0.7895668156708184
new_bfactor: 0.6670

614 <Atom CG> 0
new_bfactor: 0
615 <Atom CD> 0
new_bfactor: 0
616 <Atom N> 0
new_bfactor: 0
617 <Atom CA> 0
new_bfactor: 0
618 <Atom C> 0
new_bfactor: 0
619 <Atom O> 0
new_bfactor: 0
620 <Atom CB> 0
new_bfactor: 0
621 <Atom CG> 0
new_bfactor: 0
622 <Atom CD1> 0
new_bfactor: 0
623 <Atom CD2> 0
new_bfactor: 0
624 <Atom N> 0
new_bfactor: 0
625 <Atom CA> 0
new_bfactor: 0
626 <Atom C> 0
new_bfactor: 0
627 <Atom O> 0
new_bfactor: 0
628 <Atom CB> 0
new_bfactor: 0
629 <Atom CG> 0
new_bfactor: 0
630 <Atom CD> 0
new_bfactor: 0
631 <Atom N> 0
new_bfactor: 0
632 <Atom CA> 0
new_bfactor: 0
633 <Atom C> 0
new_bfactor: 0
634 <Atom O> 0
new_bfactor: 0
635 <Atom CB> 0
new_bfactor: 0
636 <Atom CG> 0
new_bfactor: 0
637 <Atom CD1> 0
new_bfactor: 0
638 <Atom CD2> 0
new_bfactor: 0
639 <Atom N> 0
new_bfactor: 0
640 <Atom CA> 0
new_bfactor: 0
641 <Atom C> 0
new_bfactor: 0
642 <Atom O> 0
new_bfactor: 0
643 <Atom CB> 0
new_bfactor: 0
644 <Atom S> 0
new_bfactor: 0
645 <Atom O1> 0
new_bfactor: 0
646 <Atom O2> 0
n

In [13]:
# Write the edited structure (b-factors now hold entropy_diff_hull) to a new pdb file.

io = PDBIO()
io.set_structure(structure_2)
io.save(file="entropy_diff_hull_bfactor-2vkn.pdb")

In [14]:
# in pyMol

# Right click ligand in sequence -> action -> extract object
# Select SO4 and H2O, right click, remove

# Type the following commands in the PyMOL command line:

#spectrum b, white_red, minimum=0.0, maximum=3.0
#set surface_quality, 1
#rotate y,-90 

# set ligand color as marine