# Hotspot Analysis
Here we map hotspots onto all the protein structures

In [6]:
import os
import pandas as pd
from tqdm import tqdm

from ccdc.protein import Protein
from hotspots.calculation import Runner
from hotspots.hs_io import HotspotWriter

In [7]:
# Check correct environment variables are set
!echo $GHECOM_EXE
!echo $CSDHOME 

/home/arj39/ghecom
/home/arj39/CCDC/CSD_2020/


We use Hotspots API to identify hotspots over all the available structures.

In [8]:
def identify_hotspots(path_stem, protein):
    
    # Load protein and prepare for hotspot analysis
    print(os.getcwd() + '/structures/' + path_stem + protein)
    prot = Protein.from_file(os.getcwd() + '/structures/' + path_stem + protein)
    prot.remove_all_waters()
    prot.add_hydrogens()
    for l in prot.ligands:
        prot.remove_ligand(l.identifier)
    
    # Perform Hotspot analysis
    runner = Runner()
    # Only SuperStar jobs are parallelised (one job per processor). By default there are 3 jobs, when calculating charged interactions there are 5.
    results = runner.from_protein(prot, nprocesses=5)
    
    # Create output directory
    output_dir = os.getcwd() + '/hotspots/' + path_stem + protein[:-4]
    !mkdir $output_dir
    
    # Write files. Creates "results/pdb1/out.zip"
    with HotspotWriter(output_dir) as writer:
        writer.write(results)

In [5]:
protein = 'wS-5xlr_C-6acj_C.pdb'
path_stem = 'korkin_lab/IndividualModels/'

identify_hotspots(path_stem, protein)

/home/arj39/Documents/github/covid/structures/korkin_lab/IndividualModels/wS-5xlr_C-6acj_C.pdb
None
Start atomic hotspot detection
        Processors: 5
/tmp/tmpr9vfx7tv
/tmp/tmpr9vfx7tv
/tmp/tmpr9vfx7tv
ligsite extrema (0.0, 7.0)
ligsite extrema (0.0, 7.0)
ligsite extrema (0.0, 7.0)
Atomic hotspot detection complete

Start buriedness calculation
    method: Ghecom
Buriedness calcualtion complete

Start sampling



  0%|          | 0/3000 [00:00<?, ?it/s][A


    nRotations: 3000 nTranslations: 8011 probename: apolar



  0%|          | 1/3000 [00:00<11:34,  4.32it/s][A
  0%|          | 2/3000 [00:00<11:20,  4.41it/s][A
  0%|          | 3/3000 [00:00<11:27,  4.36it/s][A
  0%|          | 4/3000 [00:00<11:40,  4.27it/s][A
  0%|          | 5/3000 [00:01<11:31,  4.33it/s][A
  0%|          | 6/3000 [00:01<11:37,  4.29it/s][A
  0%|          | 7/3000 [00:01<11:29,  4.34it/s][A
  0%|          | 8/3000 [00:01<11:29,  4.34it/s][A
  0%|          | 9/3000 [00:02<11:40,  4.27it/s][A
  0%|          | 10/3000 [00:02<11:47,  4.22it/s][A
  0%|          | 11/3000 [00:02<12:11,  4.08it/s][A
  0%|          | 12/3000 [00:02<12:01,  4.14it/s][A
  0%|          | 13/3000 [00:03<12:10,  4.09it/s][A
  0%|          | 14/3000 [00:03<11:49,  4.21it/s][A
  0%|          | 15/3000 [00:03<11:45,  4.23it/s][A
  1%|          | 16/3000 [00:03<11:46,  4.22it/s][A
  1%|          | 17/3000 [00:04<11:47,  4.22it/s][A
  1%|          | 18/3000 [00:04<12:05,  4.11it/s][A
  1%|          | 19/3000 [00:04<11:57,  4.16it/s][A
 


    nRotations: 3000 nTranslations: 2828 probename: donor



  0%|          | 4/3000 [00:00<03:10, 15.72it/s][A
  0%|          | 6/3000 [00:00<03:08, 15.85it/s][A
  0%|          | 8/3000 [00:00<03:08, 15.91it/s][A
  0%|          | 10/3000 [00:00<03:14, 15.38it/s][A
  0%|          | 12/3000 [00:00<03:11, 15.64it/s][A
  0%|          | 14/3000 [00:00<03:12, 15.52it/s][A
  1%|          | 16/3000 [00:01<03:17, 15.09it/s][A
  1%|          | 18/3000 [00:01<03:19, 14.93it/s][A
  1%|          | 20/3000 [00:01<03:15, 15.22it/s][A
  1%|          | 22/3000 [00:01<03:15, 15.26it/s][A
  1%|          | 24/3000 [00:01<03:12, 15.50it/s][A
  1%|          | 26/3000 [00:01<03:19, 14.93it/s][A
  1%|          | 28/3000 [00:01<03:19, 14.91it/s][A
  1%|          | 30/3000 [00:01<03:16, 15.10it/s][A
  1%|          | 32/3000 [00:02<03:16, 15.11it/s][A
  1%|          | 34/3000 [00:02<03:22, 14.65it/s][A
  1%|          | 36/3000 [00:02<03:21, 14.73it/s][A
  1%|▏         | 38/3000 [00:02<03:19, 14.88it/s][A
  1%|▏         | 40/3000 [00:02<03:18, 14.95it/s


    nRotations: 3000 nTranslations: 2975 probename: acceptor



  0%|          | 4/3000 [00:00<03:05, 16.12it/s][A
  0%|          | 6/3000 [00:00<03:05, 16.16it/s][A
  0%|          | 8/3000 [00:00<03:02, 16.36it/s][A
  0%|          | 10/3000 [00:00<03:08, 15.83it/s][A
  0%|          | 12/3000 [00:00<03:05, 16.06it/s][A
  0%|          | 14/3000 [00:00<03:00, 16.57it/s][A
  1%|          | 16/3000 [00:00<03:02, 16.35it/s][A
  1%|          | 18/3000 [00:01<02:53, 17.18it/s][A
  1%|          | 20/3000 [00:01<02:56, 16.86it/s][A
  1%|          | 22/3000 [00:01<03:01, 16.38it/s][A
  1%|          | 24/3000 [00:01<03:02, 16.27it/s][A
  1%|          | 26/3000 [00:01<03:01, 16.35it/s][A
  1%|          | 28/3000 [00:01<03:02, 16.29it/s][A
  1%|          | 30/3000 [00:01<02:57, 16.71it/s][A
  1%|          | 32/3000 [00:01<02:57, 16.69it/s][A
  1%|          | 34/3000 [00:02<02:58, 16.63it/s][A
  1%|          | 36/3000 [00:02<02:57, 16.67it/s][A
  1%|▏         | 38/3000 [00:02<02:56, 16.81it/s][A
  1%|▏         | 40/3000 [00:02<02:55, 16.84it/s

Sampling complete

Runtime = 15064.238420963287seconds
mkdir: cannot create directory ‘/home/arj39/Documents/github/covid/hotspots/korkin_lab/IndividualModels/wS-5xlr_C-6acj_C’: File exists
10
14
17


### Iterate over all structures

In [None]:
## Korkin Lab
### Individual Models
path_stem = 'korkin_lab/IndividualModels/'
files = os.listdir('structures/' + path_stem)
files = [f for f in files if f.endswith('.pdb')]
[identify_hotspots(path_stem=path_stem, protein=f) for f in tqdm(files)]

### IntraViral Complexes
path_stem = 'korkin_lab/IntraViralComplexes/'
files = os.listdir('structures/' + path_stem)
files = [f for f in files if f.endswith('.pdb')]
[identify_hotspots(path_stem=path_stem, protein=f) for f in tqdm(files)]

### ViralHuman Complexes
path_stem = 'korkin_lab/ViralHumanComplexes/'
files = os.listdir('structures/' + path_stem)
files = [f for f in files if f.endswith('.pdb')]
[identify_hotspots(path_stem=path_stem, protein=f) for f in tqdm(files)]

## Feig Lab
path_stem = 'FeigLab/'
files = os.listdir('structures/' + path_stem)
files = [f for f in files if f.endswith('.pdb')]
[identify_hotspots(path_stem=path_stem, protein=f) for f in tqdm(files)]

## AlphaFold Refined
path_stem = 'AlphaFold_refined/'
files = os.listdir('structures/' + path_stem)
files = [f for f in files if f.endswith('.pdb')]
[identify_hotspots(path_stem=path_stem, protein=f) for f in tqdm(files)]

## Deepmind Structures
path_stem = 'deepmind_structures/'
files = os.listdir('structures/' + path_stem)
files = [f for f in files if f.endswith('.pdb')]
[identify_hotspots(path_stem=path_stem, protein=f) for f in tqdm(files)]

## Experimental Structures
path_stem = 'experimental_structures/'
files = os.listdir('structures/' + path_stem)
files = [f for f in files if f.endswith('.pdb')]
[identify_hotspots(path_stem=path_stem, protein=f) for f in tqdm(files)]


  0%|          | 0/19 [00:00<?, ?it/s][A

/home/arj39/Documents/github/covid/structures/korkin_lab/IntraViralComplexes/wN-Nterminal_domain_homopentamer-4UD1.pdb
None
Start atomic hotspot detection
        Processors: 5
/tmp/tmp1tvnu1ky
/tmp/tmp1tvnu1ky
/tmp/tmp1tvnu1ky
ligsite extrema (0.0, 7.0)
ligsite extrema (0.0, 7.0)
ligsite extrema (0.0, 7.0)
Atomic hotspot detection complete

Start buriedness calculation
    method: Ghecom
Buriedness calcualtion complete

Start sampling




  0%|          | 0/3000 [00:00<?, ?it/s][A[A

  0%|          | 1/3000 [00:00<08:53,  5.62it/s][A[A


    nRotations: 3000 nTranslations: 5865 probename: apolar




  0%|          | 2/3000 [00:00<09:12,  5.43it/s][A[A

  0%|          | 3/3000 [00:00<08:36,  5.81it/s][A[A

  0%|          | 4/3000 [00:00<08:25,  5.93it/s][A[A

  0%|          | 5/3000 [00:00<08:31,  5.86it/s][A[A

  0%|          | 6/3000 [00:01<08:12,  6.08it/s][A[A

  0%|          | 7/3000 [00:01<08:06,  6.15it/s][A[A

  0%|          | 8/3000 [00:01<08:05,  6.16it/s][A[A

  0%|          | 9/3000 [00:01<08:10,  6.10it/s][A[A

  0%|          | 10/3000 [00:01<08:11,  6.09it/s][A[A

  0%|          | 11/3000 [00:01<08:07,  6.13it/s][A[A

  0%|          | 12/3000 [00:01<08:16,  6.02it/s][A[A

  0%|          | 13/3000 [00:02<08:18,  5.99it/s][A[A

  0%|          | 14/3000 [00:02<08:11,  6.08it/s][A[A

  0%|          | 15/3000 [00:02<08:04,  6.16it/s][A[A

  1%|          | 16/3000 [00:02<08:09,  6.10it/s][A[A

  1%|          | 17/3000 [00:02<08:20,  5.96it/s][A[A

  1%|          | 18/3000 [00:02<08:25,  5.90it/s][A[A

  1%|          | 19/3000 [00:03<08:14


    nRotations: 3000 nTranslations: 1982 probename: donor




  0%|          | 6/3000 [00:00<02:10, 22.90it/s][A[A

  0%|          | 9/3000 [00:00<02:11, 22.82it/s][A[A

  0%|          | 12/3000 [00:00<02:08, 23.21it/s][A[A

  0%|          | 15/3000 [00:00<02:10, 22.79it/s][A[A

  1%|          | 18/3000 [00:00<02:14, 22.21it/s][A[A

  1%|          | 21/3000 [00:00<02:14, 22.19it/s][A[A

  1%|          | 24/3000 [00:01<02:13, 22.37it/s][A[A

  1%|          | 27/3000 [00:01<02:10, 22.75it/s][A[A

  1%|          | 30/3000 [00:01<02:11, 22.60it/s][A[A

  1%|          | 33/3000 [00:01<02:12, 22.45it/s][A[A

  1%|          | 36/3000 [00:01<02:14, 22.04it/s][A[A

  1%|▏         | 39/3000 [00:01<02:12, 22.32it/s][A[A

  1%|▏         | 42/3000 [00:01<02:16, 21.75it/s][A[A

  2%|▏         | 45/3000 [00:02<02:12, 22.24it/s][A[A

  2%|▏         | 48/3000 [00:02<02:12, 22.36it/s][A[A

  2%|▏         | 51/3000 [00:02<02:10, 22.63it/s][A[A

  2%|▏         | 54/3000 [00:02<02:09, 22.75it/s][A[A

  2%|▏         | 57/3000 [00:02


    nRotations: 3000 nTranslations: 3190 probename: acceptor




  0%|          | 4/3000 [00:00<03:20, 14.97it/s][A[A

  0%|          | 6/3000 [00:00<03:22, 14.79it/s][A[A

  0%|          | 8/3000 [00:00<03:26, 14.51it/s][A[A

  0%|          | 10/3000 [00:00<03:29, 14.29it/s][A[A

  0%|          | 12/3000 [00:00<03:28, 14.30it/s][A[A

  0%|          | 14/3000 [00:00<03:31, 14.11it/s][A[A

  1%|          | 16/3000 [00:01<03:29, 14.26it/s][A[A

  1%|          | 18/3000 [00:01<03:26, 14.46it/s][A[A

  1%|          | 20/3000 [00:01<03:19, 14.93it/s][A[A

  1%|          | 22/3000 [00:01<03:16, 15.15it/s][A[A

  1%|          | 24/3000 [00:01<03:19, 14.93it/s][A[A

  1%|          | 26/3000 [00:01<03:22, 14.70it/s][A[A

  1%|          | 28/3000 [00:01<03:18, 14.99it/s][A[A

  1%|          | 30/3000 [00:02<03:17, 15.05it/s][A[A

  1%|          | 32/3000 [00:02<03:17, 15.01it/s][A[A

  1%|          | 34/3000 [00:02<03:07, 15.81it/s][A[A

  1%|          | 36/3000 [00:02<03:15, 15.18it/s][A[A

  1%|▏         | 38/3000 [00:02<

Sampling complete

Runtime = 2941.764390230179seconds
10
14
17



  5%|▌         | 1/19 [49:25<14:49:43, 2965.73s/it][A

/home/arj39/Documents/github/covid/structures/korkin_lab/IntraViralComplexes/wS_homotrimer-Conf2-5X5B.pdb
None
Start atomic hotspot detection
        Processors: 5
/tmp/tmp7h15nowl
/tmp/tmp7h15nowl
/tmp/tmp7h15nowl
