# Data analysis

Experiment information:
- One million function evaluations
- **sade_remc**: is the best method from HM, but with more evals
- **sade_mc_final**: is sade + MC + ffi9 + rmsd crowding + spicker + hooke jeeves on cluster centroids
- **sade_remc_final**: is the same as above, but REMC instead of MC
- **sade_mc_ffi9_02**: is HM method + forced fragment insertion of size 2 with 0.02 chance of happening per individual per generation
- **sade_remc_ffi9_02**: same as above but with REMC instead of MC

In [1]:
import datetime
import string
import random
import pickle
import time
import sys
import os
import re

import data_utils

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's "whitegrid" style; the trailing semicolon suppresses the cell's output.
sns.set(style="whitegrid");

In [2]:
# Working directories used by this notebook. Both default to the original
# author's checkout; set DE_SUPIMPA_ANALYSIS_DIR / DE_SUPIMPA_SRC_DIR in the
# environment to run the notebook elsewhere without editing this cell.
root_path = os.environ.get(
    'DE_SUPIMPA_ANALYSIS_DIR',
    '/home/h3nnn4n/progs/de_supimpa/tools/notebooks/analysis',
)
base_path = os.environ.get(
    'DE_SUPIMPA_SRC_DIR',
    '/home/h3nnn4n/progs/de_supimpa/src',
)


def reset_path():
    """Make `base_path` the current working directory."""
    os.chdir(base_path)


def reset_to_root_path():
    """Make `root_path` the current working directory."""
    os.chdir(root_path)


# Net effect: the kernel's working directory ends up at `base_path`.
# The first call still runs, so a missing `root_path` raises here.
reset_to_root_path()
reset_path()

In [3]:
# Names of the experiment runs passed to data_utils.load_all_data.
runs = ['de_experiment_final', 'de_sade_remc', 'de_rosetta', 'de_ffi']

In [4]:
# Load every run listed in `runs`, then merge the loaded runs into `alldata`.
# NOTE(review): the exact shape of `dataset` is defined by data_utils and is not visible here.
dataset = data_utils.load_all_data(runs)
alldata = data_utils.merge_data(dataset)

INFO: Loaded 4 experiment runs dataset


`alldata` is a nested dictionary with the following layout: protein -> experiment -> mode

Where:
1. protein is the 4-letter PDB code
2. experiment is the experiment name; it can be any string
3. mode is one of the following: 'best_by_rmsd', 'best_by_energy', 'all_repacks'

In [5]:
# Walk the nesting one level at a time: proteins -> experiments -> modes.
print(alldata.keys())

crn_experiments = alldata['1crn']
print(crn_experiments.keys())

crn_sade_remc = crn_experiments['sade_remc']
print(crn_sade_remc.keys())

# Cell result: the entries stored under the 'best_by_energy' mode.
crn_sade_remc['best_by_energy']

dict_keys(['1ail', '1enh', '1zdd', '1crn'])
dict_keys(['sade_mc_ffi9_02', 'classic-abinitio', 'sade_mc_final', 'sade_remc_ffi9_02', 'sade_remc', 'sade_remc_final'])
dict_keys(['best_by_energy', 'all_repacks', 'best_by_rmsd'])


[{'repack_time': 12.6688,
  'rmsd_after': 10.2314,
  'rmsd_before': 10.0581,
  'score': 33.8755,
  'scorefxn': -66.0304},
 {'repack_time': 13.6276,
  'rmsd_after': 9.1116,
  'rmsd_before': 8.974,
  'score': 24.3402,
  'scorefxn': -58.7762},
 {'repack_time': 26.0412,
  'rmsd_after': 7.7697,
  'rmsd_before': 8.2319,
  'score': 27.0498,
  'scorefxn': -17.7188},
 {'repack_time': 13.3213,
  'rmsd_after': 9.0886,
  'rmsd_before': 9.077,
  'score': 22.7691,
  'scorefxn': -51.2532},
 {'repack_time': 14.9734,
  'rmsd_after': 10.7381,
  'rmsd_before': 10.7205,
  'score': 28.2609,
  'scorefxn': -52.7638},
 {'repack_time': 13.9423,
  'rmsd_after': 8.0749,
  'rmsd_before': 7.8955,
  'score': 20.3691,
  'scorefxn': -57.2372},
 {'repack_time': 15.9374,
  'rmsd_after': 9.995,
  'rmsd_before': 9.8592,
  'score': 27.1806,
  'scorefxn': -50.1208},
 {'repack_time': 14.8702,
  'rmsd_after': 5.67,
  'rmsd_before': 5.698,
  'score': 29.2131,
  'scorefxn': -56.2856},
 {'repack_time': 14.9984,
  'rmsd_after': 

In [6]:
def pretty_print_experiment_summary(experiment_summary):
    """Print one table of summary statistics per protein.

    `experiment_summary` maps protein -> experiment name -> entry, where each
    entry has a 'data' mapping with the keys 'min', 'max', 'mean', 'std' and
    'median'. Proteins and experiments are printed in sorted key order; each
    protein is introduced by a blank line followed by its upper-cased name.
    """
    row_format = '%25s    min: %8.2f  max: %8.2f  mean: %8.2f  std: %8.2f  median: %8.2f'
    stat_keys = ('min', 'max', 'mean', 'std', 'median')

    for protein in sorted(experiment_summary):
        print()
        print(protein.upper())

        per_experiment = experiment_summary[protein]
        for label in sorted(per_experiment):
            stats = per_experiment[label]['data']
            # One row per experiment: right-aligned name followed by the five statistics.
            print(row_format % ((label,) + tuple(stats[key] for key in stat_keys)))

## Experiment Summary with *Best by Energy*

### RMSD data

In [7]:
# Summarise the 'rmsd_after' metric for the 'best_by_energy' mode, then print
# the per-protein, per-experiment statistics (min/max/mean/std/median).
experiment_summary_rmsd = data_utils.experiment_summary(alldata, mode='best_by_energy', metric='rmsd_after')
pretty_print_experiment_summary(experiment_summary_rmsd)


1AIL
         classic-abinitio    min:     4.75  max:    12.87  mean:     7.95  std:     1.56  median:     8.04
          sade_mc_ffi9_02    min:     6.37  max:    11.49  mean:     8.54  std:     1.15  median:     8.48
            sade_mc_final    min:     5.69  max:    17.66  mean:    10.15  std:     2.39  median:     9.88
                sade_remc    min:     4.26  max:    11.93  mean:     8.57  std:     1.42  median:     8.67
        sade_remc_ffi9_02    min:     4.46  max:    12.11  mean:     8.17  std:     1.36  median:     8.25
          sade_remc_final    min:     6.25  max:    20.64  mean:    10.55  std:     3.44  median:     9.49

1CRN
         classic-abinitio    min:     4.30  max:     9.70  mean:     6.57  std:     1.57  median:     6.16
          sade_mc_ffi9_02    min:     4.15  max:    11.48  mean:     8.60  std:     1.52  median:     8.82
            sade_mc_final    min:     5.76  max:    10.57  mean:     8.50  std:     1.27  median:     8.52
                sade_remc

### Energy data

In [8]:
# Summarise the 'scorefxn' (energy) metric for the 'best_by_energy' mode, then
# print the per-protein, per-experiment statistics (min/max/mean/std/median).
experiment_summary_scorefxn = data_utils.experiment_summary(alldata, mode='best_by_energy', metric='scorefxn')
pretty_print_experiment_summary(experiment_summary_scorefxn)


1AIL
         classic-abinitio    min:  -172.76  max:  -101.92  mean:  -131.39  std:    19.03  median:  -131.34
          sade_mc_ffi9_02    min:  -153.65  max:   -56.20  mean:  -121.99  std:    19.02  median:  -126.84
            sade_mc_final    min:  -160.78  max:  -117.85  mean:  -138.59  std:    10.18  median:  -136.77
                sade_remc    min:  -167.59  max:   -85.48  mean:  -124.49  std:    16.50  median:  -124.66
        sade_remc_ffi9_02    min:  -160.07  max:   -86.04  mean:  -131.82  std:    17.00  median:  -134.37
          sade_remc_final    min:  -166.29  max:  -107.73  mean:  -134.65  std:    11.21  median:  -134.95

1CRN
         classic-abinitio    min:   -79.64  max:   -42.08  mean:   -60.92  std:     8.73  median:   -59.14
          sade_mc_ffi9_02    min:   -75.67  max:   -35.16  mean:   -53.17  std:     8.49  median:   -53.07
            sade_mc_final    min:   -75.86  max:   -46.13  mean:   -62.55  std:     6.50  median:   -62.65
                sade_remc

## Experiment Summary with *Best by RMSD* 

### RMSD data

In [9]:
# Summarise the 'rmsd_after' metric for the 'best_by_rmsd' mode.
# The result gets its own name so that this RMSD summary neither rebinds
# `experiment_summary_scorefxn` (which holds an energy summary) nor the
# best_by_energy RMSD summary in `experiment_summary_rmsd`.
experiment_summary_rmsd_by_rmsd = data_utils.experiment_summary(alldata, mode='best_by_rmsd', metric='rmsd_after')
pretty_print_experiment_summary(experiment_summary_rmsd_by_rmsd)


1AIL
         classic-abinitio    min:     4.75  max:    12.87  mean:     7.95  std:     1.56  median:     8.04
          sade_mc_ffi9_02    min:     6.37  max:    11.49  mean:     8.54  std:     1.15  median:     8.48
            sade_mc_final    min:     4.92  max:     9.83  mean:     7.69  std:     1.22  median:     7.93
                sade_remc    min:     4.26  max:    11.93  mean:     8.57  std:     1.42  median:     8.67
        sade_remc_ffi9_02    min:     4.46  max:    12.11  mean:     8.17  std:     1.36  median:     8.25
          sade_remc_final    min:     4.79  max:     9.64  mean:     7.57  std:     1.14  median:     7.75

1CRN
         classic-abinitio    min:     4.30  max:     9.70  mean:     6.57  std:     1.57  median:     6.16
          sade_mc_ffi9_02    min:     4.15  max:    11.48  mean:     8.60  std:     1.52  median:     8.82
            sade_mc_final    min:     4.35  max:     8.66  mean:     6.82  std:     0.98  median:     7.02
                sade_remc

### Energy data

In [10]:
# Summarise the 'scorefxn' (energy) metric for the 'best_by_rmsd' mode, then
# print the per-protein, per-experiment statistics (min/max/mean/std/median).
# NOTE(review): this rebinds `experiment_summary_scorefxn`, replacing whatever
# earlier cells stored under that name (e.g. the best_by_energy energy summary).
experiment_summary_scorefxn = data_utils.experiment_summary(alldata, mode='best_by_rmsd', metric='scorefxn')
pretty_print_experiment_summary(experiment_summary_scorefxn)


1AIL
         classic-abinitio    min:  -172.76  max:  -101.92  mean:  -131.39  std:    19.03  median:  -131.34
          sade_mc_ffi9_02    min:  -153.65  max:   -56.20  mean:  -121.99  std:    19.02  median:  -126.84
            sade_mc_final    min:  -152.81  max:   -12.03  mean:  -109.20  std:    30.51  median:  -119.70
                sade_remc    min:  -167.59  max:   -85.48  mean:  -124.49  std:    16.50  median:  -124.66
        sade_remc_ffi9_02    min:  -160.07  max:   -86.04  mean:  -131.82  std:    17.00  median:  -134.37
          sade_remc_final    min:  -166.29  max:    11.58  mean:  -101.15  std:    38.71  median:  -110.65

1CRN
         classic-abinitio    min:   -79.64  max:   -42.08  mean:   -60.92  std:     8.73  median:   -59.14
          sade_mc_ffi9_02    min:   -75.67  max:   -35.16  mean:   -53.17  std:     8.49  median:   -53.07
            sade_mc_final    min:   -67.01  max:    35.37  mean:   -38.71  std:    24.16  median:   -42.72
                sade_remc