In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
from loguru import logger

In [4]:
import os
import sys
sys.path.append('..')

In [5]:
from pals.pimp_tools import get_pimp_API_token_from_env, PIMP_HOST, download_from_pimp
from pals.evaluation import run_experiment
from pals.common import save_obj, DATABASE_PIMP_KEGG
from pals.feature_extraction import DataSource

2019-11-29 16:14:28.367 | INFO     | pals.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)


# HAT Data Analysis

This notebook is used to generate resampled data and run the different methods for comparison in the manuscript.

### Load data

Assumes the PiMP API token is stored in the environment variable *PIMP_API_TOKEN*.

In [6]:
# Fetch the PiMP API token through the pals helper; this notebook assumes it is
# exported as the PIMP_API_TOKEN environment variable. Never print or hardcode it.
token = get_pimp_API_token_from_env()

In [7]:
# PiMP analysis holding the plasma samples.
analysis_id_plasma = 636

# Download the intensity table, peak annotations and experimental design for
# that analysis from the PiMP host.
# NOTE(review): 'kegg' presumably selects the KEGG compound database used for
# annotation — confirm against pals.pimp_tools.download_from_pimp.
(
    int_df_plasma,
    annotation_df_plasma,
    experimental_design_plasma,
) = download_from_pimp(token, PIMP_HOST, analysis_id_plasma, 'kegg')

http://polyomics.mvls.gla.ac.uk/export/get_ms1_intensities?analysis_id=636 <Response [200]>
http://polyomics.mvls.gla.ac.uk/export/get_ms1_peaks?analysis_id=636 <Response [200]>
http://polyomics.mvls.gla.ac.uk/export/get_experimental_design?analysis_id=636 <Response [200]>


In [8]:
# PiMP analysis holding the CSF samples.
analysis_id_csf = 635

# Download the intensity table, peak annotations and experimental design for
# that analysis from the PiMP host.
# NOTE(review): 'kegg' presumably selects the KEGG compound database used for
# annotation — confirm against pals.pimp_tools.download_from_pimp.
(
    int_df_csf,
    annotation_df_csf,
    experimental_design_csf,
) = download_from_pimp(token, PIMP_HOST, analysis_id_csf, 'kegg')

http://polyomics.mvls.gla.ac.uk/export/get_ms1_intensities?analysis_id=635 <Response [200]>
http://polyomics.mvls.gla.ac.uk/export/get_ms1_peaks?analysis_id=635 <Response [200]>
http://polyomics.mvls.gla.ac.uk/export/get_experimental_design?analysis_id=635 <Response [200]>


### Create Data Sources

In [9]:
# Both data sources are built against the same pathway database.
database_name = DATABASE_PIMP_KEGG

# Plasma: intensities + annotations + experimental design.
ds_plasma = DataSource(
    int_df_plasma,
    annotation_df_plasma,
    experimental_design_plasma,
    database_name,
)

# CSF: same construction, using the CSF downloads.
ds_csf = DataSource(
    int_df_csf,
    annotation_df_csf,
    experimental_design_csf,
    database_name,
)

2019-11-29 16:15:36.813 | DEBUG    | pals.feature_extraction:__init__:40 - Loading C:\Users\joewa\Work\git\PALS\pals\data\PiMP_KEGG.json.zip
2019-11-29 16:15:36.836 | DEBUG    | pals.feature_extraction:__init__:92 - Mapping pathway to unique ids
2019-11-29 16:15:36.841 | DEBUG    | pals.feature_extraction:__init__:106 - Creating dataset to pathway mapping
2019-11-29 16:15:37.201 | DEBUG    | pals.feature_extraction:__init__:131 - Computing unique id counts
2019-11-29 16:15:37.208 | DEBUG    | pals.feature_extraction:__init__:40 - Loading C:\Users\joewa\Work\git\PALS\pals\data\PiMP_KEGG.json.zip
2019-11-29 16:15:37.228 | DEBUG    | pals.feature_extraction:__init__:92 - Mapping pathway to unique ids
2019-11-29 16:15:37.235 | DEBUG    | pals.feature_extraction:__init__:106 - Creating dataset to pathway mapping
2019-11-29 16:15:37.515 | DEBUG    | pals.feature_extraction:__init__:131 - Computing unique id counts


#### Disable debug logging

In [10]:
# Drop the loguru handlers registered so far, then attach a single stderr sink
# at INFO level so DEBUG messages are no longer shown.
# The order matters: calling remove() after add() would discard the new sink.
logger.remove()
# add() is the cell's last expression, so its return value (the new handler id)
# is echoed as the cell output.
logger.add(sys.stderr, level='INFO')

1

### Run PALS Experiments

In [11]:
# Shared settings for every experiment run below.
# Accumulates run_experiment outputs, keyed by experiment name.
results = dict()
# Number of iterations performed for each entry of n_samples.
n_iter = 100
# Sample sizes to evaluate.
# NOTE(review): presumably the number of samples drawn per group when
# resampling — confirm against pals.evaluation.run_experiment.
n_samples = [4, 8, 12]

#### Run on Plasma samples

In [12]:
# Parameters for the plasma experiment (Stage1 vs Control).
experiment_name = 'plasma'
data_source = ds_plasma
# NOTE(review): case/control are presumably group labels from the plasma
# experimental design — confirm against the downloaded design.
case, control = 'Stage1', 'Control'
# NOTE(review): presumably the column holding the combined p-values used to
# decide significance — confirm in pals.evaluation.
significant_column = 'PiMP_KEGG Stage1/Control comb_p'

In [13]:
# Run the experiment with the parameters currently bound in the notebook
# namespace (the parameter cell directly above must have been executed last),
# then file the outcome under its experiment name.
res = run_experiment(
    experiment_name,
    data_source,
    case,
    control,
    n_samples,
    significant_column,
    n_iter,
)
results[experiment_name] = res

2019-11-29 16:15:37.795 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=0 PALS experiment=plasma case=Stage1 control=Control
2019-11-29 16:15:39.865 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=1 PALS experiment=plasma case=Stage1 control=Control
2019-11-29 16:15:42.003 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=2 PALS experiment=plasma case=Stage1 control=Control
2019-11-29 16:15:44.065 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=3 PALS experiment=plasma case=Stage1 control=Control
2019-11-29 16:15:46.127 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=4 PALS experiment=plasma case=Stage1 control=Control
2019-11-29 16:15:48.245 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=5 PALS experiment=plasma case=Stage1 control=Control
2019-11-29 16:15:50.335 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=6 PALS experiment=plasma case=Stage1 control=Control
2019-1

#### Run on CSF samples

In [14]:
# Parameters for the CSF experiment (Stage_2 vs Control).
# These rebind the same names used for the plasma run.
experiment_name = 'csf'
data_source = ds_csf
# NOTE(review): case/control are presumably group labels from the CSF
# experimental design — confirm against the downloaded design.
case, control = 'Stage_2', 'Control'
# NOTE(review): presumably the column holding the combined p-values used to
# decide significance — confirm in pals.evaluation.
significant_column = 'PiMP_KEGG Stage_2/Control comb_p'

In [15]:
# Run the experiment with the parameters currently bound in the notebook
# namespace (the parameter cell directly above must have been executed last),
# then file the outcome under its experiment name.
res = run_experiment(
    experiment_name,
    data_source,
    case,
    control,
    n_samples,
    significant_column,
    n_iter,
)
results[experiment_name] = res

2019-11-29 16:27:51.941 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=0 PALS experiment=csf case=Stage_2 control=Control
2019-11-29 16:27:53.878 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=1 PALS experiment=csf case=Stage_2 control=Control
2019-11-29 16:27:55.812 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=2 PALS experiment=csf case=Stage_2 control=Control
2019-11-29 16:27:57.782 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=3 PALS experiment=csf case=Stage_2 control=Control
2019-11-29 16:27:59.713 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=4 PALS experiment=csf case=Stage_2 control=Control
2019-11-29 16:28:01.638 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=5 PALS experiment=csf case=Stage_2 control=Control
2019-11-29 16:28:03.565 | INFO     | pals.evaluation:run_experiment:36 - n_sample=4 iter=6 PALS experiment=csf case=Stage_2 control=Control
2019-11-29 16:28:05.

#### Save Results

In [16]:
# Persist the results dict (one entry per experiment name) under
# test_data/HAT, relative to the notebook's working directory, so the
# long-running experiments above do not have to be repeated.
# NOTE(review): the '.p' suffix suggests a pickle file — confirm in pals.common
# and only load such files from trusted sources.
save_obj(results, os.path.join('test_data', 'HAT', 'HAT_results.p'))

Saving <class 'dict'> to test_data\HAT\HAT_results.p
