In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
import os
import sys
import pathlib
import pickle

sys.path.append('..')

In [4]:
import pandas as pd
from tqdm import tqdm
from collections import defaultdict

In [5]:
from pals.pimp_tools import get_pimp_API_token_from_env, PIMP_HOST, get_ms1_intensities, get_ms1_peaks, get_annotation_df, get_experimental_design
from pals.feature_extraction import DataSource
from pals.pathway_analysis import PALS
from pals.common import *

2019-11-29 11:05:53.056 | INFO     | pals.reactome:get_neo4j_driver:24 - Created graph database driver for bolt://localhost:7687 (neo4j)


# HAT Data Analysis

### Load data

Generate a token by logging in to PiMP.

In [6]:
# Alternative (disabled): request a token with explicit PiMP credentials instead
# of reading it from the environment.
# NOTE(review): get_authentication_token is not among the names imported
# explicitly in this notebook -- confirm where it comes from before re-enabling.
# Do not commit a real password in this cell.
# username = 'joewandy' # PiMP username
# password = 'enter' # PiMP password
# host = 'localhost:8000' # server address and port
# token = get_authentication_token(host, username, password)

Assume the token is stored in the environment variable *PIMP_API_TOKEN*.

In [7]:
# API token used for every PiMP export request below; per the notebook text it
# is expected in the PIMP_API_TOKEN environment variable.
token = get_pimp_API_token_from_env()

In [8]:
def get_data(token, hostname, analysis_id, database_name):
    """Download the three PiMP exports used to build a data source.

    All three requests are sent to ``hostname``. Previously only the
    intensities request honoured ``hostname``; the other two always used the
    global ``PIMP_HOST``, so a non-default host was silently ignored.

    Args:
        token: PiMP API token forwarded to each export helper.
        hostname: PiMP server that all three requests are sent to.
        analysis_id: identifier of the PiMP analysis to export.
        database_name: annotation database name forwarded to
            ``get_annotation_df``.

    Returns:
        Tuple ``(int_df, annotation_df, experimental_design)`` holding the
        values returned by ``get_ms1_intensities``, ``get_annotation_df`` and
        ``get_experimental_design`` respectively.
    """
    int_df = get_ms1_intensities(token, hostname, analysis_id)
    annotation_df = get_annotation_df(token, hostname, analysis_id, database_name)
    experimental_design = get_experimental_design(token, hostname, analysis_id)
    return int_df, annotation_df, experimental_design

In [9]:
# PiMP analysis id of the plasma dataset.
analysis_id_plasma = 636
# Fetch intensities, 'kegg' annotations and the experimental design for that analysis.
int_df_plasma, annotation_df_plasma, experimental_design_plasma = get_data(token, PIMP_HOST, analysis_id_plasma, 'kegg')

http://polyomics.mvls.gla.ac.uk/export/get_ms1_intensities?analysis_id=636 <Response [200]>
http://polyomics.mvls.gla.ac.uk/export/get_ms1_peaks?analysis_id=636 <Response [200]>
http://polyomics.mvls.gla.ac.uk/export/get_experimental_design?analysis_id=636 <Response [200]>


In [10]:
# PiMP analysis id of the CSF dataset.
analysis_id_csf = 635
# Same three exports as for plasma, fetched for the CSF analysis.
int_df_csf, annotation_df_csf, experimental_design_csf = get_data(token, PIMP_HOST, analysis_id_csf, 'kegg')

http://polyomics.mvls.gla.ac.uk/export/get_ms1_intensities?analysis_id=635 <Response [200]>
http://polyomics.mvls.gla.ac.uk/export/get_ms1_peaks?analysis_id=635 <Response [200]>
http://polyomics.mvls.gla.ac.uk/export/get_experimental_design?analysis_id=635 <Response [200]>


### Create Data Sources

In [11]:
# DATABASE_PIMP_KEGG is not defined in this notebook -- presumably provided by
# the `from pals.common import *` wildcard import; verify.
database_name = DATABASE_PIMP_KEGG
# One DataSource per biofluid, built from the three frames downloaded for it.
ds_plasma = DataSource(int_df_plasma, annotation_df_plasma, experimental_design_plasma, database_name)
ds_csf = DataSource(int_df_csf, annotation_df_csf, experimental_design_csf, database_name)

2019-11-29 11:07:05.847 | DEBUG    | pals.feature_extraction:__init__:40 - Loading C:\Users\joewa\Work\git\PALS\pals\data\PiMP_KEGG.json.zip
2019-11-29 11:07:05.879 | DEBUG    | pals.feature_extraction:__init__:92 - Mapping pathway to unique ids
2019-11-29 11:07:05.886 | DEBUG    | pals.feature_extraction:__init__:106 - Creating dataset to pathway mapping
2019-11-29 11:07:06.324 | DEBUG    | pals.feature_extraction:__init__:131 - Computing unique id counts
2019-11-29 11:07:06.331 | DEBUG    | pals.feature_extraction:__init__:40 - Loading C:\Users\joewa\Work\git\PALS\pals\data\PiMP_KEGG.json.zip
2019-11-29 11:07:06.353 | DEBUG    | pals.feature_extraction:__init__:92 - Mapping pathway to unique ids
2019-11-29 11:07:06.360 | DEBUG    | pals.feature_extraction:__init__:106 - Creating dataset to pathway mapping
2019-11-29 11:07:06.636 | DEBUG    | pals.feature_extraction:__init__:131 - Computing unique id counts


#### Disable debug and info logging

In [12]:
# loguru is imported here, next to the only place the notebook reconfigures it.
from loguru import logger
# Drop the currently registered handler(s) so DEBUG/INFO records stop being printed.
logger.remove()
# Log to stderr from WARNING upwards only. add() returns the new handler's id,
# which is what the notebook echoes as this cell's output.
logger.add(sys.stderr, level='WARNING')

1

### Run PALS Experiments

#### Run experiment

In [32]:
def run_pals(experiment_name, data_source, case, control, n_samples, significant_column,
             n_iterations=None):
    """Run PALS repeatedly on resampled data for several sample sizes.

    For every entry of ``n_samples`` the data source is resampled and analysed
    ``n_iterations`` times; one pathway result is kept per repetition.

    Args:
        experiment_name: label stored in the result and shown in progress output.
        data_source: object exposing ``resample(case, control, n_sample)``.
        case: name of the case group passed to ``resample``.
        control: name of the control group passed to ``resample``.
        n_samples: iterable of sample sizes to evaluate.
        significant_column: recorded in the result dict only; this function
            does not otherwise use it.
        n_iterations: repetitions per sample size. When omitted, the
            notebook-level global ``n_iter`` is used (the original behaviour),
            so that global must be defined before the call.

    Returns:
        dict echoing the inputs under the keys ``'experiment_name'``,
        ``'data_source'``, ``'case'``, ``'control'``, ``'n_samples'`` and
        ``'significant_column'``, plus ``'pals'`` mapping each sample size to
        the list of ``PALS(...).get_pathway_df()`` results, and ``'ora'``,
        which this function leaves as ``None``.
    """
    # Explicit argument wins; otherwise keep relying on the notebook global so
    # existing calls behave as before.
    iterations = n_iter if n_iterations is None else n_iterations

    res = {
        'experiment_name': experiment_name,
        'data_source': data_source,
        'case': case,
        'control': control,
        'n_samples': n_samples,
        'significant_column': significant_column,
        'pals': None,
        'ora': None
    }

    # generate PALS results: sample size -> one pathway result per repetition
    results = {}
    for n_sample in n_samples:
        results[n_sample] = []
        for i in range(iterations):
            print('n_sample=%d iter=%d PALS experiment=%s case=%s control=%s' % (n_sample, i, experiment_name, case, control))
            ds_resampled = data_source.resample(case, control, n_sample)
            pals = PALS(ds_resampled)
            pathway_df = pals.get_pathway_df()
            results[n_sample].append(pathway_df)

    res['pals'] = results
    return res

In [33]:
# Sample sizes handed to DataSource.resample -- presumably the number of samples
# drawn per group; confirm against resample().
n_samples = [4, 8, 12]
# Repetitions per sample size; run_pals reads this as a global.
n_iter = 100
# Experiment name -> dict returned by run_pals().
results = {}

#### Plasma

In [34]:
# Settings for the plasma experiment: Stage1 vs Control.
data_source = ds_plasma
experiment_name = 'plasma'
case = 'Stage1'
control = 'Control'
# Only recorded in the result dict by run_pals; not used in its computation.
significant_column = 'PiMP_KEGG Stage1/Control comb_p'

In [35]:
# Long-running: one PALS run per (sample size, repetition) pair.
res = run_pals(experiment_name, data_source, case, control, n_samples, significant_column)
# File the result under the experiment name.
results[experiment_name] = res

n_sample=4 iter=0 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=1 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=2 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=3 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=4 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=5 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=6 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=7 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=8 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=9 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=10 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=11 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=12 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=13 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=14 PALS experi

  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


n_sample=4 iter=71 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=72 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=73 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=74 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=75 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=76 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=77 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=78 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=79 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=80 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=81 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=82 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=83 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=84 PALS experiment=plasma case=Stage1 control=Control
n_sample=4 iter=85 P



n_sample=12 iter=12 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=13 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=14 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=15 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=16 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=17 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=18 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=19 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=20 PALS experiment=plasma case=Stage1 control=Control




n_sample=12 iter=21 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=22 PALS experiment=plasma case=Stage1 control=Control




n_sample=12 iter=23 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=24 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=25 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=26 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=27 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=28 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=29 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=30 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=31 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=32 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=33 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=34 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=35 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=36 PALS experiment=plasma case=Stage1 control=Control
n_samp



n_sample=12 iter=39 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=40 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=41 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=42 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=43 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=44 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=45 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=46 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=47 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=48 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=49 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=50 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=51 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=52 PALS experiment=plasma case=Stage1 control=Control
n_samp



n_sample=12 iter=80 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=81 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=82 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=83 PALS experiment=plasma case=Stage1 control=Control




n_sample=12 iter=84 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=85 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=86 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=87 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=88 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=89 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=90 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=91 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=92 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=93 PALS experiment=plasma case=Stage1 control=Control




n_sample=12 iter=94 PALS experiment=plasma case=Stage1 control=Control




n_sample=12 iter=95 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=96 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=97 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=98 PALS experiment=plasma case=Stage1 control=Control
n_sample=12 iter=99 PALS experiment=plasma case=Stage1 control=Control


#### CSF

In [36]:
# Settings for the CSF experiment: Stage_2 vs Control. These reuse (overwrite)
# the names set for the plasma experiment.
data_source = ds_csf
experiment_name = 'csf'
case = 'Stage_2'
control = 'Control'
# Only recorded in the result dict by run_pals; not used in its computation.
significant_column = 'PiMP_KEGG Stage_2/Control comb_p'

In [37]:
# Long-running: one PALS run per (sample size, repetition) pair.
res = run_pals(experiment_name, data_source, case, control, n_samples, significant_column)
# File the result under the experiment name, alongside the plasma entry.
results[experiment_name] = res

n_sample=4 iter=0 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=1 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=2 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=3 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=4 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=5 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=6 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=7 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=8 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=9 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=10 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=11 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=12 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=13 PALS experiment=csf case=Stage_2 control=Control
n_sample=4 iter=14 PALS experiment=csf case=Stage_2 contro



n_sample=12 iter=6 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=7 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=8 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=9 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=10 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=11 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=12 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=13 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=14 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=15 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=16 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=17 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=18 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=19 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=20 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=21 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=22 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=23 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=24 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=25 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=26 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=27 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=28 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=29 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=30 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=31 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=32 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=33 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=34 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=35 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=36 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=37 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=38 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=39 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=40 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=41 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=42 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=43 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=44 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=45 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=46 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=47 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=48 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=49 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=50 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=51 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=52 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=53 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=54 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=55 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=56 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=57 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=58 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=59 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=60 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=61 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=62 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=63 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=64 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=65 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=66 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=67 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=68 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=69 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=70 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=71 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=72 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=73 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=74 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=75 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=76 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=77 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=78 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=79 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=80 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=81 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=82 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=83 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=84 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=85 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=86 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=87 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=88 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=89 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=90 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=91 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=92 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=93 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=94 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=95 PALS experiment=csf case=Stage_2 control=Control




n_sample=12 iter=96 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=97 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=98 PALS experiment=csf case=Stage_2 control=Control
n_sample=12 iter=99 PALS experiment=csf case=Stage_2 control=Control


#### Save Results

In [38]:
# Persist all experiment results. The path is relative, so it resolves against
# the notebook's working directory. save_obj is not defined in this notebook --
# presumably provided by the `from pals.common import *` wildcard import; verify.
save_obj(results, os.path.join('test_data', 'HAT_results.p'))

Saving <class 'dict'> to test_data\HAT_results.p
