#  INIT

In [1]:
import sys
sys.path.append('../../')
sys.path.append('../infras/cellMix/')
sys.path.append('../infras/cytof_data/')
sys.path.append('../infras/')
sys.path.append('../experiments/')
sys.path.append('../preprocess/cell_specifics/')
sys.path.append('../preprocess/intra_variance/')
sys.path.append('../models/cell_proportions/')
sys.path.append('../measures/cell_proportions_measures/')

In [2]:
from data_factory import DataFactory

from cytof_cell_count_infra import CytofCellCountInfra
from cell_proportions_experiments import  CellProportionsExperiments

from basic import BasicDeconv
from pp_entropy_based import PpEntropyBased
from cell_proportions_measure import CellProportionsMeasure
from pp_clean_high_intra_var import PpCleanHighIntraVar
from pp_clean_irrelevant_proteins import PpCleanIrrelevantProteins
from pp_empty import PpEmpty
from pp_entropy_based_only_largest import PpEntropyBasedOnlyLargest
from aggregate_intra_variance import AggregateIntraVariance

In [3]:
import pandas as pd
import numpy as np
from functools import partial
import multiprocessing
from sklearn import pipeline
import itertools

from scipy.optimize import least_squares
from sklearn.metrics import mean_squared_error
from functools import partial
from scipy.optimize import minimize
import scipy.optimize
from itertools import combinations
import matplotlib.pyplot as plt
import seaborn as sns

import os

In [4]:
from deconv_py.preprocess.base import BasePreprocess as PP_base
from deconv_py.preprocess.cell_specific import CellSpecific as PP_proteins

# from deconv_py.infras.data_factory import DataFactory
from deconv_py.infras.data_loader import DataLoader

from deconv_py.models.base import Base as Models_base
from deconv_py.models.cell_proportions_models import CellProportions
from deconv_py.models.cell_specific_models import CellSpecificPerPermutation

from deconv_py.experiments.cell_specific import CellSpecificMetricsPlot

from cellMix_coordinator import CellMixCoordinator

In [5]:
%connect_info

{
  "shell_port": 53571,
  "iopub_port": 53572,
  "stdin_port": 53573,
  "control_port": 53574,
  "hb_port": 53575,
  "ip": "127.0.0.1",
  "key": "462be63b-f7749990c8c291098e6d39d3",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-1ab47c54-97b2-4449-9585-f8283d315b72.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


# read and build any data

## simple artificial 

In [6]:
# Single DataFactory instance reused by the data-loading cells below.
data_factory = DataFactory()
# A_all_ar,B_ar,X_ar = data_factory.load_simple_artificial_profile("Intensity",index_func=lambda x:x,sample_to_pick="all")

## simple IBD

In [7]:
# A,B = data_factory.load_simple_IBD_profile("Intensity",index_func=lambda x:x,log2_transformation=True)

## IBD with A/B_all_vs

In [8]:
# Only the first value returned by load_IBD_all_vs is kept (as A_all_vs); the
# second one is discarded. The commented-out variant below keeps it as B_all_vs.
A_all_vs,_ = data_factory.load_IBD_all_vs("Intensity",index_func=lambda x:x,log2_transformation=True)
# A_all_vs,B_all_vs = data_factory.load_IBD_all_vs("Intensity",index_func=lambda x:x,log2_transformation=True)

  if __name__ == '__main__':


## IBD with A/B_all_vs with A_Intensity

In [9]:
# A_all_vs, B_all_vs ,A_intensity, B_intensity = data_factory.load_IBD_vs_A_and_B_intensity("Intensity",index_func=lambda x:x,log2_transformation=True)

## simulated_data

In [10]:
# A,X,B = data_factory.build_simulated_data()
# The first returned value is discarded here. X and B are consumed by the
# pipeline-evaluation loop further down: B is passed to predict() together with
# A_all_vs, and X is what the predictions are correlated against.
_,X,B = data_factory.build_simulated_data()

## cytof data

# visualize data

# preprocess with model results - no cytof

In [11]:
# pp_eb = PpEntropyBased()
# bd=BasicDeconv()
# cpm = CellProportionsMeasure()

# steps = [("deconv_py",pp_eb),("BasicDeconv",bd)]
# ppline = pipeline.Pipeline(steps)
# result = ppline.predict([A,B])
# corr = cpm.correlation_measure(result,X)

In [12]:
# raise

# preprocess with model results - with cytof

# preprocess with model results - with intra variance 

In [13]:
# pp_eb = PpEntropyBased()
# bd=BasicDeconv()
# cpm = CellProportionsMeasure()
# chiv = PpCleanHighIntraVar()

# steps = [("high_intra_var",chiv),("entropy_base",pp_eb),("BasicDeconv",bd)]
# ppline = pipeline.Pipeline(steps)
# result = ppline.predict([A,B])
# corr = cpm.correlation_measure(result,X)

# run model - with and without cell mix

In [14]:
# bd = BasicDeconv(cellMix=True)
# bd.predict([A,B])


# build the meta pipeline

In [15]:
def build_static_configurations(hyper_configuration):
    """Expand a hyper-configuration into every concrete ("static") configuration.

    Parameters
    ----------
    hyper_configuration : list of dict
        One dict per pipeline stage, each with the keys
        ``"step_name"`` (label of the stage) and ``"steps"`` (list of
        alternative functions for that stage). Every alternative is a dict with
        ``"function_name"``, ``"function"`` and ``"params"``, where ``"params"``
        maps a parameter name to an iterable of candidate values.

    Returns
    -------
    list of list of tuple
        One inner list per configuration. Each inner list holds, in stage
        order, ``(step_name, [function_name, function, params_comb])`` where
        ``params_comb`` is a tuple of ``(param_name, value)`` pairs. The result
        is the Cartesian product over stages of (alternative x parameter
        assignment); an alternative with empty ``"params"`` contributes exactly
        one entry with ``params_comb == ()``.
    """
    step_configuration = []
    for step in hyper_configuration:
        functions_combs = []
        for s in step["steps"]:
            # One list of (name, value) pairs per parameter; the product of
            # those lists enumerates every full parameter assignment.
            param_choices = [[(k, vv) for vv in v] for k, v in s["params"].items()]
            for params_comb in itertools.product(*param_choices):
                functions_combs.append([s["function_name"], s["function"], params_comb])
        step_configuration.append(functions_combs)

    # Stage labels are the same for every configuration, so compute them once.
    step_names = [s["step_name"] for s in hyper_configuration]
    return [list(zip(step_names, config)) for config in itertools.product(*step_configuration)]

def build_pipelines(static_configurations):
    """Lazily build one sklearn ``Pipeline`` per static configuration.

    Parameters
    ----------
    static_configurations : iterable
        Configurations in the format returned by
        ``build_static_configurations``: each item is a sequence of
        ``(step_name, [function_name, estimator, params_comb])`` entries, where
        ``params_comb`` is an iterable of ``(param_name, value)`` pairs.

    Yields
    ------
    sklearn.pipeline.Pipeline
        A pipeline whose steps are named after ``function_name`` and whose
        parameters have been set from ``params_comb``. Each pipeline owns
        fresh clones of the configured estimators, so it keeps its own
        parameters even after later pipelines have been generated.
    """
    # Imported locally so this definition does not depend on another cell.
    from sklearn.base import clone

    for static_conf in static_configurations:
        pipeline_steps = []
        params_sklearn_set = {}
        for step in static_conf:
            function_name = step[1][0]
            function_inst = step[1][1]
            function_param = step[1][2]

            # Clone instead of reusing the template instance: set_params()
            # mutates the estimator in place, so sharing one instance across
            # pipelines would make every previously yielded pipeline silently
            # take on the parameters of the most recent configuration.
            pipeline_steps.append((function_name, clone(function_inst)))
            # sklearn addresses nested parameters as "<step name>__<param>".
            params_sklearn_set.update({f"{function_name}__{p[0]}": p[1] for p in function_param})

        curr_ppline = pipeline.Pipeline(pipeline_steps)
        curr_ppline.set_params(**params_sklearn_set)
        yield curr_ppline


In [16]:
# agg
# Intra-variance aggregation stage.
agg_iv = AggregateIntraVariance()
# Stage that cleans irrelevant proteins.
pp_irl_prot = PpCleanIrrelevantProteins()

# Alternative preprocessing stages; the search tries each of them.
pp_entropy = PpEntropyBased()
pp_empty = PpEmpty()
pp_entropy_only_largest = PpEntropyBasedOnlyLargest()

# Deconvolution model (final pipeline stage).
bd = BasicDeconv()

# Measure used to score pipeline predictions.
cpm = CellProportionsMeasure()

# Search space consumed by build_static_configurations: one dict per pipeline
# stage, listing the alternative functions for that stage and, per function,
# the candidate values of each parameter.
hyper_configuration = [
    {
        "step_name": "AggregateIntraVariance",
        "steps": [
            {
                "function_name": "AggregateIntraVariance",
                "function": agg_iv,
                "params": {"how": ["mean", "median", "first"]},
            },
        ],
    },
    # --------------------------------
    {
        "step_name": "cleen_irrelevant_proteins",
        "steps": [
            {
                "function_name": "CleanIrrelevantProteins",
                "function": pp_irl_prot,
                "params": {},
            },
        ],
    },
    # --------------------------------
    {
        "step_name": "preprocess",
        "steps": [
            {
                "function_name": "PpEntropyBased",
                "function": pp_entropy,
                "params": {
                    "n_genes_per_cell": [5, 15],
                    "gene_entropy_trh": [0.0001],
                    "with_norm": [False],
                },
            },
            {
                "function_name": "PpEntropyBasedOnlyLargest",
                "function": pp_entropy_only_largest,
                "params": {
                    "n_genes_per_cell": [5, 15],
                    "gene_entropy_trh": [0.0001],
                    "with_norm": [True, False],
                },
            },
            {
                "function_name": "PpEmpty",
                "function": pp_empty,
                "params": {},
            },
        ],
    },
    # --------------------------------
    {
        "step_name": "deconv",
        "steps": [
            {
                "function_name": "BasicDeconv",
                "function": bd,
                "params": {"normalize": [True], "cellMix": [True, False]},
            },
        ],
    },
]


# hyper_measure_configuration = [{"step_name":"X",
#                        "steps":[
#                            {"function_name":"PreKnownProp","function":pre_known_prop,
#                             "params" :{"known_prop" : [X]} }]},
#                         #--------------------------------
#                         {"step_name":"measure",
#                        "steps":[
#                            {"function_name":"CellProportionsMeasure","function":cpm,
#                             "params" :{"how":["correlation"] } }]}]

In [17]:
# Expand the search space into concrete configurations and create the pipeline
# generator. build_pipelines is a generator function, so pipeline_gen can be
# iterated only once; it has to be rebuilt before every new pass.
static_configurations =  build_static_configurations(hyper_configuration)
pipeline_gen = build_pipelines(static_configurations) 

In [18]:
# Rebuild the generator inside this cell: it is single-use, so re-running the
# cell with an already consumed generator would silently evaluate nothing.
static_configurations = build_static_configurations(hyper_configuration)
pipeline_gen = build_pipelines(static_configurations)

# best_params collects every pipeline that reaches the highest (rounded) mean
# correlation. Start below any attainable score: starting at 0 meant that a run
# in which every mean correlation is negative selected no pipeline at all.
best_params = []
best_mean = -np.inf
for pip in pipeline_gen:
    print([i for _, i in pip.steps])
    result = pip.predict([A_all_vs, B])
    corr = cpm.correlation_measure(result, X)
    # Rounding to 3 decimals makes near-identical scores count as ties.
    _mean = np.round(corr.mean(), 3)

    if np.isnan(_mean):
        # NaN compares False against everything, so such a pipeline can never
        # be selected; say so instead of skipping it silently.
        print("mean correlation is NaN - pipeline excluded from best_params")
    elif _mean > best_mean:
        best_mean = _mean
        best_params = [pip]
    elif _mean == best_mean:
        best_params.append(pip)

    print(_mean)
#     print((corr.std()))
    print("-----new-----")
    

[AggregateIntraVariance(how='mean'), PpCleanIrrelevantProteins(), PpEntropyBased(gene_entropy_trh=0.0001, n_genes_per_cell=5,
        only_signature=None, with_norm=False), BasicDeconv(cellMix=True, normalize=True)]
nan
-----new-----
[AggregateIntraVariance(how='mean'), PpCleanIrrelevantProteins(), PpEntropyBased(gene_entropy_trh=0.0001, n_genes_per_cell=5,
        only_signature=None, with_norm=False), BasicDeconv(cellMix=False, normalize=True)]
nan
-----new-----
[AggregateIntraVariance(how='mean'), PpCleanIrrelevantProteins(), PpEntropyBased(gene_entropy_trh=0.0001, n_genes_per_cell=15,
        only_signature=None, with_norm=False), BasicDeconv(cellMix=True, normalize=True)]
nan
-----new-----
[AggregateIntraVariance(how='mean'), PpCleanIrrelevantProteins(), PpEntropyBased(gene_entropy_trh=0.0001, n_genes_per_cell=15,
        only_signature=None, with_norm=False), BasicDeconv(cellMix=False, normalize=True)]
nan
-----new-----
[AggregateIntraVariance(how='mean'), PpCleanIrrelevantProtei

KeyboardInterrupt: 

# plot experiment params summary 

# visualization of results