In [1]:
import sys
sys.path.append('../../')
sys.path.append('../infras/cellMix/')

In [2]:
import pandas as pd
import numpy as np
from functools import partial
import multiprocessing

from scipy.optimize import least_squares
from sklearn.metrics import mean_squared_error
from functools import partial
from scipy.optimize import minimize
import scipy.optimize
from itertools import combinations
import matplotlib.pyplot as plt

import os

In [3]:
from deconv_py.preprocess.base import BasePreprocess as PP_base
from deconv_py.preprocess.cell_specific import CellSpecific as PP_proteins

from deconv_py.infras.data_factory import DataFactory
from deconv_py.infras.data_loader import DataLoader

from deconv_py.models.base import Base as Models_base
from deconv_py.models.cell_proportions_models import CellProportions
from deconv_py.models.cell_specific_models import CellSpecificPerPermutation

from deconv_py.experiments.cell_specific import CellSpecificMetricsPlot

from deconv_py.infras.cellMix.cellMix_coordinator import CellMixCoordinator

# Calc cell specific - LFQ

## read data

### mixture & cell specific data

In [4]:
# Reference profile table, indexed by its 'gene' column.
profile_data = pd.read_csv(
    '../data/test/sigMat.5subtypes.NOT.0622.fc.q.2.txt',
    sep="\t",
).set_index('gene')

# Raw mixture table exactly as stored on disk.
mixtures_data = pd.read_csv('../data/test/mixture_genename.txt', sep="\t")

# Same table indexed by gene ('rowname' is renamed to 'gene' first); every
# column containing 'LFQ.intensity' keeps only the part that follows the
# 'LFQ.intensity.' marker.
mixtures = (
    mixtures_data
    .rename(columns={'rowname': 'gene'})
    .set_index('gene')
    .rename(columns={
        col: col.split('LFQ.intensity.')[1]
        for col in mixtures_data.columns
        if 'LFQ.intensity' in col
    })
)

# CIBERSORT output file, used later as the reference result.
_ciberSort = pd.read_csv('../data/test/CIBERSORT.Output_Job21.txt', sep="\t")

In [5]:
# Expected composition of the eight mixtures (columns 1-8) over the three
# reference samples (rows). Values are presumably percentages - TODO confirm.
# Each tuple below is one mixture; the array is transposed so that mixtures
# become columns.
cell_proportions_df = pd.DataFrame(
    data=np.array([
        (100, 0, 0),
        (0, 100, 0),
        (0, 0, 100),
        (33, 33, 33),
        (25, 25, 50),
        (25, 50, 25),
        (50, 25, 25),
        (47.5, 47.5, 5.0),
    ]).T,
    index=[
        'LFQ intensity NOT_CD4TCellTcm_01',
        'LFQ intensity NOT_BCellmemory_01',
        'LFQ intensity NOT_Monocytesnonclassical_01',
    ],
    columns=list(range(1, 9)),
)

In [6]:
# Short aliases used by the cells below:
#   A - profile table, B - mixture table, X - expected proportions.
A, B, X = profile_data, mixtures, cell_proportions_df

# Drop the non-proportion columns from the CIBERSORT output.
_cibersort_extra_columns = ['Input Sample', 'P-value', 'Pearson Correlation', 'RMSE']
ref_ciberSort = _ciberSort.drop(columns=_cibersort_extra_columns)

In [7]:
# Restrict both tables to the index labels (genes) they have in common, so
# that their rows line up.
mutual_genes = A.index.intersection(B.index)
A, B = A.loc[mutual_genes], B.loc[mutual_genes]

## models

In [8]:
def rearrange(matrix, permutation=(1, 0, 3, 2, 4)):
    """Reorder the columns of ``matrix`` by multiplying with a permutation matrix.

    A square 0/1 matrix ``P`` is built with ``P[k, permutation[k]] = 1`` and the
    function returns ``np.dot(matrix, P)``. Consequently column ``k`` of
    ``matrix`` ends up in column ``permutation[k]`` of the result. The default
    permutation swaps columns 0 <-> 1 and 2 <-> 3 and leaves column 4 in place.

    Parameters
    ----------
    matrix : array-like
        Two-dimensional data with ``len(permutation)`` columns.
    permutation : sequence of int, optional
        Target column position for every source column. Must contain each of
        ``0 .. len(permutation) - 1`` exactly once.

    Returns
    -------
    The result of ``np.dot(matrix, P)`` (floating point, because ``P`` is a
    float matrix).

    Raises
    ------
    ValueError
        If ``permutation`` is not a rearrangement of ``0 .. n - 1``.
    """
    permutation = list(permutation)
    size = len(permutation)
    if sorted(permutation) != list(range(size)):
        raise ValueError(
            "permutation must contain every index from 0 to %d exactly once, got %r"
            % (size - 1, permutation)
        )

    # Put a single 1 in every row k, at column permutation[k].
    perm_mat = np.zeros((size, size))
    perm_mat[np.arange(size), permutation] = 1

    return np.dot(matrix, perm_mat)

In [9]:
# Module-level coordinator instance; _calc_and_display_with_cellmix uses it
# only when called with with_cellMix=True.
cmc = CellMixCoordinator()
def _calc_and_display_with_cellmix(_a, _b, X, with_cellMix=False):
    """Fit cell proportions and display them next to the reference tables.

    Displayed, in order:
      1. (only if ``with_cellMix``) the column-rearranged result of the
         module-level ``cmc`` coordinator,
      2. the column-rearranged result of ``CellProportions.fit(_a, _b.values)``,
      3. the column-rearranged module-level ``ref_ciberSort``, rounded to two
         decimals,
      4. ``X`` transposed.

    Tables 2 and 3 are labelled with rows 1-8 and with the columns of ``X.T``
    followed by 'CD8TCell' and 'NK'.

    Parameters
    ----------
    _a : first argument handed to ``CellProportions.fit``.
    _b : object with a ``.values`` attribute; ``_b.values`` is the second
        argument handed to ``CellProportions.fit``.
    X : table whose transpose supplies the column labels and is displayed last.
    with_cellMix : bool, optional
        When true, additionally compute and display the CellMix result.

    Returns
    -------
    None
    """
    expected = X.T
    column_labels = expected.columns.tolist() + ['CD8TCell', 'NK']
    sample_ids = range(1, 9)

    estimated = CellProportions.fit(_a, _b.values)

    if with_cellMix:
        # NOTE(review): the method name 'cell_prop_with_bashop_with_bash' looks
        # like a paste artifact - confirm it exists on CellMixCoordinator.
        cellmix_estimated = cmc.cell_prop_with_bashop_with_bash(_b, _a)
        display(rearrange(cellmix_estimated))

    estimated_table = pd.DataFrame(
        data=rearrange(estimated),
        index=sample_ids,
        columns=column_labels,
    )
    display(estimated_table)

    reference_table = pd.DataFrame(
        data=rearrange(ref_ciberSort).round(2),
        index=sample_ids,
        columns=column_labels,
    )
    display(reference_table)

    display(expected)
    

In [10]:
# Baseline run on the gene-matched A and B, before any PP_proteins
# preprocessing; with_cellMix keeps its default (False).
_calc_and_display_with_cellmix(A,B,X)


Unnamed: 0,LFQ intensity NOT_CD4TCellTcm_01,LFQ intensity NOT_BCellmemory_01,LFQ intensity NOT_Monocytesnonclassical_01,CD8TCell,NK
1,0.97,0.03,0.0,0.0,0.0
2,0.03,0.95,0.01,0.0,0.01
3,0.0,0.0,1.0,0.0,0.0
4,0.28,0.18,0.54,0.0,0.0
5,0.14,0.33,0.53,0.0,0.0
6,0.25,0.0,0.75,0.0,0.0
7,0.36,0.19,0.45,0.0,0.0
8,0.36,0.56,0.08,0.0,0.0


Unnamed: 0,LFQ intensity NOT_CD4TCellTcm_01,LFQ intensity NOT_BCellmemory_01,LFQ intensity NOT_Monocytesnonclassical_01,CD8TCell,NK
1,1.0,0.0,0.0,0.0,0.0
2,0.0,0.95,0.0,0.0,0.05
3,0.01,0.03,0.85,0.11,0.0
4,0.31,0.23,0.41,0.0,0.04
5,0.18,0.39,0.37,0.0,0.06
6,0.23,0.03,0.63,0.11,0.0
7,0.35,0.26,0.34,0.0,0.04
8,0.37,0.53,0.05,0.0,0.04


Unnamed: 0,LFQ intensity NOT_CD4TCellTcm_01,LFQ intensity NOT_BCellmemory_01,LFQ intensity NOT_Monocytesnonclassical_01
1,100.0,0.0,0.0
2,0.0,100.0,0.0
3,0.0,0.0,100.0
4,33.0,33.0,33.0
5,25.0,25.0,50.0
6,25.0,50.0,25.0
7,50.0,25.0,25.0
8,47.5,47.5,5.0


### with preprocess

#### Top margin

In [11]:
# Preprocess A and B with pp_clean_irrelevant_proteins, then feed the
# resulting pair straight into pp_margin_quantile.
_A, _B = PP_proteins.pp_margin_quantile(
    *PP_proteins.pp_clean_irrelevant_proteins(A, B)
)

# Repeat the side-by-side comparison on the preprocessed pair.
_calc_and_display_with_cellmix(_A, _B, X)

Unnamed: 0,LFQ intensity NOT_CD4TCellTcm_01,LFQ intensity NOT_BCellmemory_01,LFQ intensity NOT_Monocytesnonclassical_01,CD8TCell,NK
1,0.99,0.01,0.0,0.0,0.0
2,0.03,0.94,0.02,0.0,0.01
3,0.01,0.0,0.99,0.0,0.0
4,0.29,0.16,0.55,0.0,0.0
5,0.15,0.3,0.55,0.0,0.0
6,0.25,0.0,0.75,0.0,0.0
7,0.37,0.17,0.46,0.0,0.0
8,0.37,0.54,0.09,0.0,0.0


Unnamed: 0,LFQ intensity NOT_CD4TCellTcm_01,LFQ intensity NOT_BCellmemory_01,LFQ intensity NOT_Monocytesnonclassical_01,CD8TCell,NK
1,1.0,0.0,0.0,0.0,0.0
2,0.0,0.95,0.0,0.0,0.05
3,0.01,0.03,0.85,0.11,0.0
4,0.31,0.23,0.41,0.0,0.04
5,0.18,0.39,0.37,0.0,0.06
6,0.23,0.03,0.63,0.11,0.0
7,0.35,0.26,0.34,0.0,0.04
8,0.37,0.53,0.05,0.0,0.04


Unnamed: 0,LFQ intensity NOT_CD4TCellTcm_01,LFQ intensity NOT_BCellmemory_01,LFQ intensity NOT_Monocytesnonclassical_01
1,100.0,0.0,0.0
2,0.0,100.0,0.0
3,0.0,0.0,100.0
4,33.0,33.0,33.0
5,25.0,25.0,50.0
6,25.0,50.0,25.0
7,50.0,25.0,25.0
8,47.5,47.5,5.0


#### Under quantile

In [12]:
# Preprocess A and B with pp_clean_irrelevant_proteins, then feed the
# resulting pair straight into pp_under_quantile.
_A, _B = PP_proteins.pp_under_quantile(
    *PP_proteins.pp_clean_irrelevant_proteins(A, B)
)

# Repeat the side-by-side comparison on the preprocessed pair.
_calc_and_display_with_cellmix(_A, _B, X)

Unnamed: 0,LFQ intensity NOT_CD4TCellTcm_01,LFQ intensity NOT_BCellmemory_01,LFQ intensity NOT_Monocytesnonclassical_01,CD8TCell,NK
1,0.37,0.02,0.0,0.61,0.0
2,0.79,0.21,0.0,0.0,0.0
3,0.0,0.17,0.83,0.0,0.0
4,0.0,0.65,0.35,0.0,0.0
5,0.0,0.75,0.25,0.0,0.0
6,0.0,0.33,0.67,0.0,0.0
7,0.0,0.72,0.28,0.0,0.0
8,0.58,0.14,0.0,0.28,0.0


Unnamed: 0,LFQ intensity NOT_CD4TCellTcm_01,LFQ intensity NOT_BCellmemory_01,LFQ intensity NOT_Monocytesnonclassical_01,CD8TCell,NK
1,1.0,0.0,0.0,0.0,0.0
2,0.0,0.95,0.0,0.0,0.05
3,0.01,0.03,0.85,0.11,0.0
4,0.31,0.23,0.41,0.0,0.04
5,0.18,0.39,0.37,0.0,0.06
6,0.23,0.03,0.63,0.11,0.0
7,0.35,0.26,0.34,0.0,0.04
8,0.37,0.53,0.05,0.0,0.04


Unnamed: 0,LFQ intensity NOT_CD4TCellTcm_01,LFQ intensity NOT_BCellmemory_01,LFQ intensity NOT_Monocytesnonclassical_01
1,100.0,0.0,0.0
2,0.0,100.0,0.0
3,0.0,0.0,100.0
4,33.0,33.0,33.0
5,25.0,25.0,50.0
6,25.0,50.0,25.0
7,50.0,25.0,25.0
8,47.5,47.5,5.0
