<h1>Normalisation for the removal of batch effects</h1>

In [2]:
import os
import sys

# Location of the local immunova checkout. Set the IMMUNOVA_PATH environment
# variable to override it; the default is the path this notebook was written with.
IMMUNOVA_PATH = os.environ.get('IMMUNOVA_PATH', '/home/ross/immunova')
if IMMUNOVA_PATH not in sys.path:
    sys.path.append(IMMUNOVA_PATH)
from immunova.data.mongo_setup import pd_init
from immunova.data.fcs_experiments import FCSExperiment
from immunova.flow.gating.actions import Gating, Template
from immunova.flow.normalisation.normalise import Normalise
from immunova.flow.deep_gating.deep_gating import calculate_reference_sample
from warnings import filterwarnings
from tqdm import tqdm_notebook
import matplotlib
import pandas as pd
# Suppress every warning for the rest of the session. This keeps cell output
# short but also hides deprecation notices.
filterwarnings('ignore')
# pd_init comes from immunova.data.mongo_setup; presumably it opens the
# project's database connection -- TODO confirm.
pd_init()

In [3]:
# Look up the experiment whose experiment_id is 'PD_T_PDMCs'; texp is the
# experiment object passed to the helper functions in the cells below.
texp = FCSExperiment.objects(experiment_id='PD_T_PDMCs').get()

<h2>Calculate Reference Sample</h2>

In [4]:
# Sample IDs passed as exclude_samples when the reference sample is calculated.
exclude = [
    '142-09_pdmc_t',
    '210-14_pdmc_t',
    '273-01_pdmc_t',
    '276-01_pdmc_t',
    '286-03_pdmc_t',
    '298-01_pdmc_t',
    '305-01_pdmc_t',
    '308-02R_pdmc_t',
    '315-01_pdmc_t',
    '322-01_pdmc_t',
    '323-01_pdmc_t',
    '324-01_pdmc_t',
    '302-01_pdmc_t',
]

In [5]:
# NOTE(review): in the recorded run this call raised
# "ValueError: operands could not be broadcast together with shapes (17,17) (18,18)"
# while 165-09_pdmc_t was being compared to 175-09_pdmc_t, so reference_sample
# was never assigned. The following cells investigate that failure.
reference_sample = calculate_reference_sample(texp, exclude_samples=exclude)

Running comparisons....
----------------------- 142-09_pdmc_t -----------------------
Skipping 142-09_pdmc_t; found in exclude list
----------------------- 165-09_pdmc_t -----------------------
Estimating covariance matrix
Make comparisons to other samples...
Compare to 165-09_pdmc_t..
Compare to 175-09_pdmc_t..


ValueError: operands could not be broadcast together with shapes (17,17) (18,18) 

In [4]:
from immunova.flow.gating.transforms import apply_transform
import numpy as np
def pull_data(sid, experiment):
    """
    Fetch the raw 'complete' data for one sample, drop the 'Time' column and
    apply the log transform.

    Parameters
    ----------
    sid:
        Sample ID forwarded to ``experiment.pull_sample_data``.
    experiment:
        Object exposing ``pull_sample_data(sample_id=..., data_type=...)``
        (an FCSExperiment in this notebook).

    Returns
    -------
    The value returned by ``apply_transform(..., transform_method='log_transform')``,
    or None when the experiment returns no data for the sample or when none of
    the returned entries has ``typ == 'complete'``.
    """
    files = experiment.pull_sample_data(sample_id=sid, data_type='raw')
    if files is None:
        return None
    # next() with a default replaces `[...][0]`, which raised IndexError when
    # the sample had no 'complete' entry.
    complete = next((f['data'] for f in files if f['typ'] == 'complete'), None)
    if complete is None:
        return None
    # 'Time' is removed before the transform is applied.
    channels = [c for c in complete.columns if c != 'Time']
    return apply_transform(complete[channels], transform_method='log_transform')

In [13]:
# The two samples that, according to the log printed before the ValueError,
# were being compared when calculate_reference_sample failed.
x = '165-09_pdmc_t'
y = '175-09_pdmc_t'

In [15]:
# Transformed data for each of the two samples (pull_data can return None).
xd = pull_data(x, texp)
yd = pull_data(y, texp)

In [19]:
# Covariance matrices of the two samples. rowvar=False makes np.cov treat each
# column as a variable and each row as an observation, so each matrix is
# (n_columns, n_columns).
xcov = np.cov(xd, rowvar=False)
ycov = np.cov(yd, rowvar=False)

In [22]:
# Columns of the transformed data for sample y
yd.columns

Index(['FSC-A', 'FSC-H', 'FSC-W', 'SSC-A', 'SSC-H', 'SSC-W', 'CXCR3', 'CD161',
       'CD3', 'CCR7', 'L/D', 'Va7.2', 'CD8', 'Vd2', 'CD45RA', 'PanGD', 'CD4',
       'CD27'],
      dtype='object')

In [23]:
# Columns of the transformed data for sample x
xd.columns

Index(['FSC-A', 'FSC-H', 'SSC-A', 'SSC-H', 'SSC-W', 'CXCR3', 'CD161', 'CD3',
       'CCR7', 'L/D', 'Va7.2', 'CD8', 'Vd2', 'CD45RA', 'PanGD', 'CD4', 'CD27'],
      dtype='object')

In [24]:
# Columns of yd that are also present in xd, kept in yd's column order
[channel for channel in yd.columns if channel in xd.columns]

['FSC-A',
 'FSC-H',
 'SSC-A',
 'SSC-H',
 'SSC-W',
 'CXCR3',
 'CD161',
 'CD3',
 'CCR7',
 'L/D',
 'Va7.2',
 'CD8',
 'Vd2',
 'CD45RA',
 'PanGD',
 'CD4',
 'CD27']

In [19]:
def find_common_features(experiment: 'FCSExperiment'):
    """
    Return the set of column names shared by every sample in an experiment.

    For each ID in ``experiment.list_samples()`` the raw data is pulled
    (controls excluded) and the columns of its 'complete' entry are
    intersected with those of the other samples.

    Parameters
    ----------
    experiment:
        Object exposing ``list_samples()`` and
        ``pull_sample_data(sample_id=..., data_type=..., include_controls=...)``.

    Returns
    -------
    set
        Column names present in all samples that have a 'complete' entry.
        Samples for which no data or no 'complete' entry is returned are
        skipped. An empty set is returned when no sample contributes columns
        (including when the experiment has no samples).
    """

    def pull_columns(sid):
        # Column names of the sample's 'complete' entry, or None if unavailable.
        files = experiment.pull_sample_data(sample_id=sid, data_type='raw', include_controls=False)
        if files is None:
            return None
        data = next((f['data'] for f in files if f['typ'] == 'complete'), None)
        if data is None:
            return None
        return list(data.columns)

    common_features = None
    for sid in experiment.list_samples():
        columns = pull_columns(sid)
        if columns is None:
            # A sample without data cannot constrain the shared columns.
            continue
        if common_features is None:
            # The first usable sample seeds the running intersection.
            common_features = set(columns)
        else:
            common_features.intersection_update(columns)
    return common_features if common_features is not None else set()

In [20]:
# Time one full pass of find_common_features over the experiment; the recorded
# run took about 51 seconds.
from datetime import datetime
start = datetime.now()
common = find_common_features(texp)
end = datetime.now()
# Elapsed wall-clock time in seconds
print(f'Time: {(end-start).total_seconds()}')

Time: 50.983219


In [21]:
# Column names shared by all samples. NOTE(review): in the recorded output
# 'Time' is still part of this set, whereas pull_data drops 'Time' before
# transforming -- remove it here too if this set is used to select channels.
common

{'CCR7',
 'CD161',
 'CD27',
 'CD3',
 'CD4',
 'CD45RA',
 'CD8',
 'CXCR3',
 'FSC-A',
 'FSC-H',
 'L/D',
 'PanGD',
 'SSC-A',
 'SSC-W',
 'Time',
 'Va7.2',
 'Vd2'}