In [None]:
import numpy as np
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
import re
import flowkit as fk
import gc
from sklearn.metrics import mean_squared_error, roc_auc_score
import pickle as pkl
from xgboost import XGBClassifier, XGBRFRegressor
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, StackingClassifier
from lightgbm import LGBMModel,LGBMClassifier

# import warnings
# warnings.filterwarnings("ignore")

In [None]:
def get_target_events(
    batch_id_,
    target_gates,
    pbar,
    data_root="/home/chengstark/Dev/brain-flow-data/CSF",
    out_dir="viable_npy",
):
    """Export the events of selected gates for every CSF sample of one sub-batch.

    For each ``*.fcs`` file in the sub-batch directory whose name contains
    ``'CSF'``, the events of every gate named in ``target_gates`` are pulled
    from the pickled FlowKit session, a fixed set of channels is dropped, the
    per-gate event tables are stacked row-wise and the result is written to
    ``<out_dir>/<sample_id>.npy``.

    Parameters
    ----------
    batch_id_ : str
        Sub-batch identifier. It names the sub-batch directory and the session
        pickle; only its first character selects the compensation CSV
        (``batch_<first char>_comp.csv``).
    target_gates : iterable of str
        Gate names to extract. A gate name that occurs under several gate
        paths in the gating report is extracted once per path.
    pbar : tqdm-like
        Object exposing ``set_description(str)``; used for status text only.
    data_root : str, optional
        Directory containing ``sub_batches/``, ``comps/`` and
        ``fksessions_pkls/``. Defaults to the original hard-coded location.
    out_dir : str, optional
        Directory receiving the ``.npy`` files; created if missing.

    Returns
    -------
    dict
        Always empty at present: nothing in this function populates it. It is
        kept so callers that ``update()`` from the result keep working.
        NOTE(review): presumably intended to hold per-sample populations.

    Notes
    -----
    Samples for which none of ``target_gates`` appears in the gating report
    are skipped (no file is written) instead of raising ``AttributeError``.
    """
    # Channels removed from every gate's event table before export.
    dropped_channels = [
        'Time',
        'SSC-W', 'SSC-H',
        'FSC-W', 'FSC-H',
        'SSC-B-W', 'SSC-B-H', 'SSC-B-A',
        'Zombie NIR-A',
    ]

    sample_dir = os.path.join(data_root, "sub_batches", batch_id_)
    comp_dir = os.path.join(data_root, "comps")
    fksessions_pkls_dir = os.path.join(data_root, "fksessions_pkls")

    # The compensation file is looked up by the first character of the batch id,
    # while the matrix itself is labelled with the full id.
    df_comp = pd.read_csv(os.path.join(comp_dir, "batch_{}_comp.csv".format(batch_id_[0])))
    comp = fk.Matrix('batch_{}_comp'.format(batch_id_), df_comp.values, detectors=df_comp.columns)

    # SECURITY: pickle.load executes arbitrary code embedded in the file; only
    # load session pickles that were produced by a trusted source.
    with open(os.path.join(fksessions_pkls_dir, '{}.pkl'.format(batch_id_)), 'rb') as pklf:
        fks_fj = pkl.load(pklf)
    # set_description's second positional argument is `refresh`, not extra text,
    # so the status message has to be part of the description string itself.
    pbar.set_description('{}, loaded session'.format(batch_id_))

    # np.save does not create missing parent directories.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    sample_id_pop_dict = dict()

    sample_ids = [f for f in os.listdir(sample_dir) if 'CSF' in f and f.endswith('.fcs')]
    for sample_id in sample_ids:

        gating_rsts = fks_fj.get_gating_results("All Samples", sample_id).report

        # Index labels of the report; element [1] of each label is compared
        # against the gate name. Computed once per sample, not once per gate.
        index_labels = gating_rsts.axes[0].tolist()

        event_chunks = []
        for gi in target_gates:
            matching_rows = [i for i, label in enumerate(index_labels) if label[1] == gi]
            for ri in matching_rows:
                gate_path = gating_rsts.iloc[ri]['gate_path']

                gi_events = fks_fj.get_gate_events(
                    "All Samples",
                    sample_id,
                    gi,
                    matrix=comp,
                    gate_path=gate_path
                )

                # Non-mutating drop: leaves the frame handed out by the session untouched.
                event_chunks.append(gi_events.drop(columns=dropped_channels).to_numpy())

        if not event_chunks:
            # None of the requested gates exists for this sample; nothing to save.
            pbar.set_description('{}, {}, no events for target gates'.format(batch_id_, sample_id))
            continue

        # Single concatenation instead of re-copying the growing array per gate.
        sample_cell_events = np.concatenate(event_chunks, axis=0)
        print(sample_cell_events.shape)
        np.save(os.path.join(out_dir, '{}.npy'.format(sample_id)), sample_cell_events)
        pbar.set_description('{}, {}, {}'.format(batch_id_, sample_id, sample_cell_events.shape))

    # Drop the references to the session and matrix and force a collection
    # before the caller moves on to the next batch.
    del fks_fj
    del comp

    gc.collect()

    return sample_id_pop_dict

In [None]:
# target_cell_gates = ['CD4+', 'CD8+', 'Classcial "M1"', 'non-Classical "M2"']
# target_cell_gates = ['Q1: CD45RA- , Comp-BV711-A+', 'Q2: CD45RA+ , Comp-BV711-A+', 'Q3: CD45RA+ , Comp-BV711-A-',
#                       'Q4: CD45RA- , Comp-BV711-A-', 'Classcial "M1"', 'non-Classical "M2"']
# Gates whose events are exported for every sample.
target_cell_gates = ['Dump- Viable Cells']

# Merged result of every get_target_events call.
all_sample_pop_dict = {}

# Progress bar over the sub-batch directory entries whose name is exactly two
# characters long, visited in sorted order.
pbar = tqdm(
    sorted(
        filter(
            lambda entry: len(entry) == 2,
            os.listdir('/home/chengstark/Dev/brain-flow-data/CSF/sub_batches/'),
        )
    )
)

for batch_id in pbar:
    # The bar itself is handed to the worker so it can update the description.
    sample_id_pop_dict = get_target_events(batch_id, target_cell_gates, pbar)
    all_sample_pop_dict.update(sample_id_pop_dict)
    