# Combinations Processing_Batch
**Written**: 20220902

Assumes all aggregated data is in 1 batch folder (./output)

1. Compile individual chips' combos linked with a chip ID -- easier to parse
2. Concatenate
3. Batch call hits (in the next notebook)

# Inputs & Imports

## configurable

In [None]:
# Filename prefixes (YYYYMMDD_name_) of the aggregated data files, one per chip
file_ids = [
    'prefix1',
    'prefix2',
]

# IDs used for each chip in the concatenated batch -- tranche info & screening groups.
# Paired with file_ids by position, so keep both lists in the same order.
chip_ids = [
    'ID1',
    'ID2',
]

# Prefix prepended to batch-level file names (batch_file_prefix_)
batch_id = 'batch_'

# Path to scripts (added to sys.path before the sytox_scripts imports)
script_reroute = '../path/'

## standard

In [None]:
import os
# Base folders: core_base is where per-chip summaries are read from,
# cc_base is where the coculture tables are written
core_base = './output/core/'
cc_base = './output/coculture/'

# Batch-level path prefixes (base folder + batch_id)
core_path = core_base + batch_id
cc_path = cc_base + batch_id

# Ensure the output tree exists; exist_ok makes re-running the cell harmless
os.makedirs('./output/', exist_ok=True)
os.makedirs(core_base, exist_ok=True)
os.makedirs(cc_base, exist_ok=True)

## packages & scripts

In [None]:
import re
import glob
import numpy as np
import pandas as pd
import scipy
import scipy.stats as stats
from sklearn.metrics import auc
from statsmodels.stats.multitest import multipletests
import itertools

import warnings
warnings.filterwarnings('ignore')

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
%matplotlib inline

import sys
sys.path.insert(1, script_reroute)
import sytox_scripts.bootstrap_and_z as bsz
import sytox_scripts.supplementary as helper
import sytox_scripts.cocultures as cocultures

# Process Chip Combos
- Assumes the aggregated data have already been produced (GCP, scripted) and saved in the same directory

## import & compile

In [None]:
def import_combo_summaries(save_desc='', save_dir=core_path):
    '''
    Reads the five 'summarized_combos' .csv summaries of one dataset.
    Each file is located at save_dir+'summarized_combos'+save_desc+<suffix>
    and is read with its first column as the index.
    Returns the (mean, std, med, sem, bs_aucs) dataframes, in that order.
    '''
    stem = save_dir + 'summarized_combos' + save_desc
    suffixes = ('_mean.csv', '_std.csv', '_med.csv', '_sem.csv', '_bs_aucs.csv')

    # one dataframe per suffix, read in the order listed above
    tables = [pd.read_csv(stem + suffix, index_col=0) for suffix in suffixes]

    return tuple(tables)

In [None]:
def compile_single_chip(file_id, chip_id, import_dir, save_dir):
    '''
    Compiles & links one chip's monoculture data to its coculture data
    for all combinations.
    Summaries are imported from import_dir+file_id; save_dir+chip_id+'_' is
    passed to the cocultures helpers as the output prefix (they are expected
    to save the kinetic, nonkinetic & separated monoculture .csv's there --
    see sytox_scripts.cocultures).
    Median, SEM & AUC summaries are the ones used, though all are imported.
    Returns None.
    '''
    # imports -- mean & std are read but not needed below
    _, _, medians, sems, aucs = import_combo_summaries(save_dir=import_dir+file_id)
    bug_label, media_label = cocultures.identify_mono_labels(medians)

    # compile data & export
    out_prefix = save_dir + chip_id + '_'
    monocultures = cocultures.pull_monocultures(medians, sems, out_prefix, media_label, bug_label)
    cocultures.pull_cocultures(medians, sems, monocultures, aucs, chip_id, out_prefix, media_label)

In [None]:
# file_ids and chip_ids are paired by position; check up front so a mismatch
# fails before any chip is processed instead of part-way through (or silently
# ignoring surplus chip_ids)
if len(file_ids) != len(chip_ids):
    raise ValueError(
        'file_ids and chip_ids must have the same length (got {} and {})'.format(
            len(file_ids), len(chip_ids)))

# compile each chip: summaries read from core_base, coculture tables saved to cc_base
for f, chip in zip(file_ids, chip_ids):
    compile_single_chip(f, chip, core_base, cc_base)

## concatenate

In [None]:
def concatenate_files(suffix, import_dir=cc_base, save_dir=cc_path):
    ''' Creates a single batch dataset with all chips.

    Reads every import_dir+'*'+suffix+'.csv' file (first column as index),
    stacks them, resets the index and saves to save_dir+suffix+'.csv'.

    The output file itself is skipped on import: when save_dir is a prefix
    inside import_dir, the saved file matches the import pattern, and a re-run
    would otherwise fold the previous batch file back in and duplicate rows.
    Inputs are read in sorted filename order, since glob guarantees no order.

    Returns the concatenated dataframe.
    Raises ValueError if no input files match.
    '''
    pattern = import_dir + '*' + suffix + '.csv'
    out_file = save_dir + suffix + '.csv'
    out_abs = os.path.abspath(out_file)

    # compare absolute paths so './x' vs 'x' or mixed separators still match
    files = sorted(
        f for f in glob.glob(pattern)
        if os.path.abspath(f) != out_abs
    )
    if not files:
        raise ValueError('No input files found matching ' + pattern)

    concat_df = pd.concat([pd.read_csv(f, index_col=0) for f in files]).reset_index()
    concat_df.to_csv(out_file)

    return concat_df

In [None]:
# combine the chips' '*summarized_cocultures_nonkinetic.csv' files into one batch table
batch_nk = concatenate_files('summarized_cocultures_nonkinetic')

In [None]:
# combine the chips' '*summarized_cocultures_kinetic.csv' files into one batch table
batch_kinetic = concatenate_files('summarized_cocultures_kinetic')

In [None]:
# display the nonkinetic batch table as the cell output
batch_nk