In [1]:
import sys
# Make the local immunova checkout importable. The membership check keeps
# repeated runs of this cell from appending the same entry to sys.path again.
if '/home/ross/immunova' not in sys.path:
    sys.path.append('/home/ross/immunova')
from immunova.data.patient import Patient, Bug, Biology, Drug
from immunova.data.fcs_experiments import FCSExperiment
from immunova.data.utilities import get_fcs_file_paths
from immunova.data.mongo_setup import pd_init
import pandas as pd
import os
# NOTE(review): presumably sets up the database connection used by the
# document classes imported above -- confirm in immunova.data.mongo_setup.
pd_init()

In [2]:
# Fetch the four experiment documents by experiment id, in the same order as
# the names on the left-hand side.
t_pbmc_exp, t_pdmc_exp, n_pdmc_exp, n_pbmc_exp = (
    FCSExperiment.objects(experiment_id=exp_id).get()
    for exp_id in ('PD_T_PBMCs', 'PD_T_PDMCs', 'PD_N_PDMCs', 'PD_N_PBMCs')
)

In [3]:
# Read the 'All_Data' sheet of the metadata workbook; the path is relative to
# the notebook's working directory.
meta = pd.read_excel('pd_metadata.xlsx', sheet_name='All_Data')

In [4]:
# Keep only the first 11 columns of the sheet, then preview the result.
meta = meta.loc[:, meta.columns[:11]]
meta.head()

Unnamed: 0,pt_no,status,flag,cell_origin,dob,commenced_pd,processing_date,gender,age_years,age_days,time_on_pd
0,142-09,Peritonitis,,PBMC,1950-03-06,2015-06-29,2017-09-26,M,67.559206,24676,820
1,165-09,Stable,,PBMC,1941-03-17,2013-04-08,2017-12-13,M,76.741958,28030,1710
2,175-09,Stable,,PBMC,1934-04-19,2013-01-29,2018-06-20,M,84.265572,30778,2003
3,209-03,Peritonitis,,PBMC,1948-02-15,2014-05-19,2017-04-16,M,69.166324,25263,1063
4,209-05,Stable,,PBMC,1948-02-15,2014-05-19,2017-11-08,M,69.730322,25469,1269


In [5]:
# (rows, columns) of the trimmed metadata table.
meta.shape

(104, 11)

In [6]:
# Drop three columns and then remove duplicate rows, so that rows differing
# only in those columns collapse into one.
unique_patients = (
    meta
    .drop(columns=['cell_origin', 'flag', 'processing_date'])
    .drop_duplicates()
)
unique_patients.head()

Unnamed: 0,pt_no,status,dob,commenced_pd,gender,age_years,age_days,time_on_pd
0,142-09,Peritonitis,1950-03-06,2015-06-29,M,67.559206,24676,820
1,165-09,Stable,1941-03-17,2013-04-08,M,76.741958,28030,1710
2,175-09,Stable,1934-04-19,2013-01-29,M,84.265572,30778,2003
3,209-03,Peritonitis,1948-02-15,2014-05-19,M,69.166324,25263,1063
4,209-05,Stable,1948-02-15,2014-05-19,M,69.730322,25469,1269


In [7]:
# Number of distinct pt_no values in unique_patients.
len(set(unique_patients.pt_no.values))

57

In [8]:
# Row count of unique_patients. When this equals the number of distinct pt_no
# values, every patient id occupies exactly one row.
unique_patients.shape[0]

57

<h1>Create patient objects</h1>

In [9]:
def check_null(x):
    """Return ``None`` when ``pd.isna(x)`` is true, otherwise return ``x`` unchanged."""
    return None if pd.isna(x) else x

def process_gender(g):
    """Encode a gender label as an integer.

    Returns 0 for ``'M'``, 1 for ``'F'`` and ``None`` for any other value.
    """
    for label, encoded in (('M', 0), ('F', 1)):
        if g == label:
            return encoded
    return None

def create_patient(r):
    """Build a ``Patient`` from one row of the patient metadata table.

    Parameters
    ----------
    r : row object exposing ``pt_no``, ``status``, ``dob``, ``commenced_pd``,
        ``gender``, ``age_years`` and ``time_on_pd`` as attributes.

    Returns
    -------
    Patient
        A new, unsaved patient. Missing values are stored as ``None`` and
        gender is encoded by ``process_gender``.
    """
    patient = Patient()
    # (patient attribute, row column, converter), applied in this order.
    field_map = (
        ('patient_id', 'pt_no', check_null),
        ('status', 'status', check_null),
        ('dob', 'dob', check_null),
        ('commenced_pd', 'commenced_pd', check_null),
        ('gender', 'gender', process_gender),
        ('age', 'age_years', check_null),
        ('time_on_pd', 'time_on_pd', check_null),
    )
    for attribute, column, convert in field_map:
        setattr(patient, attribute, convert(getattr(r, column)))
    return patient

In [10]:
# Call create_patient once per row of unique_patients (axis=1 passes rows).
patients = unique_patients.apply(create_patient, axis=1)

<h1>Add Microbiology</h1>

In [11]:
# Microbiology results sheet.
micro = pd.read_excel('pd_metadata.xlsx', sheet_name='Microbiology')
# Cast the date column to object dtype and substitute None wherever the date
# is missing (values that are present are left as they are).
micro['date'] = micro['date'].astype(object).where(micro['date'].notna(), None)
# Lookup sheets used by get_org_name (name_key) and get_bug_info (code_key).
name_key = pd.read_excel('pd_metadata.xlsx', sheet_name='micro_name_key')
code_key = pd.read_excel('pd_metadata.xlsx', sheet_name='micro_codes')

In [12]:
def get_org_name(org_key):
    """Translate an organism key into a name using the ``name_key`` table.

    Returns ``None`` when ``org_key`` is missing. When no row of ``name_key``
    has a matching ``key``, a message is printed and ``org_key`` itself is
    returned. Otherwise the ``name`` value of the first matching row is
    returned.
    """
    if pd.isna(org_key):
        return None
    matches = name_key[name_key['key'] == org_key]
    if len(matches) == 0:
        print(f'{org_key} does not match any key in reference sheet')
        return org_key
    return matches['name'].values[0]

def get_bug_info(code):
    """Look up the reference values recorded for an organism code.

    Parameters
    ----------
    code : value compared against the ``code`` column of ``code_key``.

    Returns
    -------
    tuple
        ``(gram_status, hmbpp, ribo)`` taken from the first row of
        ``code_key`` whose ``code`` matches, with missing values replaced by
        ``None``. ``(None, None, None)`` is returned when ``code`` itself is
        missing or matches no row; in the latter case a message is printed.
    """
    if pd.isna(code):
        return None, None, None
    ref = code_key[code_key['code'] == code]
    if ref.shape[0] == 0:
        print(f'{code} does not match any code in reference sheet')
        return None, None, None
    # Read each value column-wise so the original column dtypes are kept.
    values = (ref[column].values[0] for column in ('gram_status', 'hmbpp', 'ribo'))
    return tuple(None if pd.isna(value) else value for value in values)
    

def add_micro(patient, micro_ref):
    """Attach microbiology results to a patient.

    Every row of ``micro_ref`` whose ``pt_id`` equals ``patient.patient_id``
    is turned into a ``Bug`` and the resulting list is assigned to
    ``patient.infection_data``. When no row matches, the patient is returned
    without being modified.

    Returns
    -------
    The same ``patient`` object that was passed in (modified in place).
    """
    matches = micro_ref[micro_ref['pt_id'] == patient.patient_id]
    if len(matches) == 0:
        return patient
    bugs = []
    for record in matches.to_dict('records'):
        # NOTE(review): the third value returned by get_bug_info (ribo) is not
        # stored on the Bug -- confirm this is intentional.
        gram, hmbpp, _ribo = get_bug_info(record['code'])
        bugs.append(Bug(org_name=get_org_name(record['org']),
                        report_date=record['date'],
                        gram_status=gram,
                        hmbpp_status=hmbpp))
    patient.infection_data = bugs
    return patient

In [13]:
# add_micro returns the patient it was given, so this list holds the same
# objects as `patients`, now with any matching infection data attached.
patients_with_micro = [add_micro(p, micro) for p in patients]

In [14]:
# Save every patient. add_micro assigns infection_data on the patient object
# itself, so the objects in `patients` already carry that data here.
for p in patients:
    p.save()

<h1>Add Flow Cytometry Data</h1>

In [15]:
def fetch_files(patient_id, status, root_dir='/media/ross/extdrive/PD_DS_Friendly'):
    """Collect the FCS file paths for one patient.

    The patient's directory is ``<root_dir>/<status>/<patient_id>`` and is
    expected to contain the sub-directories ``PBMC_N``, ``PBMC_T``, ``PDMC_N``
    and ``PDMC_T``.

    Parameters
    ----------
    patient_id : patient identifier, used as a directory name.
    status : patient status, used as the parent directory name.
    root_dir : str, optional
        Base directory of the data set. Defaults to the location that was
        previously hard-coded, so existing two-argument calls are unaffected.

    Returns
    -------
    dict or None
        ``None`` (after printing a message) when the patient directory does
        not exist. Otherwise a dict with keys ``pdmc_n``, ``pdmc_t``,
        ``pbmc_n`` and ``pbmc_t``, each holding the value returned by
        ``get_fcs_file_paths`` for that sub-directory.
    """
    n_ctrl_names = ['CD1c', 'HLA-DR']
    t_ctrl_names = ['CXCR3', 'CD27', 'CD45RA', 'CCR7']
    # str() mirrors f-string formatting, so a None status becomes 'None'
    # instead of raising inside os.path.join.
    patient_dir = os.path.join(root_dir, str(status), str(patient_id))
    if not os.path.isdir(patient_dir):
        print(f'No file directory found for patient {patient_id} with status {status}')
        return None
    # Panels are scanned in this order; the sub-directory is the upper-cased key.
    panels = (
        ('pbmc_n', n_ctrl_names),
        ('pbmc_t', t_ctrl_names),
        ('pdmc_n', n_ctrl_names),
        ('pdmc_t', t_ctrl_names),
    )
    found = {
        key: get_fcs_file_paths(os.path.join(patient_dir, key.upper()), ctrl_names, 'FMO')
        for key, ctrl_names in panels
    }
    return dict(pdmc_n=found['pdmc_n'], pdmc_t=found['pdmc_t'],
                pbmc_n=found['pbmc_n'], pbmc_t=found['pbmc_t'])

def add_flow_data(patient):
    """Register a patient's FCS files with the four flow cytometry experiments.

    Files are located with ``fetch_files`` using the patient's id and status.
    For each panel, a sample is added to the corresponding experiment only
    when exactly one primary file was found; otherwise a message asking for a
    manual check is printed. Nothing is added when the patient has no file
    directory.

    Parameters
    ----------
    patient : object exposing ``patient_id`` and ``status``.

    Returns
    -------
    None
    """
    patient_id = patient.patient_id
    status = patient.status
    print(f'----------------------- {patient_id} -----------------------')
    files = fetch_files(patient_id, status)
    if files is None:
        return
    # (key in `files` / sample-id suffix, label used in the warning message,
    # experiment that receives the sample), in processing order.
    panels = (
        ('pbmc_t', 'PBMC T', t_pbmc_exp),
        ('pbmc_n', 'PBMC N', n_pbmc_exp),
        ('pdmc_t', 'PDMC T', t_pdmc_exp),
        ('pdmc_n', 'PDMC N', n_pdmc_exp),
    )
    for key, label, experiment in panels:
        panel_files = files[key]
        primary = panel_files['primary']
        if len(primary) == 1:
            experiment.add_new_sample(sample_id=f'{patient_id}_{key}',
                                      file_path=primary[0],
                                      controls=panel_files['controls'],
                                      patient_id=patient_id)
        else:
            # Zero or several primary files: leave the choice to a human.
            print(f'Check {label} panel files for {patient_id} manually')
    print(f'{patient_id} complete')

In [16]:
# Register flow cytometry files for every patient; add_flow_data prints a
# header line per patient followed by any manual-check messages.
for p in patients:
    add_flow_data(p)

----------------------- 142-09 -----------------------
Generating main file entry...
Missing channel FSC-W
Missing channel SSC-H
Generating file entries for controls...
Successfully created 142-09_pbmc_t and associated to PD_T_PBMCs
Check PBMC N panel files for 142-09 manually
Generating main file entry...
Missing channel FSC-W
Missing channel SSC-H
Generating file entries for controls...
Missing channel FSC-W
Missing channel SSC-H
Missing channel FSC-W
Missing channel SSC-H
Missing channel FSC-W
Missing channel SSC-H
Successfully created 142-09_pdmc_t and associated to PD_T_PDMCs
Generating main file entry...
Missing channel FSC-W
Generating file entries for controls...
Missing channel FSC-W
Missing channel FSC-W
Successfully created 142-09_pdmc_n and associated to PD_N_PDMCs
142-09 complete
----------------------- 165-09 -----------------------
Generating main file entry...
Missing channel FSC-W
Generating file entries for controls...
Missing channel FSC-W
Missing channel FSC-W
Missi