In [126]:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import re

In [127]:
# Root folder that is scanned for samples; each entry found directly inside it
# is treated as a candidate sample by the classification loop.
data_tables_dir = Path("F://ucsf/bidc/IPI/analysis/cell_data_tables/")

# Bare names (no parent path) of every entry directly under the root folder.
# Non-directory entries are included here and filtered out later.
samples = [entry.name for entry in data_tables_dir.iterdir()]

In [128]:
# Labels written to the 'cell_type' column. Order matters: entry i of this
# list is paired with column i of the gating matrix in the classification loop.
cell_types = [
    'double_neg_t_cell',
    'cd4_t_cell',
    'cd8_t_cell',
    'mac',
    'cdc1',
    'other_myeloid_and_b_cells',
    'double_pos_t_cell',
]

# Run date as an 8-digit YYYYMMDD stamp; it is embedded in the output file names.
today = format(datetime.today(), '%Y%m%d')

In [129]:
# Marker gate for every cell type. A cell matches a type when each listed
# "<marker>_expressed" column equals the value given here. Keeping the gates in
# one table (instead of seven copy-pasted expressions) makes them easy to audit.
# NOTE: the cd8_t_cell gate originally tested CD8_expressed for both 1 and 0,
# which can never be true; it now requires CD4_expressed == 0, mirroring the
# cd4_t_cell gate.
marker_gates = {
    'double_neg_t_cell':         {'DAPI': 1, 'CD3': 1, 'CD4': 0, 'CD8': 0, 'XCR1': 0},
    'cd4_t_cell':                {'DAPI': 1, 'CD4': 1, 'CD3': 1, 'CD8': 0, 'XCR1': 0},
    'cd8_t_cell':                {'DAPI': 1, 'CD8': 1, 'CD3': 1, 'CD4': 0, 'XCR1': 0},
    'mac':                       {'DAPI': 1, 'CD163': 1, 'HLADR': 1, 'XCR1': 0, 'CD3': 0},
    'cdc1':                      {'DAPI': 1, 'XCR1': 1, 'HLADR': 1, 'CD3': 0, 'CD163': 0},
    'other_myeloid_and_b_cells': {'DAPI': 1, 'HLADR': 1, 'CD163': 0, 'CD3': 0, 'XCR1': 0},
    'double_pos_t_cell':         {'DAPI': 1, 'CD3': 1, 'CD4': 1, 'CD8': 1, 'XCR1': 0},
}

# For every sample folder: load its most recent dated table, label each row
# with a cell type, and write the labelled table back into the same folder.
for d in samples:

    dir_path = Path(data_tables_dir, d)
    if not dir_path.is_dir():
        # stray files in the root folder are not samples
        continue

    print("Classifying ",d)

    # find most current file
    file_dict = {}
    for file in dir_path.iterdir():
        # look for 8-character date string (raw string: "\d" is not a valid
        # escape sequence in a normal string literal)
        res = re.search(r"\d{8}", file.name)

        if res is not None:
            file_dict[res[0]] = file.name

    if not file_dict:
        # max() on an empty dict raises ValueError; report and move on so one
        # incomplete folder does not abort the whole batch
        print("  no dated data table found, skipping ", d)
        continue

    # YYYYMMDD stamps sort chronologically as plain strings, so the largest
    # key belongs to the newest file
    data_path = Path(dir_path, file_dict[max(file_dict)])

    data = pd.read_csv(data_path)
    # default label for rows that match none of the gates
    data['cell_type'] = 'other'

    # One boolean column per cell type. dtype=bool is essential: a float array
    # handed to .loc is interpreted as row labels, not as a mask.
    ct_idx = np.zeros((len(data), len(cell_types)), dtype=bool)

    for i, ct in enumerate(cell_types):
        gate = np.ones(len(data), dtype=bool)
        for marker, expected in marker_gates[ct].items():
            gate &= (data[f"{marker}_expressed"] == expected).to_numpy()
        ct_idx[:, i] = gate

    # rows matching more than one gate are flagged instead of silently taking
    # the label of whichever type happens to come last
    assigned_twice = ct_idx.sum(axis=1) > 1

    for i, ct in enumerate(cell_types):
        data.loc[ct_idx[:, i], 'cell_type'] = ct

    data.loc[assigned_twice, 'cell_type'] = 'assigned_twice'

    # Stamped with today's date, so this output becomes the "most current"
    # file the next time the folder is processed.
    data.to_csv(Path(dir_path, f"{d}_single_cell_data_{today}.csv"), index=False)

Classifying  IPICRC048T1_8plex
Classifying  IPICRC053T1_8plex
Classifying  IPICRC055T1_8plex
Classifying  IPICRC057T1_8plex
Classifying  IPICRC058T1_8plex
Classifying  IPICRC062T1_8plex
Classifying  IPICRC063T1_8plex
Classifying  IPICRC072
Classifying  IPICRC075T1_8plex
Classifying  IPICRC076T1_8plex
Classifying  IPICRC077T1_8plex
Classifying  IPICRC083T1_sectionA_8plex
Classifying  IPICRC083T1_sectionB_8plex
Classifying  IPICRC085T1_8plex
Classifying  IPICRC086T1_sectionA_8plex
Classifying  IPICRC086T1_sectionB_8plex
Classifying  IPICRC087T1_8plex
Classifying  IPICRC088T1_8plex
Classifying  IPIHNSC060
Classifying  IPIHNSC065
Classifying  IPIHNSC069
Classifying  IPIKID047
Classifying  IPIKID051
Classifying  IPIKID069
Classifying  IPIKID073
Classifying  IPIKID078
Classifying  IPILUNG074
Classifying  IPILUNG081


'20220303'