In [1]:
import os
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np 
from scipy.stats import entropy
from scipy.stats import mannwhitneyu as mwu
import plotly.graph_objects as go

In [2]:
# Input and output locations, relative to the notebook's working directory.
data_dir = '../Data/Clinical/'
data_dir2 = '../Result/'
out_dir = '../Result/'

# Clinical tables (one per collection round) and the per-ID severity table.
cl1 = pd.read_csv( data_dir + "clinical_data_1.csv", sep=',')
cl2 = pd.read_csv( data_dir + "clinical_data_2.csv", sep=',')
max_css = pd.read_csv(data_dir2 + "useful_table/max_css_table.csv")

# Amino-acid Shannon-diversity tables; the file names distinguish
# BCR HC / BCR LC / TCR and the 01 / 02 suffix.
BCRHC01 = pd.read_csv(data_dir2 + 'aa_shannon_BCR_01_HC.csv')
BCRLC01 = pd.read_csv(data_dir2 + 'aa_shannon_BCR_01_LC.csv')
TCR01 = pd.read_csv(data_dir2 + 'aa_shannon_TCR_01.csv')
BCRHC02 = pd.read_csv(data_dir2 + 'aa_shannon_BCR_02_HC.csv')
BCRLC02 = pd.read_csv(data_dir2 + 'aa_shannon_BCR_02_LC.csv')
TCR02 = pd.read_csv(data_dir2 + 'aa_shannon_TCR_02.csv')

# NOTE(review): `all` shadows the Python builtin of the same name for the rest
# of the kernel session; consider renaming once all users of it are known.
all = [BCRHC01, BCRLC01, TCR01, BCRHC02, BCRLC02, TCR02]
only01 = [BCRHC01, BCRLC01, TCR01]
only02 = [BCRHC02, BCRLC02, TCR02] 

# Recode Time_point 0 as 1 in BCRHC01 only; the other five tables are not touched.
# NOTE(review): confirm that patching a single table is intentional.
BCRHC01.loc[BCRHC01['Time_point'] == 0, 'Time_point'] = 1

# Number of rows in max_css per severity value.
sev_dict = max_css['severity'].value_counts().to_dict()

print(sev_dict)
print(max_css.head())
print(BCRHC01.head())

  cl2 = pd.read_csv( data_dir + "clinical_data_2.csv", sep=',')


{1: 204, 2: 165, 3: 52, 4: 36, 5: 2}
            ID  severity
0  COV-CCO-001         2
1  COV-CCO-002         2
2  COV-CCO-003         2
3  COV-CCO-004         2
4  COV-CCO-006         1
             Sample  Row_number  Total_readcount  Shannon_diversity  \
0  COV-CCO-0411_IGG        2498            19342           6.727983   
1  COV-CCO-0411_IGM        7006            11251           8.179853   
2  COV-CCO-0412_IGG         348             1523           5.104862   
3  COV-CCO-0412_IGM        1183             1850           6.769221   
4  COV-CCO-0413_IGG        2743            12102           6.672048   

       PlnP2  Squared_Shannon  Time_point Type  
0  47.012921        45.265755           1  IGG  
1  69.480620        66.909994           1  IGM  
2  27.682505        26.059611           2  IGG  
3  46.744685        45.822355           2  IGM  
4  47.145421        44.516224           3  IGG  


In [3]:
# Table of IDs with severity and a `from` column (presumably the collection
# round each ID came from -- TODO confirm).
adaptome = pd.read_csv(data_dir2 + 'max_css_adaptome_only.csv')
# Keep only the clinical rows whose ID is listed in the adaptome table.
sevcl1 = cl1[cl1['ID'].isin(adaptome['ID'])]
sevcl2 = cl2[cl2['ID'].isin(adaptome['ID'])]

adaptome.head()

Unnamed: 0,ID,severity,from
0,COV-CCO-041,1,1
1,COV-CCO-042,1,1
2,COV-CCO-043,1,1
3,COV-CCO-044,1,1
4,COV-CCO-045,2,1


In [4]:
# Rows where the antiviral flag equals 1, split by column suffix
# (`__1` / `M1_` versus `__2` / `M2_`) and by clinical table (01 / 02).
avirus_before01 = sevcl1[sevcl1['CMD_AVIRUS__1'] == 1]
avirus_after01 = sevcl1[sevcl1['CMD_AVIRUS__2'] == 1]
avirus_before02 = sevcl2[sevcl2['M1_AVIRUS'] == 1]
avirus_after02 = sevcl2[sevcl2['M2_AVIRUS'] == 1]

# Same split for the remdesivir-specific flag (AVIRUSR).
remdesivir_before01 = sevcl1[sevcl1['CMD_AVIRUSR__1'] == 1]
remdesivir_after01 = sevcl1[sevcl1['CMD_AVIRUSR__2'] == 1]
remdesivir_before02 = sevcl2[sevcl2['M1_AVIRUSR'] == 1]
remdesivir_after02 = sevcl2[sevcl2['M2_AVIRUSR'] == 1]

# Distinct IDs per subset: antiviral on the first printed line, remdesivir on the second.
print(avirus_before01['ID'].nunique(), avirus_after01['ID'].nunique(), 
      avirus_before02['ID'].nunique(), avirus_after02['ID'].nunique(), '\n',
      remdesivir_before01['ID'].nunique(), remdesivir_after01['ID'].nunique(),
      remdesivir_before02['ID'].nunique(), remdesivir_after02['ID'].nunique())

0 1 9 27 
 0 1 9 27


In [5]:
# [Common]
# PNTTM           measurement date
# YES == 1, NO == 2, NaN == 99999

# [First collection]
# CMD_AVIRUS__1   antiviral taken at admission
# CMD_AVIRUSR__1  remdesivir taken at admission
# CMD_AVIRUS__2   antiviral taken after admission
# CMD_AVIRUSR__2  remdesivir taken after admission

# [Second collection]
# M1_AVIRUS       antiviral taken at admission
# M1_AVIRUSR      remdesivir taken at admission
# M2_AVIRUS       antiviral taken after admission
# M2_AVIRUSR      remdesivir taken after admission

class allocate_avirus:
    """Flag antiviral / remdesivir use on an ID-keyed table.

    Parameters
    ----------
    df1 : DataFrame
        First-collection clinical rows; must contain ``ID``, ``PNTTM`` and the
        ``CMD_AVIRUS__1/2`` and ``CMD_AVIRUSR__1/2`` columns.
    df2 : DataFrame
        Second-collection clinical rows; must contain ``ID``, ``PNTTM`` and the
        ``M1_/M2_AVIRUS`` and ``M1_/M2_AVIRUSR`` columns.
    adaptome : DataFrame
        Table with an ``ID`` column. It is modified in place by ``allocate``.
    """

    def __init__(self, df1, df2, adaptome):
        self.df1 = df1
        self.df2 = df2
        self.adaptome = adaptome

        # Filter the frames that were passed in, not notebook-level globals,
        # so the class works with any pair of clinical tables.
        self.avirus_before01 = df1[df1['CMD_AVIRUS__1'] == 1]
        self.avirus_after01 = df1[df1['CMD_AVIRUS__2'] == 1]
        self.avirus_before02 = df2[df2['M1_AVIRUS'] == 1]
        self.avirus_after02 = df2[df2['M2_AVIRUS'] == 1]

        self.remdesivir_before01 = df1[df1['CMD_AVIRUSR__1'] == 1]
        self.remdesivir_after01 = df1[df1['CMD_AVIRUSR__2'] == 1]
        self.remdesivir_before02 = df2[df2['M1_AVIRUSR'] == 1]
        self.remdesivir_after02 = df2[df2['M2_AVIRUSR'] == 1]

    def _mark(self, rows, flag_col, day_col=None):
        """Set ``flag_col`` to 1 for every adaptome ID present in ``rows``.

        When ``day_col`` is given, also copy ``PNTTM`` into it; if an ID has
        several rows, the last one wins.
        """
        ids = self.adaptome['ID']
        known = rows[rows['ID'].isin(ids)]
        if known.empty:
            return
        self.adaptome.loc[ids.isin(known['ID']), flag_col] = 1
        if day_col is None:
            return
        last_rows = known.drop_duplicates('ID', keep='last')
        for patient_id, day in zip(last_rows['ID'], last_rows['PNTTM']):
            self.adaptome.loc[ids == patient_id, day_col] = day

    def allocate(self):
        """Add ``avirus``, ``avirus_rem`` and ``avirus_aftecovid`` columns.

        Returns the same DataFrame object that was passed to the constructor.
        """
        self.adaptome['avirus'] = 0
        self.adaptome['avirus_rem'] = 0
        self.adaptome['avirus_aftecovid'] = 0

        # Only the "after" subsets record PNTTM. The second collection is
        # processed last, so it overrides the first for an ID present in both.
        self._mark(self.avirus_before01, 'avirus')
        self._mark(self.avirus_after01, 'avirus', 'avirus_aftecovid')
        self._mark(self.avirus_before02, 'avirus')
        self._mark(self.avirus_after02, 'avirus', 'avirus_aftecovid')

        for rows in (self.remdesivir_before01, self.remdesivir_after01,
                     self.remdesivir_before02, self.remdesivir_after02):
            self._mark(rows, 'avirus_rem')

        return self.adaptome
    
adaptome = allocate_avirus(sevcl1, sevcl2, adaptome).allocate()

print(adaptome['avirus'].value_counts(), adaptome['avirus_rem'].value_counts(), sep='\n')
print(adaptome[(adaptome['avirus_rem'] == 1) & (adaptome['from'] == 1)])
print(adaptome[(adaptome['avirus_rem'] == 1) & (adaptome['from'] == 2)])

print(adaptome[(adaptome['avirus_rem'] == 1) & (adaptome['avirus_aftecovid'] < 7)])   # remdesivir rows whose recorded PNTTM is below 7
# Rows flagged for both antiviral and remdesivir. The original tested
# `avirus_rem == 1` twice, which never checked the `avirus` flag.
print(adaptome[(adaptome['avirus'] == 1) & (adaptome['avirus_rem'] == 1)].shape)

# The flag columns were only added to check the code, so drop them again.
adaptome = adaptome.drop(['avirus','avirus_aftecovid', 'avirus_rem'], axis=1)

avirus
0    237
1     28
Name: count, dtype: int64
avirus_rem
0    237
1     28
Name: count, dtype: int64
              ID  severity  from  avirus  avirus_rem  avirus_aftecovid
210  COV-SCO-069         4     1       1           1                 7
              ID  severity  from  avirus  avirus_rem  avirus_aftecovid
215  COV-CCO-239         2     2       1           1                13
217  COV-CCO-241         4     2       1           1                10
218  COV-CCO-258         4     2       1           1                10
219  COV-CCO-304         2     2       1           1                12
220  COV-CCO-305         4     2       1           1                10
221  COV-CCO-308         4     2       1           1                 7
222  COV-CCO-321         4     2       1           1                10
223  COV-CCO-322         4     2       1           1                12
224  COV-CCO-326         4     2       1           1                 9
225  COV-CCO-331         3     2       1  

In [6]:

class allocate_avirus:
    """Flag antibiotic (ABIOTIC) use on an ID-keyed table.

    NOTE(review): this reuses the name ``allocate_avirus`` and the
    ``avirus_*`` attribute names although it reads the ``ABIOTIC`` columns;
    the names are kept so existing callers keep working.

    Parameters
    ----------
    df1 : DataFrame
        Must contain ``ID``, ``CMD_ABIOTIC__1`` and ``CMD_ABIOTIC__2``.
    df2 : DataFrame
        Must contain ``ID``, ``M1_ABIOTIC`` and ``M2_ABIOTIC``.
    adaptome : DataFrame
        Table with an ``ID`` column. It is modified in place by ``allocate``.
    """

    def __init__(self, df1, df2, adaptome):
        self.df1 = df1
        self.df2 = df2
        self.adaptome = adaptome

        # Filter the frames that were passed in rather than notebook globals.
        self.avirus_before01 = df1[df1['CMD_ABIOTIC__1'] == 1]
        self.avirus_after01 = df1[df1['CMD_ABIOTIC__2'] == 1]
        self.avirus_before02 = df2[df2['M1_ABIOTIC'] == 1]
        self.avirus_after02 = df2[df2['M2_ABIOTIC'] == 1]

    def allocate(self):
        """Add an ``avirus2`` column: 1 where the ID appears in any subset, else 0.

        Returns the same DataFrame object that was passed to the constructor.
        """
        self.adaptome['avirus2'] = 0

        for rows in (self.avirus_before01, self.avirus_after01,
                     self.avirus_before02, self.avirus_after02):
            # One vectorised membership test replaces the per-row loop.
            matched = self.adaptome['ID'].isin(rows['ID'])
            self.adaptome.loc[matched, 'avirus2'] = 1

        return self.adaptome
    
# Temporarily add the `avirus2` flag to inspect its distribution ...
adaptome = allocate_avirus(sevcl1, sevcl2, adaptome).allocate()

print(adaptome['avirus2'].value_counts())

# ... then remove it again so `adaptome` keeps its original columns.
adaptome = adaptome.drop(['avirus2'], axis=1)

avirus2
0    217
1     48
Name: count, dtype: int64


In [7]:
# 1 = YES, 0 = NO
# NOTE(review): an earlier cell documents "YES ==1, NO == 2"; confirm which coding applies.

# AVIRUSR = remdesivir
# MK4482 = molnupiravir (Lagevrio)
# INHIBITOR = serine protease inhibitor (nafamostat, camostat)
# VASOPRESSOR = vasopressor (blood-pressure medication)
# DVT = anticoagulation & DVT prophylaxis (thrombosis prevention)
# INFLU = influenza (influenza vaccine)

# VACN = vaccination dose number
# VACN(number) = vaccine type (1=AZ, 2=PFZ, 3=MDN, 4=YS, 5=Novavax)

# [ Drug treatment ]
med_list = ['AVIRUSR','ABIOTIC','STEROID','PLASMA','ANTIBODY','INHIBITOR','MK4482', 'VASOPRESSOR', 'DVT']
vac_list = ['INFLU','VAC','VACN', 'VACN1', 'VACN2', 'VACN3', 'VACN4']

# SETTING = 1. intensive care unit  2. general ward  3. other
# NPRONG = low-flow oxygen therapy
# HFNC = high-flow oxygen therapy
# NIV, IV = non-invasive ventilator, invasive ventilator
# CRRT = continuous renal replacement therapy (= dialysis)
# PRONE = presumably prone positioning (the original note was garbled -- TODO confirm)
# BDRUG = blood products administered (pRBC, FFP, PLT, etc.)

# [ Non-drug treatment ]
treatment_list = ['SETTING', 'NPRONG', 'HFNC', 'NIV', 'IV', 'ECMO', 'NOGAS','CRRT', 'PRONE', 'BDRUG']

In [7]:
class count_treatment01:
    """Count distinct IDs per treatment code in the before/after columns.

    ``sevcl1`` is read through ``CMD_<code>__1`` / ``CMD_<code>__2`` and
    ``sevcl2`` through ``M1_<code>`` / ``M2_<code>``; ``list`` holds the codes.
    """

    def __init__(self, sevcl1, sevcl2, list):
        self.sevcl1, self.sevcl2, self.list = sevcl1, sevcl2, list

    def create_matrix(self):
        """Return four independent ``{code: 0}`` dicts (1-before, 1-after, 2-before, 2-after)."""
        return tuple(dict.fromkeys(self.list, 0) for _ in range(4))

    def count_values(self):
        """Fill the four dicts with the number of distinct IDs whose flag equals 1."""
        before1, after1, before2, after2 = self.create_matrix()

        for code in self.list:
            targets = (
                (before1, self.sevcl1, f'CMD_{code}__1'),
                (after1, self.sevcl1, f'CMD_{code}__2'),
                (before2, self.sevcl2, f'M1_{code}'),
                (after2, self.sevcl2, f'M2_{code}'),
            )
            for bucket, frame, column in targets:
                flagged = frame[column] == 1
                bucket[code] = frame.loc[flagged, 'ID'].nunique()

        return before1, after1, before2, after2

# Distinct-ID counts per drug code: table 1 before/after, then table 2 before/after.
counted = count_treatment01(sevcl1, sevcl2, med_list)
sev1_before, sev1_after, sev2_before, sev2_after = counted.count_values()

print(sev1_before, sev1_after, sev2_before, sev2_after, sep='\n')

{'AVIRUSR': 0, 'ABIOTIC': 2, 'STEROID': 0, 'PLASMA': 0, 'ANTIBODY': 0, 'INHIBITOR': 0, 'MK4482': 0, 'VASOPRESSOR': 0, 'DVT': 0}
{'AVIRUSR': 1, 'ABIOTIC': 19, 'STEROID': 6, 'PLASMA': 0, 'ANTIBODY': 10, 'INHIBITOR': 27, 'MK4482': 0, 'VASOPRESSOR': 1, 'DVT': 2}
{'AVIRUSR': 9, 'ABIOTIC': 7, 'STEROID': 9, 'PLASMA': 0, 'ANTIBODY': 2, 'INHIBITOR': 0, 'MK4482': 0, 'VASOPRESSOR': 0, 'DVT': 0}
{'AVIRUSR': 27, 'ABIOTIC': 27, 'STEROID': 27, 'PLASMA': 0, 'ANTIBODY': 0, 'INHIBITOR': 3, 'MK4482': 0, 'VASOPRESSOR': 3, 'DVT': 1}


In [8]:
class count_treatment02:
    """Count distinct IDs per treatment code in single-column flags.

    ``sevcl1`` is read through ``CMD_<code>`` and ``sevcl2`` through
    ``M2_<code>``; ``list`` holds the codes.
    """

    def __init__(self, sevcl1, sevcl2, list):
        self.sevcl1, self.sevcl2, self.list = sevcl1, sevcl2, list

    def create_matrix(self):
        """Return two independent ``{code: 0}`` dicts, one per clinical table."""
        return tuple(dict.fromkeys(self.list, 0) for _ in range(2))

    def count_treat(self):
        """Fill both dicts with the number of distinct IDs whose flag equals 1."""
        first, second = self.create_matrix()

        for code in self.list:
            flagged1 = self.sevcl1[f'CMD_{code}'] == 1
            first[code] = self.sevcl1.loc[flagged1, 'ID'].nunique()
            flagged2 = self.sevcl2[f'M2_{code}'] == 1
            second[code] = self.sevcl2.loc[flagged2, 'ID'].nunique()

        return first, second

# Distinct-ID counts per non-drug treatment code, one dict per clinical table.
# NOTE: this rebinds `counted`, which previously held the drug counter.
counted = count_treatment02(sevcl1, sevcl2, treatment_list)
sev1_treat, sev2_treat = counted.count_treat()

print(sev1_treat, sev2_treat, sep='\n')

{'SETTING': 6, 'NPRONG': 2, 'HFNC': 1, 'NIV': 0, 'IV': 0, 'ECMO': 0, 'NOGAS': 0, 'CRRT': 0, 'PRONE': 0, 'BDRUG': 3}
{'SETTING': 22, 'NPRONG': 14, 'HFNC': 20, 'NIV': 0, 'IV': 19, 'ECMO': 7, 'NOGAS': 0, 'CRRT': 3, 'PRONE': 1, 'BDRUG': 9}


In [9]:
# Reduced code lists used from here on.
# NOTE: `med_list` is rebound here and replaces the longer list defined in an earlier cell.
med_list = ['AVIRUSR','ABIOTIC','STEROID','ANTIBODY','INHIBITOR']
treat_list = ['SETTING', 'NPRONG', 'HFNC', 'IV', 'ECMO', 'CRRT', 'BDRUG']
# Output column names, paired position-by-position with the code lists above.
med = ['remdesivir', 'abiotic', 'steroid', 'antibody', 'inhibitor']
treat = ['icu', 'nprong', 'hfnc', 'iv', 'ecmo', 'crrt', 'bdrug']

class allocate_clinicals:
    """Flag drug and non-drug treatments on an ID-keyed table.

    Parameters
    ----------
    df1, df2 : DataFrame
        Clinical tables. ``df1`` is read through ``CMD_<code>__1``,
        ``CMD_<code>__2`` and ``CMD_<code>``; ``df2`` through ``M1_<code>``
        and ``M2_<code>``. Both need ``ID`` and ``PNTTM`` columns.
    adaptome : DataFrame
        Table with an ``ID`` column. It is modified in place by ``allocate``.
    list1 : list of str
        Drug codes, paired by position with ``self.med``.
    list2 : list of str
        Non-drug treatment codes, paired by position with ``self.treat``.

    ``zip`` stops at the shorter sequence, so a code list shorter than the
    name list leaves some ``<name>_*`` attributes undefined and ``allocate``
    then raises ``AttributeError``.
    """

    def __init__(self, df1, df2, adaptome, list1, list2):
        self.df1 = df1
        self.df2 = df2
        self.adapts = adaptome
        self.list1 = list1
        self.list2 = list2

        self.med = ['remdesivir', 'abiotic', 'steroid', 'antibody', 'inhibitor']
        self.treat = ['icu', 'nprong', 'hfnc', 'iv', 'ecmo', 'crrt', 'bdrug']

        # One filtered frame per (name, timing, table), stored as attributes.
        for code, name in zip(self.list1, self.med):
            setattr(self, f'{name}_before01', self.df1[self.df1[f'CMD_{code}__1'] == 1])
            setattr(self, f'{name}_after01', self.df1[self.df1[f'CMD_{code}__2'] == 1])
            setattr(self, f'{name}_before02', self.df2[self.df2[f'M1_{code}'] == 1])
            setattr(self, f'{name}_after02', self.df2[self.df2[f'M2_{code}'] == 1])

        for code, name in zip(self.list2, self.treat):
            setattr(self, f'{name}_01', self.df1[self.df1[f'CMD_{code}'] == 1])
            setattr(self, f'{name}_02', self.df2[self.df2[f'M2_{code}'] == 1])

    def _mark(self, rows, flag_col, day_col=None):
        """Set ``flag_col`` to 1 for every table ID present in ``rows``.

        When ``day_col`` is given, also copy ``PNTTM`` into it; if an ID has
        several rows, the last one wins.
        """
        ids = self.adapts['ID']
        known = rows[rows['ID'].isin(ids)]
        if known.empty:
            return
        self.adapts.loc[ids.isin(known['ID']), flag_col] = 1
        if day_col is None:
            return
        last_rows = known.drop_duplicates('ID', keep='last')
        for patient_id, day in zip(last_rows['ID'], last_rows['PNTTM']):
            self.adapts.loc[ids == patient_id, day_col] = day

    def allocate(self):
        """Add ``<med>``, ``<med>_after`` and ``<treat>`` columns to the table.

        Iterates the instance's own name lists rather than notebook globals.
        Returns the same DataFrame object that was passed to the constructor.
        """
        for name in self.med:
            self.adapts[f'{name}'] = 0
            self.adapts[f'{name}_after'] = 0

        for name in self.treat:
            self.adapts[f'{name}'] = 0

        for name in self.med:
            # Only the "after" subsets record PNTTM; table 2 is processed
            # last and therefore overrides table 1 for a shared ID.
            self._mark(getattr(self, f'{name}_before01'), name)
            self._mark(getattr(self, f'{name}_after01'), name, f'{name}_after')
            self._mark(getattr(self, f'{name}_before02'), name)
            self._mark(getattr(self, f'{name}_after02'), name, f'{name}_after')

        for name in self.treat:
            self._mark(getattr(self, f'{name}_01'), name)
            self._mark(getattr(self, f'{name}_02'), name)

        return self.adapts
    
# NOTE: allocate() writes the new columns into `adaptome` itself and returns it,
# so `adapts` and `adaptome` refer to the same DataFrame after this line.
adapts = allocate_clinicals(sevcl1, sevcl2, adaptome, med_list, treat_list).allocate()
print(adapts)

              ID  severity  from  remdesivir  remdesivir_after  abiotic  \
0    COV-CCO-041         1     1           0                 0        0   
1    COV-CCO-042         1     1           0                 0        0   
2    COV-CCO-043         1     1           0                 0        0   
3    COV-CCO-044         1     1           0                 0        0   
4    COV-CCO-045         2     1           0                 0        0   
..           ...       ...   ...         ...               ...      ...   
260  COV-CNC-111         0     2           0                 0        0   
261  COV-CNC-113         0     2           0                 0        0   
262  COV-CNC-115         0     2           0                 0        0   
263  COV-CNC-117         0     2           0                 0        0   
264  COV-MCO-010         4     2           1                10        1   

     abiotic_after  steroid  steroid_after  antibody  antibody_after  \
0                0        0

In [10]:
# Persist the flagged table (without the index) and display it.
adapts.to_csv(out_dir + 'adapt_clinical.csv', index=False)
adapts

Unnamed: 0,ID,severity,from,remdesivir,remdesivir_after,abiotic,abiotic_after,steroid,steroid_after,antibody,antibody_after,inhibitor,inhibitor_after,icu,nprong,hfnc,iv,ecmo,crrt,bdrug
0,COV-CCO-041,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,COV-CCO-042,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,COV-CCO-043,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,COV-CCO-044,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,COV-CCO-045,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260,COV-CNC-111,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
261,COV-CNC-113,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
262,COV-CNC-115,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
263,COV-CNC-117,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [11]:
### Per severity level, tabulate how many rows have each of remdesivir, abiotic, steroid,
### antibody, inhibitor, icu, nprong, hfnc, iv, ecmo, crrt, bdrug equal to 1 -> to be reported as count (ratio)

def count_table(adapts):
    """Count rows with each treatment flag set, broken down by severity 0-4.

    Parameters
    ----------
    adapts : DataFrame
        Must contain a ``severity`` column and one column per treatment name
        listed below.

    Returns
    -------
    DataFrame
        Indexed by treatment name, with columns ``sev0`` .. ``sev4``; each
        cell is the number of rows where the flag equals 1 at that severity.
    """
    med = ['remdesivir', 'abiotic', 'steroid', 'antibody', 'inhibitor']
    treat = ['icu', 'nprong', 'hfnc', 'iv', 'ecmo', 'crrt', 'bdrug']

    table = pd.DataFrame(index=med+treat, columns=['sev0', 'sev1', 'sev2', 'sev3', 'sev4'])

    for name in med + treat:
        flagged = adapts[name] == 1
        for level in range(5):
            # A single .loc[row, col] write: chained `.loc[row][col] = ...`
            # assigns to a temporary under pandas Copy-on-Write and is lost.
            table.loc[name, f'sev{level}'] = int((flagged & (adapts['severity'] == level)).sum())

    return table

# Treatment-by-severity count table for the flagged table.
ctable = count_table(adapts)
print(ctable)

           sev0 sev1 sev2 sev3 sev4
remdesivir    0    0    2    9   17
abiotic       2    6   13   10   17
steroid       0    0    7    9   17
antibody      0    6    4    0    2
inhibitor     0   11   15    1    3
icu           0    0    6    5   17
nprong        0    0    2    9    5
hfnc          0    0    2    4   15
iv            0    0    2    2   15
ecmo          0    0    1    0    6
crrt          0    0    0    0    3
bdrug         0    2    2    1    7


In [28]:
# Only the VAC code is processed below; this rebinds the longer `vac_list` from an earlier cell.
vac_list = ['VAC']
# vac_list2 = ['VACN', 'VACN1', 'VACN2', 'VACN3', 'VACN4']

### Where INFLU / VAC == 1 in sevcl1 and sevcl2, set adaptome['influenza'] or ['vaccine'] to 1 for the same ID.
### The columns are named like CMD_INFLU__1 in sevcl1 and M1_INFLU in sevcl2.

class allocate_clinicals:
    """Flag vaccination-type codes on an ID-keyed table using ``df2`` only.

    NOTE(review): this redefines ``allocate_clinicals`` with a different
    constructor signature than the treatment version defined earlier in the
    notebook; whichever cell ran last wins.

    Parameters
    ----------
    df1 : DataFrame
        Stored but not read; the ``CMD_<code>__1`` lookup on this table was
        disabled in the original code.
    df2 : DataFrame
        Must contain ``ID`` and one ``M1_<code>`` column per code.
    df3 : DataFrame
        Table with an ``ID`` column. It is modified in place by ``allocate``.
    vac_list : list of str
        Codes to flag; each becomes a 0/1 column named after the code.
    """

    def __init__(self, df1, df2, df3, vac_list):
        self.df1 = df1
        self.df2 = df2
        self.vac_list = vac_list
        self.adapts = df3

        for code in self.vac_list:
            setattr(self, f'{code}_02', self.df2[self.df2[f'M1_{code}'] == 1])

    def allocate(self):
        """Add one 0/1 column per code and return the (same) table.

        Iterates ``self.vac_list`` rather than the notebook-level
        ``vac_list``, so the result depends only on the constructor arguments.
        """
        for code in self.vac_list:
            self.adapts[f'{code}'] = 0

        for code in self.vac_list:
            rows = getattr(self, f'{code}_02')
            # Vectorised membership test instead of a per-row loop.
            self.adapts.loc[self.adapts['ID'].isin(rows['ID']), f'{code}'] = 1

        return self.adapts
        
# NOTE: `adaptome` is modified in place, so it (and `adapts_vac`, the same object)
# also carries whatever columns earlier cells already added to it.
adapts_vac = allocate_clinicals(sevcl1, sevcl2, adaptome, vac_list).allocate()
print(adapts_vac)

              ID  severity  from  remdesivir  remdesivir_after  abiotic  \
0    COV-CCO-041         1     1           0                 0        0   
1    COV-CCO-042         1     1           0                 0        0   
2    COV-CCO-043         1     1           0                 0        0   
3    COV-CCO-044         1     1           0                 0        0   
4    COV-CCO-045         2     1           0                 0        0   
..           ...       ...   ...         ...               ...      ...   
260  COV-CNC-111         0     2           0                 0        0   
261  COV-CNC-113         0     2           0                 0        0   
262  COV-CNC-115         0     2           0                 0        0   
263  COV-CNC-117         0     2           0                 0        0   
264  COV-MCO-010         4     2           1                10        1   

     abiotic_after  steroid  steroid_after  antibody  ...  inhibitor_after  \
0                0   

In [29]:
# Inspect which columns have accumulated on `adaptome` after the in-place allocations.
adaptome.columns

Index(['ID', 'severity', 'from', 'remdesivir', 'remdesivir_after', 'abiotic',
       'abiotic_after', 'steroid', 'steroid_after', 'antibody',
       'antibody_after', 'inhibitor', 'inhibitor_after', 'icu', 'nprong',
       'hfnc', 'iv', 'ecmo', 'crrt', 'bdrug', 'INFLU', 'VAC'],
      dtype='object')

In [30]:
### Tabulate, per severity level, how many rows have the VAC flag set to 1.
### NOTE(review): the original note referred to INFLU, but the code counts VAC only.

def count_table(adapts):
    """Count rows with ``VAC == 1``, broken down by severity 0-4.

    NOTE(review): this redefines ``count_table`` from an earlier cell, which
    tabulated the treatment columns instead.

    Parameters
    ----------
    adapts : DataFrame
        Must contain ``VAC`` and ``severity`` columns.

    Returns
    -------
    DataFrame
        One row (``VAC``) with columns ``sev0`` .. ``sev4``.
    """
    vac = ['VAC']

    table = pd.DataFrame(index=vac, columns=['sev0', 'sev1', 'sev2', 'sev3', 'sev4'])

    for name in vac:
        flagged = adapts[name] == 1
        for level in range(5):
            # A single .loc[row, col] write: chained `.loc[row][col] = ...`
            # assigns to a temporary under pandas Copy-on-Write and is lost.
            table.loc[name, f'sev{level}'] = int((flagged & (adapts['severity'] == level)).sum())

    return table

# VAC-by-severity counts; relies on the most recently executed `count_table` definition.
ctable_vac = count_table(adapts_vac)
print(ctable_vac)

    sev0 sev1 sev2 sev3 sev4
VAC   41    0    0    9    5
