# Explorando as tabelas de careplans, immunizations, observations e procedures

Leitura dos arquivos .csv de *careplans*, *immunizations*, *procedures* e *observations*, além dos dados de NF e dos arquivos com as *features* definidas através do notebook "features_medications.ipynb". Note que, como os arquivos de observações eram muito grandes para o upload no GitHub, fizemos o upload apenas dos arquivos de observações filtrados por pacientes de interesse (filtragem realizada da mesma forma que será feita para os outros arquivos neste notebook).

As colunas referentes a datas também foram convertidas para *datetime*.

In [188]:
import pandas as pd
import numpy as np

def _read_csv_with_dates(url, date_columns):
    """Read the CSV at `url` and convert every column in `date_columns` with `pd.to_datetime`."""
    table = pd.read_csv(url)
    for column in date_columns:
        table[column] = pd.to_datetime(table[column])
    return table


# Scenario 01

careplans01 = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/external/scenario01/careplans.csv',
    ['START', 'STOP'],
)

immunizations01 = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/external/scenario01/immunizations.csv',
    ['DATE'],
)

procedures01 = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/external/scenario01/procedures.csv',
    ['START', 'STOP'],
)

# Observations were pre-filtered to the patients of interest before upload
observations01_i = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/interim/interest/observations01_i.csv',
    ['DATE'],
)

# Feature table produced by the medications notebook
scenario01 = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/interim/features/features_medications_01.csv',
    ['BIRTHDATE'],
)

nf01 = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/interim/interest/nf01.csv',
    ['START', 'STOP'],
)

# Scenario 02

careplans02 = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/external/scenario02/careplans.csv',
    ['START', 'STOP'],
)

immunizations02 = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/external/scenario02/immunizations.csv',
    ['DATE'],
)

procedures02 = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/external/scenario02/procedures.csv',
    ['START', 'STOP'],
)

# Observations were pre-filtered to the patients of interest before upload
observations02_i = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/interim/interest/observations02_i.csv',
    ['DATE'],
)

# Feature table produced by the medications notebook
scenario02 = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/interim/features/features_medications_02.csv',
    ['BIRTHDATE'],
)

nf02 = _read_csv_with_dates(
    'https://github.com/caiops/P2_IA368X/raw/main/data/interim/interest/nf02.csv',
    ['START', 'STOP'],
)

Filtrando os dados de *careplans*, *immunizations* e *procedures* apenas para os pacientes de interesse.

In [189]:
# Patient identifiers listed in each scenario's feature table
patients01 = scenario01['PATIENT'].tolist()
patients02 = scenario02['PATIENT'].tolist()

# Patients of interest split by the DEATH_FN flag:
# DEATH_FN == 1 goes to the "death" lists, DEATH_FN == 0 to the "survive" lists
death01 = scenario01.loc[scenario01['DEATH_FN'] == 1, 'PATIENT'].tolist()
survive01 = scenario01.loc[scenario01['DEATH_FN'] == 0, 'PATIENT'].tolist()

death02 = scenario02.loc[scenario02['DEATH_FN'] == 1, 'PATIENT'].tolist()
survive02 = scenario02.loc[scenario02['DEATH_FN'] == 0, 'PATIENT'].tolist()

In [190]:
def _rows_for_patients(table, patients):
    """Return an independent copy of the rows of `table` whose PATIENT value is in `patients`."""
    return table[table['PATIENT'].isin(patients)].copy()


# Restrict each table to the patients of interest
# Scenario 01
careplans01_i = _rows_for_patients(careplans01, patients01)
immunizations01_i = _rows_for_patients(immunizations01, patients01)
procedures01_i = _rows_for_patients(procedures01, patients01)
# Scenario 02
careplans02_i = _rows_for_patients(careplans02, patients02)
immunizations02_i = _rows_for_patients(immunizations02, patients02)
procedures02_i = _rows_for_patients(procedures02, patients02)

## Observations

Encontrando os dados de observações cuja data é anterior ou igual à data de início da NF.

In [191]:
# Strip the time-of-day from the observation dates (date/time -> date only, then back to
# datetime) so that the comparison in the next step is done by calendar day
for _observations in (observations01_i, observations02_i):
    _observations['DATE'] = pd.to_datetime(_observations['DATE'].dt.date)

In [192]:
def _observations_until_nf_start(observations, nf, patients):
    """Select each patient's observations dated on or before their NF start.

    Parameters
    ----------
    observations : DataFrame
        Must contain PATIENT, DATE, CODE, DESCRIPTION, VALUE, UNITS and TYPE.
    nf : DataFrame
        Must contain PATIENT and START; the first row found for a patient
        provides that patient's NF start.
    patients : iterable
        Patient identifiers; the output keeps this order, with each patient's
        rows in their original order.

    Returns
    -------
    DataFrame
        The seven observation columns with a fresh 0..n-1 index.

    Raises
    ------
    IndexError
        If a patient has no row in `nf`.
    """
    columns = ['PATIENT', 'DATE', 'CODE', 'DESCRIPTION', 'VALUE', 'UNITS', 'TYPE']
    selected = []
    for patient in patients:
        nf_start = nf.loc[nf['PATIENT'] == patient, 'START'].values[0]
        keep = (observations['PATIENT'] == patient) & (observations['DATE'] <= nf_start)
        selected.append(observations.loc[keep, columns])
    if not selected:
        # pd.concat rejects an empty list; return the empty table explicitly
        return pd.DataFrame(columns=columns)
    # A single concat avoids re-copying the accumulated frame on every iteration and
    # keeps column dtypes from being influenced by an empty seed frame
    return pd.concat(selected, ignore_index=True)


# Observations made before or on the day the NF started
observations01_i_b = _observations_until_nf_start(observations01_i, nf01, patients01)

display(observations01_i_b)

observations02_i_b = _observations_until_nf_start(observations02_i, nf02, patients02)

display(observations02_i_b)

Unnamed: 0,PATIENT,DATE,CODE,DESCRIPTION,VALUE,UNITS,TYPE
0,4288f90b-4774-c329-3176-c1482e824c04,2012-06-19,8302-2,Body Height,84.4,cm,numeric
1,4288f90b-4774-c329-3176-c1482e824c04,2012-06-19,72514-3,Pain severity - 0-10 verbal numeric rating [Sc...,0.0,{score},numeric
2,4288f90b-4774-c329-3176-c1482e824c04,2012-06-19,29463-7,Body Weight,10.8,kg,numeric
3,4288f90b-4774-c329-3176-c1482e824c04,2012-06-19,77606-2,Weight-for-length Per age and sex,11.5,%,numeric
4,4288f90b-4774-c329-3176-c1482e824c04,2012-06-19,9843-4,Head Occipital-frontal circumference,47.6,cm,numeric
...,...,...,...,...,...,...,...
13572,c815ffa4-2917-2c09-1569-e90d5a89eeb2,1947-06-24,72166-2,Tobacco smoking status NHIS,Never smoker,,text
13573,c815ffa4-2917-2c09-1569-e90d5a89eeb2,1947-06-24,70274-6,Generalized anxiety disorder 7 item (GAD-7) to...,0.0,{score},numeric
13574,c815ffa4-2917-2c09-1569-e90d5a89eeb2,1947-06-24,89204-2,Patient Health Questionnaire-9: Modified for T...,4.0,{score},numeric
13575,c815ffa4-2917-2c09-1569-e90d5a89eeb2,1948-05-19,751-8,Neutrophils [#/volume] in Blood by Automated c...,340.5,10*3/uL,numeric


Unnamed: 0,PATIENT,DATE,CODE,DESCRIPTION,VALUE,UNITS,TYPE
0,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-05-16,8302-2,Body Height,64.5,cm,numeric
1,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-05-16,72514-3,Pain severity - 0-10 verbal numeric rating [Sc...,1.0,{score},numeric
2,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-05-16,29463-7,Body Weight,6.7,kg,numeric
3,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-05-16,77606-2,Weight-for-length Per age and sex,13.0,%,numeric
4,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-05-16,9843-4,Head Occipital-frontal circumference,43.0,cm,numeric
...,...,...,...,...,...,...,...
12251,2e3402a9-45ff-3558-8450-375f17107ca0,2016-06-25,QOLS,QOLS,1.0,{score},numeric
12252,2e3402a9-45ff-3558-8450-375f17107ca0,2017-06-25,QOLS,QOLS,1.0,{score},numeric
12253,2e3402a9-45ff-3558-8450-375f17107ca0,2018-06-25,QOLS,QOLS,1.0,{score},numeric
12254,2e3402a9-45ff-3558-8450-375f17107ca0,2019-06-25,QOLS,QOLS,1.0,{score},numeric


Obtendo a contagem de valores únicos para os identificadores de pacientes e as descrições das observações.

In [193]:
# Per-patient and per-description counts of the observations kept up to the NF start
for _title, _observations in (
    ('Cenário 01\n', observations01_i_b),
    ('\nCenário 02\n', observations02_i_b),
):
    print(_title)
    print('Contagem de observações para cada paciente:')
    print(_observations['PATIENT'].value_counts())
    print('\nContagem dos tipos únicos de observações:')
    print(_observations['DESCRIPTION'].value_counts())

Cenário 01

Contagem de observações para cada paciente:
a259d333-6bb2-30f5-ecb9-d5ff168905b0    242
6ada49d6-cce2-faea-781d-5f4c282c49b4    232
98342e81-23da-dae8-5506-8da094c7d29e    221
fbb23e67-a3a6-1662-ab90-e9f717c61a97    215
9eb99407-b81e-2594-0da2-870e1d88d18c    200
                                       ... 
c022272d-ecd2-92b6-32f4-3769ef3514f9      5
2b7f1733-b575-269e-39d0-a96df81c410d      2
03ecc624-4fba-ad89-6a8c-5f197dd1a9fc      2
7796fd57-7cb0-c895-5b1a-24dd98001bf0      2
27925006-9781-ba3f-19cc-af346d18fde7      2
Name: PATIENT, Length: 137, dtype: int64

Contagem dos tipos únicos de observações:
Diastolic Blood Pressure                                         973
Systolic Blood Pressure                                          973
Body Weight                                                      958
Heart rate                                                       958
Respiratory rate                                                 958
                               

Chamaram nossa atenção as observações do tipo "Neutrophils [#/volume] in Blood by Automated count" e "Body Temperature". Decidimos, então, filtrar as observações realizadas apenas no dia em que se iniciou a NF de cada paciente.

In [194]:
def _observations_on_nf_start(observations, nf, patients):
    """Select each patient's observations dated exactly on their NF start.

    Parameters
    ----------
    observations : DataFrame
        Must contain PATIENT, DATE, CODE, DESCRIPTION, VALUE, UNITS and TYPE.
    nf : DataFrame
        Must contain PATIENT and START; the first row found for a patient
        provides that patient's NF start.
    patients : iterable
        Patient identifiers; the output keeps this order, with each patient's
        rows in their original order.

    Returns
    -------
    DataFrame
        The seven observation columns with a fresh 0..n-1 index.

    Raises
    ------
    IndexError
        If a patient has no row in `nf`.
    """
    columns = ['PATIENT', 'DATE', 'CODE', 'DESCRIPTION', 'VALUE', 'UNITS', 'TYPE']
    selected = []
    for patient in patients:
        nf_start = nf.loc[nf['PATIENT'] == patient, 'START'].values[0]
        keep = (observations['PATIENT'] == patient) & (observations['DATE'] == nf_start)
        selected.append(observations.loc[keep, columns])
    if not selected:
        # pd.concat rejects an empty list; return the empty table explicitly
        return pd.DataFrame(columns=columns)
    # A single concat avoids re-copying the accumulated frame on every iteration and
    # keeps column dtypes from being influenced by an empty seed frame
    return pd.concat(selected, ignore_index=True)


# Observations made on the day the NF started
observations01_i_nf = _observations_on_nf_start(observations01_i, nf01, patients01)

display(observations01_i_nf)

observations02_i_nf = _observations_on_nf_start(observations02_i, nf02, patients02)

display(observations02_i_nf)

Unnamed: 0,PATIENT,DATE,CODE,DESCRIPTION,VALUE,UNITS,TYPE
0,4288f90b-4774-c329-3176-c1482e824c04,2012-07-12,751-8,Neutrophils [#/volume] in Blood by Automated c...,255.1,10*3/uL,numeric
1,4288f90b-4774-c329-3176-c1482e824c04,2012-07-12,8310-5,Body Temperature,101.2,[degF],numeric
2,f03f50be-20b1-3eae-2ed1-bb478bceb320,2018-07-08,751-8,Neutrophils [#/volume] in Blood by Automated c...,491.4,10*3/uL,numeric
3,f03f50be-20b1-3eae-2ed1-bb478bceb320,2018-07-08,8310-5,Body Temperature,102.2,[degF],numeric
4,11089781-c268-6838-642e-2c2c9edbb694,2020-07-14,751-8,Neutrophils [#/volume] in Blood by Automated c...,258.9,10*3/uL,numeric
...,...,...,...,...,...,...,...
421,d5c0cc48-5f8a-4533-325c-25a9c3284185,2020-10-04,8310-5,Body Temperature,101.6,[degF],numeric
422,eca28495-500f-ab7e-356b-176f31382569,2020-03-02,751-8,Neutrophils [#/volume] in Blood by Automated c...,425.7,10*3/uL,numeric
423,eca28495-500f-ab7e-356b-176f31382569,2020-03-02,8310-5,Body Temperature,100.8,[degF],numeric
424,c815ffa4-2917-2c09-1569-e90d5a89eeb2,1948-05-19,751-8,Neutrophils [#/volume] in Blood by Automated c...,340.5,10*3/uL,numeric


Unnamed: 0,PATIENT,DATE,CODE,DESCRIPTION,VALUE,UNITS,TYPE
0,bb37561b-ba65-7c47-db5b-0641bca883b4,2015-02-07,751-8,Neutrophils [#/volume] in Blood by Automated c...,406.5,10*3/uL,numeric
1,bb37561b-ba65-7c47-db5b-0641bca883b4,2015-02-07,8310-5,Body Temperature,102.3,[degF],numeric
2,678fc07c-1cb1-acc6-3553-d848200626e9,2022-02-07,751-8,Neutrophils [#/volume] in Blood by Automated c...,265.8,10*3/uL,numeric
3,678fc07c-1cb1-acc6-3553-d848200626e9,2022-02-07,8310-5,Body Temperature,101.9,[degF],numeric
4,04efa71e-b8ed-980b-94a1-cd25e94b6015,2022-04-16,751-8,Neutrophils [#/volume] in Blood by Automated c...,496.3,10*3/uL,numeric
...,...,...,...,...,...,...,...
327,7837ca92-1dc3-b3ef-a7f9-4207e439775c,2013-01-08,8310-5,Body Temperature,101.1,[degF],numeric
328,3f3ad6c2-337b-9d48-47b7-b2197a0a0500,1971-07-18,751-8,Neutrophils [#/volume] in Blood by Automated c...,448.6,10*3/uL,numeric
329,3f3ad6c2-337b-9d48-47b7-b2197a0a0500,1971-07-18,8310-5,Body Temperature,100.8,[degF],numeric
330,2e3402a9-45ff-3558-8450-375f17107ca0,2021-06-24,751-8,Neutrophils [#/volume] in Blood by Automated c...,303.6,10*3/uL,numeric


Novamente, verificamos a contagem de valores únicos para os identificadores de pacientes e as descrições das observações. Ainda, verificamos o número de vezes que cada paciente apresentou uma observação do tipo "Neutrophils [#/volume] in Blood by Automated count" e "Body Temperature".

In [195]:
# Scenario 01: counts over the observations recorded on the NF start day
print('Cenário 01\n')
print('Contagem de observações para cada paciente:')
print(observations01_i_nf['PATIENT'].value_counts())
print('\nContagem dos tipos únicos de observações:')
print(observations01_i_nf['DESCRIPTION'].value_counts())

# Per-patient occurrences of each of the two observation types of interest
for _title, _condition in (
    ('\nNúmero de vezes que cada paciente apresentou uma observação da contagem de neutrófilos:',
     "DESCRIPTION == 'Neutrophils [#/volume] in Blood by Automated count'"),
    ('\nNúmero de vezes que cada paciente apresentou uma observação da temperatura corporal:',
     "DESCRIPTION == 'Body Temperature'"),
):
    print(_title)
    print(observations01_i_nf.query(_condition)['PATIENT'].value_counts())

Cenário 01

Contagem de observações para cada paciente:
ae5f15a1-427f-ed8c-f258-f25cfb0fd85a    26
1cee6029-ca41-7786-fc4f-04aea9778336    26
5d3c03b8-74cd-941f-3df6-00c45864f1c2    26
77e53fde-d641-fa26-5792-7a92af4fa260    26
a12fc6b4-938f-9199-2af7-f478c30c1eac    26
                                        ..
7bbcd20f-8d90-2c04-06be-e7918684239f     2
c9176ee2-d94c-6930-d132-0120cf1eff92     2
8a9517bb-bd45-da96-f962-668c905ceecb     2
3fe4eb84-cd88-aa94-fa92-e8b6c0858d94     2
c815ffa4-2917-2c09-1569-e90d5a89eeb2     2
Name: PATIENT, Length: 117, dtype: int64

Contagem dos tipos únicos de observações:
Neutrophils [#/volume] in Blood by Automated count                          117
Body Temperature                                                            117
DALY                                                                         15
QALY                                                                         15
QOLS                                                               

In [196]:
# Scenario 02: counts over the observations recorded on the NF start day
print('Cenário 02\n')
print('Contagem de observações para cada paciente:')
print(observations02_i_nf['PATIENT'].value_counts())
print('\nContagem dos tipos únicos de observações:')
print(observations02_i_nf['DESCRIPTION'].value_counts())

# Per-patient occurrences of each of the two observation types of interest
for _title, _condition in (
    ('\nNúmero de vezes que cada paciente apresentou uma observação da contagem de neutrófilos:',
     "DESCRIPTION == 'Neutrophils [#/volume] in Blood by Automated count'"),
    ('\nNúmero de vezes que cada paciente apresentou uma observação da temperatura corporal:',
     "DESCRIPTION == 'Body Temperature'"),
):
    print(_title)
    print(observations02_i_nf.query(_condition)['PATIENT'].value_counts())

Cenário 02

Contagem de observações para cada paciente:
243d3ef4-0fb2-edb0-441f-45b5701bf2db    26
10545fda-0b7f-f479-a9eb-a50534aba634    26
38a5661c-416a-08e9-6aed-3bccb151e7e4    26
3ad364dd-9b86-0a69-3a14-4323f141a630     5
0dc670fe-5d98-e434-d1f6-4f253535f817     5
                                        ..
7011f231-ff8e-2605-61da-de2f2d86c68a     2
f4cd1424-0a65-96d3-e184-56b02e74398f     2
c3a6a691-c2b9-a81e-3b0e-46362af0d52e     2
81865e6e-7ce5-f2a9-202f-220bf23a3947     2
2e3402a9-45ff-3558-8450-375f17107ca0     2
Name: PATIENT, Length: 112, dtype: int64

Contagem dos tipos únicos de observações:
Neutrophils [#/volume] in Blood by Automated count                          112
Body Temperature                                                            112
QALY                                                                         15
DALY                                                                         15
QOLS                                                               

Investigamos quantas vezes foram reportadas observações de contagem de neutrófilos e da temperatura corporal considerando todas as datas anteriores ou iguais ao início da NF.

In [197]:
# Total number of neutrophil-count and body-temperature observations dated on or before
# the NF start. value_counts() is computed once per scenario, and .get(..., 0) reports a
# description that never occurs as 0 instead of raising KeyError.
for _title, _observations in (
    ('Cenário 01\n', observations01_i_b),
    ('\nCenário 02\n', observations02_i_b),
):
    _description_counts = _observations['DESCRIPTION'].value_counts()
    print(_title)
    print('Número de observações da contagem de neutrófilos:')
    print(_description_counts.get('Neutrophils [#/volume] in Blood by Automated count', 0))
    print('\nNúmero de observações da temperatura corporal:')
    print(_description_counts.get('Body Temperature', 0))

Cenário 01

Número de observações da contagem de neutrófilos:
117

Número de observações da temperatura corporal:
117

Cenário 02

Número de observações da contagem de neutrófilos:
112

Número de observações da temperatura corporal:
112


O número de observações é igual considerando datas anteriores ou iguais ao início da NF e considerando apenas a data de início da NF. Ou seja, observações desse tipo não foram realizadas antes do diagnóstico da NF.

No entanto, acabamos notando que, sem a imposição do limite relacionado à data de início da NF, todos os pacientes de interesse apresentaram as observações de contagem de neutrófilos e temperatura corporal.

In [198]:
# Per-patient counts of the two observation types, with no date restriction applied
for _title, _observations in (
    ('Cenário 01:\n', observations01_i),
    ('\nCenário 02:\n', observations02_i),
):
    print(_title)
    print('Número de vezes que cada paciente apresentou uma observação da contagem de neutrófilos, desconsiderando a data limite:')
    print(_observations.query("DESCRIPTION == 'Neutrophils [#/volume] in Blood by Automated count'")['PATIENT'].value_counts())
    print('\nNúmero de vezes que cada paciente apresentou uma observação da temperatura corporal, desconsiderando a data limite:')
    print(_observations.query("DESCRIPTION == 'Body Temperature'")['PATIENT'].value_counts())

Cenário 01:

Número de vezes que cada paciente apresentou uma observação da contagem de neutrófilos, desconsiderando a data limite:
4288f90b-4774-c329-3176-c1482e824c04    1
ee42b995-8fc5-d8bf-5c09-205408bb76fd    1
e7e52291-59e3-52f1-310c-a232fd7be114    1
0ecf6b43-7c73-0458-ab56-19de89acf41e    1
49bd843b-b75e-ac60-a452-605e324c9df2    1
                                       ..
e90e1bfc-18af-2b1b-e4a2-c4fc60d8e18a    1
2dfb6d26-2f71-ecc5-6b9d-ea20dc1ae490    1
a259d333-6bb2-30f5-ecb9-d5ff168905b0    1
a12fc6b4-938f-9199-2af7-f478c30c1eac    1
c815ffa4-2917-2c09-1569-e90d5a89eeb2    1
Name: PATIENT, Length: 139, dtype: int64

Número de vezes que cada paciente apresentou uma observação da temperatura corporal, desconsiderando a data limite:
4288f90b-4774-c329-3176-c1482e824c04    1
ee42b995-8fc5-d8bf-5c09-205408bb76fd    1
e7e52291-59e3-52f1-310c-a232fd7be114    1
0ecf6b43-7c73-0458-ab56-19de89acf41e    1
49bd843b-b75e-ac60-a452-605e324c9df2    1
                                      

Assim, decidimos descartar o limite de data relacionado ao início da NF, de modo que todos os pacientes de interesse possuam valores para as observações de contagem de neutrófilos e de temperatura corporal.

Verificamos, então, algumas estatísticas descritivas básicas para os valores observados da contagem de neutrófilos e da temperatura corporal.

In [199]:
_neutrophils_condition = "DESCRIPTION == 'Neutrophils [#/volume] in Blood by Automated count'"
_temperature_condition = "DESCRIPTION == 'Body Temperature'"
_value_columns = ['PATIENT', 'VALUE']

# Observed neutrophil-count and body-temperature values (scenario 01)
neut01 = observations01_i.query(_neutrophils_condition)[_value_columns]
bt01 = observations01_i.query(_temperature_condition)[_value_columns]

# Observed neutrophil-count and body-temperature values (scenario 02)
neut02 = observations02_i.query(_neutrophils_condition)[_value_columns]
bt02 = observations02_i.query(_temperature_condition)[_value_columns]

# VALUE is converted with pd.to_numeric before describing it
for _title, _neutrophils, _temperature in (
    ('Cenário 01\n', neut01, bt01),
    ('\nCenário 02\n', neut02, bt02),
):
    print(_title)
    print('Estatísticas descritivas básicas para os valores observados da contagem de neutrófilos:')
    print(pd.to_numeric(_neutrophils['VALUE']).describe())
    print('\nEstatísticas descritivas básicas para os valores observados da temperatura corporal:')
    print(pd.to_numeric(_temperature['VALUE']).describe())

Cenário 01

Estatísticas descritivas básicas para os valores observados da contagem de neutrófilos:
count    139.000000
mean     380.464748
std       71.293141
min      252.800000
25%      323.500000
50%      378.800000
75%      439.600000
max      497.700000
Name: VALUE, dtype: float64

Estatísticas descritivas básicas para os valores observados da temperatura corporal:
count    139.000000
mean     101.720863
std        0.790934
min      100.400000
25%      101.000000
50%      101.800000
75%      102.350000
max      103.000000
Name: VALUE, dtype: float64

Cenário 02

Estatísticas descritivas básicas para os valores observados da contagem de neutrófilos:
count    117.000000
mean     378.173504
std       70.403090
min      250.600000
25%      324.400000
50%      374.100000
75%      441.400000
max      499.900000
Name: VALUE, dtype: float64

Estatísticas descritivas básicas para os valores observados da temperatura corporal:
count    117.000000
mean     101.723932
std        0.785060
min

Por fim, acrescentamos os valores das observações nas tabelas de *features*.

In [200]:
def _with_observation_feature(features, observed, feature_name):
    """Left-join one observation value per patient onto a feature table.

    Parameters
    ----------
    features : DataFrame
        Feature table with a PATIENT column.
    observed : DataFrame
        Observation rows with PATIENT and VALUE columns.
    feature_name : str
        Name given to the joined VALUE column.

    Returns
    -------
    DataFrame
        A new frame: `features` plus `feature_name` as its last column. Patients
        without an observation get NaN.

    Raises
    ------
    ValueError
        If a VALUE cannot be parsed as a number.
    pandas.errors.MergeError
        If a patient appears more than once in `observed`; joining would
        otherwise silently duplicate that patient's feature row.
    """
    values = pd.DataFrame({
        'PATIENT': observed['PATIENT'],
        # Store the value as a number rather than as the raw VALUE entry
        feature_name: pd.to_numeric(observed['VALUE']),
    })
    return (
        features
        # Dropping a previous copy of the column makes re-running the cell safe:
        # otherwise a second run would create a duplicate column name
        .drop(columns=[feature_name], errors='ignore')
        .merge(values, on='PATIENT', how='left', validate='many_to_one')
    )


# Adding the neutrophil-count and body-temperature observation values to the feature
# tables (the tables that go to Orange)
scenario01 = _with_observation_feature(scenario01, neut01, 'NEUT_CONT')
scenario01 = _with_observation_feature(scenario01, bt01, 'BODY_TEMP_F')

display(scenario01)

scenario02 = _with_observation_feature(scenario02, neut02, 'NEUT_CONT')
scenario02 = _with_observation_feature(scenario02, bt02, 'BODY_TEMP_F')

display(scenario02)

Unnamed: 0,PATIENT,BIRTHDATE,RACE,ETHNICITY,GENDER,DEATH_FN,AGE_FN_YEARS,BACTEREMIA,WELLNESS_CONT,AMBULATORY_SYMPTOM_CONT,MED_AMLD,NEUT_CONT,BODY_TEMP_F
0,4288f90b-4774-c329-3176-c1482e824c04,2010-07-13,white,nonhispanic,M,0,2,0,1.0,0.0,0,255.1,101.2
1,f03f50be-20b1-3eae-2ed1-bb478bceb320,2002-07-12,white,nonhispanic,M,0,16,1,6.0,1.0,1,491.4,102.2
2,11089781-c268-6838-642e-2c2c9edbb694,2011-07-17,white,nonhispanic,F,0,9,0,12.0,2.0,0,258.9,102.0
3,d7b9725d-889f-d178-cf41-dbf8b373eda9,2010-10-18,black,nonhispanic,F,0,6,0,7.0,1.0,0,360.7,102.9
4,ed2bb6aa-1f3c-b72b-46a6-54f05cac7da7,2008-04-09,asian,nonhispanic,M,0,12,0,7.0,2.0,0,364.6,100.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
134,7d87bdff-2df1-8162-80e4-2e406304515d,2011-06-17,white,nonhispanic,M,0,2,0,4.0,0.0,0,479.1,101.0
135,d5c0cc48-5f8a-4533-325c-25a9c3284185,2017-10-05,other,nonhispanic,F,0,3,1,11.0,1.0,1,476.6,101.6
136,a4cabcbc-8282-5599-6b69-01e28e69c045,2004-09-24,black,nonhispanic,M,0,16,0,8.0,0.0,0,364.4,101.5
137,eca28495-500f-ab7e-356b-176f31382569,2017-03-03,white,nonhispanic,F,0,3,0,11.0,0.0,0,425.7,100.8


Unnamed: 0,PATIENT,BIRTHDATE,RACE,ETHNICITY,GENDER,DEATH_FN,AGE_FN_YEARS,BACTEREMIA,WELLNESS_CONT,AMBULATORY_SYMPTOM_CONT,MED_AMLD,NEUT_CONT,BODY_TEMP_F
0,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-02-08,white,nonhispanic,M,0,3,0,9.0,1.0,0,406.5,102.3
1,678fc07c-1cb1-acc6-3553-d848200626e9,2006-02-11,white,nonhispanic,F,0,16,0,9.0,2.0,0,265.8,101.9
2,04efa71e-b8ed-980b-94a1-cd25e94b6015,2018-04-17,other,nonhispanic,F,0,4,0,13.0,2.0,0,496.3,101.7
3,1e466cbc-5018-4c1a-b132-cf8a4a4d87cf,1997-02-18,white,nonhispanic,F,0,21,1,4.0,10.0,0,321.7,101.8
4,8e9d1dd0-085f-a629-3ea6-9033f330b383,1996-03-07,white,nonhispanic,F,0,20,1,3.0,1.0,0,460.9,103.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,84c9e441-fa88-33c8-5d87-91b863998d26,1944-08-04,white,nonhispanic,F,1,2,1,9.0,1.0,0,250.6,100.9
113,7837ca92-1dc3-b3ef-a7f9-4207e439775c,1996-01-13,white,nonhispanic,M,0,17,0,15.0,2.0,0,435.9,101.1
114,3f3ad6c2-337b-9d48-47b7-b2197a0a0500,1969-07-18,white,nonhispanic,F,1,2,1,9.0,0.0,1,448.6,100.8
115,32d8410a-cc4b-4d8b-601d-3ad2b8ef912b,2006-08-23,white,nonhispanic,M,0,9,0,8.0,4.0,0,391.1,102.0


## Immunizations 

Encontrando os dados de imunização cuja data é anterior ou igual à data de início da NF.

In [201]:
# Strip the time-of-day from the immunization dates (date/time -> date only, then back to
# datetime) so that the comparison in the next step is done by calendar day
for _immunizations in (immunizations01_i, immunizations02_i):
    _immunizations['DATE'] = pd.to_datetime(_immunizations['DATE'].dt.date)

In [202]:
def _immunizations_until_nf_start(immunizations, nf, patients):
    """Select each patient's immunizations dated on or before their NF start.

    Parameters
    ----------
    immunizations : DataFrame
        Must contain PATIENT, DATE and DESCRIPTION.
    nf : DataFrame
        Must contain PATIENT and START; the first row found for a patient
        provides that patient's NF start.
    patients : iterable
        Patient identifiers; the output keeps this order, with each patient's
        rows in their original order.

    Returns
    -------
    DataFrame
        Columns PATIENT, DATE and DESCRIPTION with a fresh 0..n-1 index.

    Raises
    ------
    IndexError
        If a patient has no row in `nf`.
    """
    columns = ['PATIENT', 'DATE', 'DESCRIPTION']
    selected = []
    for patient in patients:
        nf_start = nf.loc[nf['PATIENT'] == patient, 'START'].values[0]
        keep = (immunizations['PATIENT'] == patient) & (immunizations['DATE'] <= nf_start)
        selected.append(immunizations.loc[keep, columns])
    if not selected:
        # pd.concat rejects an empty list; return the empty table explicitly
        return pd.DataFrame(columns=columns)
    # A single concat avoids re-copying the accumulated frame on every iteration and
    # keeps column dtypes from being influenced by an empty seed frame
    return pd.concat(selected, ignore_index=True)


# Immunizations given before or on the day the NF started
immunizations01_i_b = _immunizations_until_nf_start(immunizations01_i, nf01, patients01)

display(immunizations01_i_b)

immunizations02_i_b = _immunizations_until_nf_start(immunizations02_i, nf02, patients02)

display(immunizations02_i_b)

Unnamed: 0,PATIENT,DATE,DESCRIPTION
0,4288f90b-4774-c329-3176-c1482e824c04,2012-06-19,Influenza seasonal injectable preservative ...
1,f03f50be-20b1-3eae-2ed1-bb478bceb320,2012-07-20,Influenza seasonal injectable preservative ...
2,f03f50be-20b1-3eae-2ed1-bb478bceb320,2013-07-26,Tdap
3,f03f50be-20b1-3eae-2ed1-bb478bceb320,2013-07-26,Influenza seasonal injectable preservative ...
4,f03f50be-20b1-3eae-2ed1-bb478bceb320,2013-07-26,HPV quadrivalent
...,...,...,...
1565,eca28495-500f-ab7e-356b-176f31382569,2019-08-09,Hep A ped/adol 2 dose
1566,eca28495-500f-ab7e-356b-176f31382569,2020-02-07,Influenza seasonal injectable preservative ...
1567,c815ffa4-2917-2c09-1569-e90d5a89eeb2,1945-06-12,Influenza seasonal injectable preservative ...
1568,c815ffa4-2917-2c09-1569-e90d5a89eeb2,1946-06-18,Influenza seasonal injectable preservative ...


Unnamed: 0,PATIENT,DATE,DESCRIPTION
0,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-05-16,Hib (PRP-OMP)
1,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-05-16,rotavirus monovalent
2,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-05-16,IPV
3,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-05-16,DTaP
4,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-05-16,Pneumococcal conjugate PCV 13
...,...,...,...
1481,2e3402a9-45ff-3558-8450-375f17107ca0,2021-06-05,varicella
1482,2e3402a9-45ff-3558-8450-375f17107ca0,2021-06-05,IPV
1483,2e3402a9-45ff-3558-8450-375f17107ca0,2021-06-05,Influenza seasonal injectable preservative ...
1484,2e3402a9-45ff-3558-8450-375f17107ca0,2021-06-05,DTaP


Obtendo a contagem de valores únicos para os identificadores de pacientes e as descrições das imunizações.

In [203]:
# Per-patient and per-description counts of the immunizations kept up to the NF start
for _title, _immunizations in (
    ('Cenário 01\n', immunizations01_i_b),
    ('\nCenário 02\n', immunizations02_i_b),
):
    print(_title)
    print('Contagem de imunizações para cada paciente:')
    print(_immunizations['PATIENT'].value_counts())
    print('\nContagem dos tipos únicos de imunizações:')
    print(_immunizations['DESCRIPTION'].value_counts())

Cenário 01

Contagem de imunizações para cada paciente:
fbb23e67-a3a6-1662-ab90-e9f717c61a97    34
6884a50b-2060-67ff-0863-51569e687cc2    32
0f652e34-7595-a919-5856-50e0d6258699    32
d1fe5a74-fd5a-4651-4b55-2985bc8288ea    32
f0f510b2-9888-120a-a693-bcaca0f937cd    32
                                        ..
cfe7cd0f-57de-4f43-d870-221bba80520b     1
7bbcd20f-8d90-2c04-06be-e7918684239f     1
ae5f15a1-427f-ed8c-f258-f25cfb0fd85a     1
99bfcaff-0b03-90da-dad5-b23fcc28fb61     1
4288f90b-4774-c329-3176-c1482e824c04     1
Name: PATIENT, Length: 124, dtype: int64

Contagem dos tipos únicos de imunizações:
Influenza  seasonal  injectable  preservative free                                               449
DTaP                                                                                             171
IPV                                                                                              166
Pneumococcal conjugate PCV 13                                                       

## Procedures

Encontrando os dados de procedimentos cuja data inicial é anterior ou igual à data de início da NF.

In [204]:
# Strip the time-of-day from the procedure start dates (date/time -> date only, then back
# to datetime) so that the comparison in the next step is done by calendar day
for _procedures in (procedures01_i, procedures02_i):
    _procedures['START'] = pd.to_datetime(_procedures['START'].dt.date)

In [205]:
def events_until_nf_start(events, nf, patients, columns):
  """Return the rows of `events` dated on or before each patient's NF start.

  Parameters
  ----------
  events : pd.DataFrame
      Table holding at least 'PATIENT', 'START' and every name in `columns`.
  nf : pd.DataFrame
      NF table with 'PATIENT' and 'START'; the first row found for a patient
      supplies that patient's cut-off date.
  patients : iterable
      Patient identifiers to process; the result follows this order.
  columns : list of str
      Columns kept in the result.

  Returns
  -------
  pd.DataFrame
      Selected rows restricted to `columns`, with a fresh 0..n-1 index. An
      empty frame with those columns is returned when nothing matches.

  Raises
  ------
  IndexError
      If a patient has no row in `nf` (same behaviour as the original lookup).
  """
  selected = []
  for patient in patients:
    nf_start = nf.loc[nf['PATIENT'] == patient, 'START'].values[0]
    matched = events.query('PATIENT == @patient & START <= @nf_start')[columns]
    # Empty pieces are skipped so they never take part in dtype resolution
    # (pandas deprecated the special handling of empty entries in concat).
    if not matched.empty:
      selected.append(matched)
  if not selected:
    return pd.DataFrame(columns=columns)
  # A single concat at the end avoids re-copying the accumulated frame on
  # every iteration, which is what concat-inside-the-loop does.
  return pd.concat(selected, ignore_index=True)


PROCEDURE_COLUMNS = ['PATIENT', 'START', 'CODE', 'DESCRIPTION', 'REASONCODE', 'REASONDESCRIPTION']

# Procedures performed before or on the day the NF started (scenario 01)
procedures01_i_b = events_until_nf_start(procedures01_i, nf01, patients01, PROCEDURE_COLUMNS)
display(procedures01_i_b)

# Procedures performed before or on the day the NF started (scenario 02)
procedures02_i_b = events_until_nf_start(procedures02_i, nf02, patients02, PROCEDURE_COLUMNS)
display(procedures02_i_b)

Unnamed: 0,PATIENT,START,CODE,DESCRIPTION,REASONCODE,REASONDESCRIPTION
0,4288f90b-4774-c329-3176-c1482e824c04,2012-06-19,430193006,Medication Reconciliation (procedure),,
1,4288f90b-4774-c329-3176-c1482e824c04,2012-07-12,367336001,Chemotherapy (procedure),91861009.0,Acute myeloid leukemia disease (disorder)
2,4288f90b-4774-c329-3176-c1482e824c04,2012-07-12,449214001,Transfer to stepdown,91861009.0,Acute myeloid leukemia disease (disorder)
3,f03f50be-20b1-3eae-2ed1-bb478bceb320,2012-07-20,430193006,Medication Reconciliation (procedure),,
4,f03f50be-20b1-3eae-2ed1-bb478bceb320,2013-01-02,63332003,History AND physical examination (procedure),,
...,...,...,...,...,...,...
2549,c815ffa4-2917-2c09-1569-e90d5a89eeb2,1947-06-24,428211000124100,Assessment of substance use (procedure),,
2550,c815ffa4-2917-2c09-1569-e90d5a89eeb2,1947-06-24,868187001,Assessment using Car Relax Alone Forget Fr...,,
2551,c815ffa4-2917-2c09-1569-e90d5a89eeb2,1947-06-24,386516004,Anticipatory guidance (procedure),,
2552,c815ffa4-2917-2c09-1569-e90d5a89eeb2,1948-05-19,367336001,Chemotherapy (procedure),91861009.0,Acute myeloid leukemia disease (disorder)


Unnamed: 0,PATIENT,START,CODE,DESCRIPTION,REASONCODE,REASONDESCRIPTION
0,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-05-16,430193006,Medication Reconciliation (procedure),,
1,bb37561b-ba65-7c47-db5b-0641bca883b4,2012-10-17,430193006,Medication Reconciliation (procedure),,
2,bb37561b-ba65-7c47-db5b-0641bca883b4,2013-04-11,313191000,Injection of adrenaline,,
3,bb37561b-ba65-7c47-db5b-0641bca883b4,2013-04-29,395142003,Allergy screening test,,
4,bb37561b-ba65-7c47-db5b-0641bca883b4,2014-07-16,430193006,Medication Reconciliation (procedure),,
...,...,...,...,...,...,...
1971,2e3402a9-45ff-3558-8450-375f17107ca0,2021-06-13,1225002,Upper arm X-ray,,
1972,2e3402a9-45ff-3558-8450-375f17107ca0,2021-06-13,274474001,Bone immobilization,65966004.0,Fracture of forearm
1973,2e3402a9-45ff-3558-8450-375f17107ca0,2021-06-24,367336001,Chemotherapy (procedure),91861009.0,Acute myeloid leukemia disease (disorder)
1974,2e3402a9-45ff-3558-8450-375f17107ca0,2021-06-24,305351004,Admit to intensive care unit (ICU),409089005.0,Febrile neutropenia (disorder)


Obtendo a contagem de valores únicos para os identificadores de pacientes e as descrições dos procedimentos.

In [206]:
# Summarise the date-filtered procedure tables of both scenarios: number of
# procedures per patient and number of records per procedure description.
for scenario_title, procedures_table in (
    ('Cenário 01\n', procedures01_i_b),
    ('\nCenário 02\n', procedures02_i_b),
):
  print(scenario_title)
  print('Contagem de procedimentos para cada paciente:')
  print(procedures_table['PATIENT'].value_counts())
  print('\nContagem dos tipos únicos de procedimentos:')
  print(procedures_table['DESCRIPTION'].value_counts())

Cenário 01

Contagem de procedimentos para cada paciente:
fcacc452-c0ff-6fbe-c8b7-4b77e3807232    403
98342e81-23da-dae8-5506-8da094c7d29e    168
f03f50be-20b1-3eae-2ed1-bb478bceb320    138
28911653-c9ef-e5a8-61a5-668dbb48f58f    109
a259d333-6bb2-30f5-ecb9-d5ff168905b0    106
                                       ... 
adf63106-46a4-e19c-349a-f0a3eec09e33      2
7796fd57-7cb0-c895-5b1a-24dd98001bf0      2
331b91fb-bca6-ab76-2671-1bf22def011e      2
1cee6029-ca41-7786-fc4f-04aea9778336      2
d1fe5a74-fd5a-4651-4b55-2985bc8288ea      1
Name: PATIENT, Length: 137, dtype: int64

Contagem dos tipos únicos de procedimentos:
Medication Reconciliation (procedure)                        419
Nursing care/supplementary surveillance (regime/therapy)     165
Physical therapy procedure (regime/therapy)                  156
Occupational therapy (regime/therapy)                        154
Chemotherapy (procedure)                                     120
                                               

Considerando apenas os procedimentos realizados em razão da "Acute myeloid leukemia disease (disorder)".

In [207]:
# Rows whose reason code is 91861009, the code listed next to
# "Acute myeloid leukemia disease (disorder)" in the procedure tables,
# counted per patient for scenario 01 (date-filtered table).
is_aml_procedure_01 = procedures01_i_b["REASONCODE"].eq(91861009)
print('Contagem de procedimentos realizados por paciente em razão do câncer (cenário 01):')
procedures01_i_b.loc[is_aml_procedure_01, 'PATIENT'].value_counts()

Contagem de procedimentos realizados por paciente em razão do câncer (cenário 01):


4288f90b-4774-c329-3176-c1482e824c04    2
f03f50be-20b1-3eae-2ed1-bb478bceb320    2
84483a05-0e7c-cc01-7ef7-83e93da36d92    2
4b8877ac-d142-dbf4-f1e0-06c638ab66c9    2
adf63106-46a4-e19c-349a-f0a3eec09e33    2
                                       ..
2dfb6d26-2f71-ecc5-6b9d-ea20dc1ae490    2
c815ffa4-2917-2c09-1569-e90d5a89eeb2    2
738a5bc6-78c7-2b05-d185-9741920b0497    1
0b7f4569-6127-edae-6b43-3ba8469a21dc    1
47c2ab95-1569-1afa-4550-ac83d53f0384    1
Name: PATIENT, Length: 120, dtype: int64

In [208]:
# Rows whose reason code is 91861009, the code listed next to
# "Acute myeloid leukemia disease (disorder)" in the procedure tables,
# counted per patient for scenario 02 (date-filtered table).
is_aml_procedure_02 = procedures02_i_b["REASONCODE"].eq(91861009)
print('Contagem de procedimentos realizados por paciente em razão do câncer (cenário 02):')
procedures02_i_b.loc[is_aml_procedure_02, 'PATIENT'].value_counts()

Contagem de procedimentos realizados por paciente em razão do câncer (cenário 02):


bb37561b-ba65-7c47-db5b-0641bca883b4    2
678fc07c-1cb1-acc6-3553-d848200626e9    2
6b17b083-39c0-f107-e905-ee89ad02d0ed    2
1ac16be4-9c27-bc83-e3cb-1ea35040406f    2
5d102f9f-543b-d0bc-e509-6887faddb0da    2
                                       ..
f4cd1424-0a65-96d3-e184-56b02e74398f    2
c3a6a691-c2b9-a81e-3b0e-46362af0d52e    2
81865e6e-7ce5-f2a9-202f-220bf23a3947    2
f4b28b81-0c49-d919-8a75-a7a8912d5556    2
2e3402a9-45ff-3558-8450-375f17107ca0    2
Name: PATIENT, Length: 112, dtype: int64

Notamos que, considerando a data limite relacionada ao início da NF, nem todos os pacientes apresentaram procedimentos relacionados ao diagnóstico de câncer. No entanto, desconsiderando essa data limite, foi possível obter dados para todos os pacientes.

Obtendo os procedimentos realizados em razão da "Acute myeloid leukemia disease (disorder)", desconsiderando a data limite.

In [209]:
# Procedures whose reason code is 91861009 (acute myeloid leukemia), taken
# from the procedure tables that are not restricted by the NF start date.
# Scenario 01
amld01 = procedures01_i[procedures01_i['REASONCODE'] == 91861009]
# Scenario 02
amld02 = procedures02_i[procedures02_i['REASONCODE'] == 91861009]
display(amld01, amld02)

Unnamed: 0,START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION,BASE_COST,REASONCODE,REASONDESCRIPTION
670,2012-07-12,2012-07-12 14:37:25+00:00,4288f90b-4774-c329-3176-c1482e824c04,7478a188-6e9b-6e3b-6f6c-b4f761647917,367336001,Chemotherapy (procedure),431.4,91861009.0,Acute myeloid leukemia disease (disorder)
671,2012-07-12,2012-07-12 15:07:25+00:00,4288f90b-4774-c329-3176-c1482e824c04,7478a188-6e9b-6e3b-6f6c-b4f761647917,449214001,Transfer to stepdown,431.4,91861009.0,Acute myeloid leukemia disease (disorder)
1296,2018-07-08,2018-07-08 12:53:28+00:00,f03f50be-20b1-3eae-2ed1-bb478bceb320,3952605e-1f86-c0ca-2cd4-526690fa035e,367336001,Chemotherapy (procedure),431.4,91861009.0,Acute myeloid leukemia disease (disorder)
1298,2018-07-08,2018-07-08 13:53:28+00:00,f03f50be-20b1-3eae-2ed1-bb478bceb320,3952605e-1f86-c0ca-2cd4-526690fa035e,449214001,Transfer to stepdown,431.4,91861009.0,Acute myeloid leukemia disease (disorder)
1721,2020-07-14,2020-07-14 06:54:55+00:00,11089781-c268-6838-642e-2c2c9edbb694,23f44790-ea99-7c28-af8d-9cdd2ac28a79,367336001,Chemotherapy (procedure),431.4,91861009.0,Acute myeloid leukemia disease (disorder)
...,...,...,...,...,...,...,...,...,...
100216,2020-09-21,2020-09-21 01:28:41+00:00,a4cabcbc-8282-5599-6b69-01e28e69c045,10cd460d-c8fe-415e-53c8-d4474b6a3b2e,449214001,Transfer to stepdown,431.4,91861009.0,Acute myeloid leukemia disease (disorder)
102646,2020-03-02,2020-03-02 13:29:56+00:00,eca28495-500f-ab7e-356b-176f31382569,5989068e-8c3b-61a3-acb8-49773e26e4bc,367336001,Chemotherapy (procedure),431.4,91861009.0,Acute myeloid leukemia disease (disorder)
102647,2020-03-02,2020-03-02 13:59:56+00:00,eca28495-500f-ab7e-356b-176f31382569,5989068e-8c3b-61a3-acb8-49773e26e4bc,449214001,Transfer to stepdown,431.4,91861009.0,Acute myeloid leukemia disease (disorder)
107575,1948-05-19,1948-05-19 12:33:18+00:00,c815ffa4-2917-2c09-1569-e90d5a89eeb2,6f119a9d-c2cb-97b3-4233-0fcee24f6bc5,367336001,Chemotherapy (procedure),431.4,91861009.0,Acute myeloid leukemia disease (disorder)


Unnamed: 0,START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION,BASE_COST,REASONCODE,REASONDESCRIPTION
16,2015-02-07,2015-02-07 23:35:43+00:00,bb37561b-ba65-7c47-db5b-0641bca883b4,2628ed4a-c240-0c28-0462-84c39c3f1dad,367336001,Chemotherapy (procedure),587.55,91861009.0,Acute myeloid leukemia disease (disorder)
17,2015-02-07,2015-02-08 00:05:43+00:00,bb37561b-ba65-7c47-db5b-0641bca883b4,2628ed4a-c240-0c28-0462-84c39c3f1dad,449214001,Transfer to stepdown,587.55,91861009.0,Acute myeloid leukemia disease (disorder)
52,2022-02-07,2022-02-07 20:56:28+00:00,678fc07c-1cb1-acc6-3553-d848200626e9,398930ec-d19a-7ac2-5524-673994779f88,367336001,Chemotherapy (procedure),587.55,91861009.0,Acute myeloid leukemia disease (disorder)
53,2022-02-07,2022-02-07 21:26:28+00:00,678fc07c-1cb1-acc6-3553-d848200626e9,398930ec-d19a-7ac2-5524-673994779f88,449214001,Transfer to stepdown,587.55,91861009.0,Acute myeloid leukemia disease (disorder)
1275,2022-04-16,2022-04-16 07:23:25+00:00,04efa71e-b8ed-980b-94a1-cd25e94b6015,324892c0-ec71-bce5-2128-77987ec564be,367336001,Chemotherapy (procedure),587.55,91861009.0,Acute myeloid leukemia disease (disorder)
...,...,...,...,...,...,...,...,...,...
95072,1971-07-18,1971-07-18 20:15:52+00:00,3f3ad6c2-337b-9d48-47b7-b2197a0a0500,ae91d7a2-204f-62c3-eecc-a13353a190ba,16983000,Death in hospital (event),587.55,91861009.0,Acute myeloid leukemia disease (disorder)
95209,2015-08-22,2015-08-22 01:43:34+00:00,32d8410a-cc4b-4d8b-601d-3ad2b8ef912b,dacf6281-6ef4-52f6-de6a-dd7c8271a4a0,367336001,Chemotherapy (procedure),587.55,91861009.0,Acute myeloid leukemia disease (disorder)
95210,2015-08-22,2015-08-22 02:13:34+00:00,32d8410a-cc4b-4d8b-601d-3ad2b8ef912b,dacf6281-6ef4-52f6-de6a-dd7c8271a4a0,449214001,Transfer to stepdown,587.55,91861009.0,Acute myeloid leukemia disease (disorder)
95453,2021-06-24,2021-06-24 22:39:51+00:00,2e3402a9-45ff-3558-8450-375f17107ca0,cbaa7912-afba-603a-b66a-5e69d44f2216,367336001,Chemotherapy (procedure),587.55,91861009.0,Acute myeloid leukemia disease (disorder)


In [210]:
# Number of AML-related procedures per patient in scenario 01, computed on the
# table that ignores the NF start-date cut-off.
aml_procedures_per_patient_01 = amld01['PATIENT'].value_counts()
print('Contagem de procedimentos realizados por paciente em razão do câncer, desconsiderando a data limite (cenário 01):')
aml_procedures_per_patient_01

Contagem de procedimentos realizados por paciente em razão do câncer, desconsiderando a data limite (cenário 01):


4288f90b-4774-c329-3176-c1482e824c04    2
ee42b995-8fc5-d8bf-5c09-205408bb76fd    2
e7e52291-59e3-52f1-310c-a232fd7be114    2
0ecf6b43-7c73-0458-ab56-19de89acf41e    2
49bd843b-b75e-ac60-a452-605e324c9df2    2
                                       ..
e90e1bfc-18af-2b1b-e4a2-c4fc60d8e18a    2
2dfb6d26-2f71-ecc5-6b9d-ea20dc1ae490    2
a259d333-6bb2-30f5-ecb9-d5ff168905b0    2
a12fc6b4-938f-9199-2af7-f478c30c1eac    2
c815ffa4-2917-2c09-1569-e90d5a89eeb2    2
Name: PATIENT, Length: 139, dtype: int64

In [211]:
# Number of AML-related procedures per patient in scenario 02, computed on the
# table that ignores the NF start-date cut-off.
aml_procedures_per_patient_02 = amld02['PATIENT'].value_counts()
print('Contagem de procedimentos realizados por paciente em razão do câncer, desconsiderando a data limite (cenário 02):')
aml_procedures_per_patient_02

Contagem de procedimentos realizados por paciente em razão do câncer, desconsiderando a data limite (cenário 02):


bb37561b-ba65-7c47-db5b-0641bca883b4    2
fe9e8ecf-79e1-545d-a28b-26274781bd97    2
3edd7198-2494-cc95-d844-ab3a3adb562f    2
6b17b083-39c0-f107-e905-ee89ad02d0ed    2
1ac16be4-9c27-bc83-e3cb-1ea35040406f    2
                                       ..
c3a6a691-c2b9-a81e-3b0e-46362af0d52e    2
81865e6e-7ce5-f2a9-202f-220bf23a3947    2
f4b28b81-0c49-d919-8a75-a7a8912d5556    2
33469efc-5b9a-e5fe-a3be-b99e946395d3    2
2e3402a9-45ff-3558-8450-375f17107ca0    2
Name: PATIENT, Length: 117, dtype: int64

Além disso, desconsiderando a data limite, cada paciente apresentou exatamente dois procedimentos relacionados ao câncer.

Verificamos, então, os tipos de procedimentos.

In [212]:
# Frequency of each procedure description among the AML-related procedures.
aml_procedure_types_01 = amld01['DESCRIPTION'].value_counts()
aml_procedure_types_02 = amld02['DESCRIPTION'].value_counts()

print('Tipos diferentes de procedimentos (cenário 01):')
print(aml_procedure_types_01)
print('\nTipos diferentes de procedimentos (cenário 02):')
# Left as the last expression so the notebook renders it as the cell output.
aml_procedure_types_02

Tipos diferentes de procedimentos (cenário 01):
Chemotherapy (procedure)     139
Transfer to stepdown         113
Death in hospital (event)     26
Name: DESCRIPTION, dtype: int64

Tipos diferentes de procedimentos (cenário 02):


Chemotherapy (procedure)     117
Transfer to stepdown         104
Death in hospital (event)     13
Name: DESCRIPTION, dtype: int64

In [213]:
# Per-patient number of chemotherapy records among the AML-related procedures.
chemotherapy01 = amld01[amld01['DESCRIPTION'].eq('Chemotherapy (procedure)')]
chemotherapy02 = amld02[amld02['DESCRIPTION'].eq('Chemotherapy (procedure)')]

print('Contagem de vezes que cada paciente fez quimioterapia (cenário 01):')
print(chemotherapy01['PATIENT'].value_counts())

print('\nContagem de vezes que cada paciente fez quimioterapia (cenário 02):')
print(chemotherapy02['PATIENT'].value_counts())

Contagem de vezes que cada paciente fez quimioterapia (cenário 01):
4288f90b-4774-c329-3176-c1482e824c04    1
ee42b995-8fc5-d8bf-5c09-205408bb76fd    1
e7e52291-59e3-52f1-310c-a232fd7be114    1
0ecf6b43-7c73-0458-ab56-19de89acf41e    1
49bd843b-b75e-ac60-a452-605e324c9df2    1
                                       ..
e90e1bfc-18af-2b1b-e4a2-c4fc60d8e18a    1
2dfb6d26-2f71-ecc5-6b9d-ea20dc1ae490    1
a259d333-6bb2-30f5-ecb9-d5ff168905b0    1
a12fc6b4-938f-9199-2af7-f478c30c1eac    1
c815ffa4-2917-2c09-1569-e90d5a89eeb2    1
Name: PATIENT, Length: 139, dtype: int64

Contagem de vezes que cada paciente fez quimioterapia (cenário 02):
bb37561b-ba65-7c47-db5b-0641bca883b4    1
fe9e8ecf-79e1-545d-a28b-26274781bd97    1
3edd7198-2494-cc95-d844-ab3a3adb562f    1
6b17b083-39c0-f107-e905-ee89ad02d0ed    1
1ac16be4-9c27-bc83-e3cb-1ea35040406f    1
                                       ..
c3a6a691-c2b9-a81e-3b0e-46362af0d52e    1
81865e6e-7ce5-f2a9-202f-220bf23a3947    1
f4b28b81-0c49-d919-8a75-

É interessante notar que, como esperado, todos os pacientes que tiveram NF fizeram quimioterapia. Além disso, o número de pacientes que morreram no hospital foi igual ao número de pacientes que morreram com NF obtido nas análises anteriores.

## Careplans

Verificando as diferentes razões para os "careplans" e sua contagem.

In [214]:
# Frequency of each care-plan reason description, one listing per scenario.
for careplan_heading, careplans_table in (
    ('Cenário 01:', careplans01_i),
    ('\nCenário 02:', careplans02_i),
):
  print(careplan_heading)
  print(careplans_table['REASONDESCRIPTION'].value_counts())

Cenário 01:
Concussion with no loss of consciousness        12
Sprain of ankle                                 11
Hypertension                                    11
Childhood asthma                                10
Sprain of wrist                                  8
Fracture of ankle                                6
Laceration of hand                               5
Fracture of forearm                              5
Atopic dermatitis                                5
Second degree burn                               4
Laceration of thigh                              4
Laceration of forearm                            4
Prediabetes                                      3
Concussion with loss of consciousness            3
Fracture of clavicle                             3
Fracture subluxation of wrist                    3
Laceration of foot                               3
Concussion injury of brain                       2
First degree burn                                2
Facial laceration  

Verificando as diferentes descrições para os "careplans" e sua contagem.

In [215]:
# Frequency of each care-plan description, one listing per scenario.
careplan_description_counts_01 = careplans01_i['DESCRIPTION'].value_counts()
careplan_description_counts_02 = careplans02_i['DESCRIPTION'].value_counts()

print('Cenário 01:')
print(careplan_description_counts_01)

print('\nCenário 02:')
print(careplan_description_counts_02)

Cenário 01:
Respiratory therapy                                               69
Routine antenatal care                                            26
Physiotherapy care plan (record artifact)                         25
Self-care interventions (procedure)                               25
Fracture care                                                     20
Infectious disease care plan (record artifact)                    18
Wound care                                                        18
Head injury rehabilitation                                        17
Therapy (regime/therapy)                                          12
Musculoskeletal care                                              12
Lifestyle education regarding hypertension                        11
Asthma self management                                            10
Overactivity/inattention behavior management                       7
Weight management program                                          6
Burn care             

Verificando o número de pacientes que passaram por terapia respiratória (careplan que mais apareceu).

In [216]:
# Care plans described as 'Respiratory therapy'; rt01 / rt02 are reused by the
# date-filtered selection further down.
rt01 = careplans01_i[careplans01_i['DESCRIPTION'] == 'Respiratory therapy']
rt02 = careplans02_i[careplans02_i['DESCRIPTION'] == 'Respiratory therapy']

# One entry per patient, so the length of each count table is the number of
# distinct patients with at least one respiratory-therapy care plan.
rt01_per_patient = rt01['PATIENT'].value_counts()
rt02_per_patient = rt02['PATIENT'].value_counts()

print('Cenário 01:')
print(rt01_per_patient)
print('Número de pacientes: ', len(rt01_per_patient))

print('\nCenário 02:')
print(rt02_per_patient)
print('Número de pacientes: ', len(rt02_per_patient))

Cenário 01:
8288abe0-04e8-ee34-c028-535da273c3dd    3
11d7ecb8-1e4b-ff44-ff40-80755767512c    3
6ada49d6-cce2-faea-781d-5f4c282c49b4    3
1e92bf63-416b-c43a-fab1-7b25599ac247    2
61c56ab4-d429-19d3-6eb8-be4021a8f96f    2
f05e62cc-d39b-c0a4-4cda-7af990fafd7f    2
a259d333-6bb2-30f5-ecb9-d5ff168905b0    2
160a53cf-63b1-790d-5e95-307e88c1dc9c    2
a08eb05d-46f4-c6cf-feeb-b0ac56f61458    2
11c85df9-4085-afce-75ba-3e62f0c77609    2
eabfa666-29b5-64f3-105a-e8ea7653ae70    1
7c2d7821-5781-20ff-1b7d-9c0b48d02151    1
3fa732a0-cdca-c427-6c38-2afa3f4b2369    1
99bfcaff-0b03-90da-dad5-b23fcc28fb61    1
0ecf6b43-7c73-0458-ab56-19de89acf41e    1
49bd843b-b75e-ac60-a452-605e324c9df2    1
63f9799b-e770-b92d-d74e-d57609cd775e    1
4288f90b-4774-c329-3176-c1482e824c04    1
f0f510b2-9888-120a-a693-bcaca0f937cd    1
05313452-3962-82d9-60a2-cd79e464cc80    1
0f652e34-7595-a919-5856-50e0d6258699    1
dd5328d0-4c53-d310-8411-2d3a7fa5ed58    1
a0a39700-f3b2-a365-b6e2-4f41cd34a706    1
70a574b3-eddf-abcb-9a6

Verificando o número de pacientes que passaram por terapia respiratória até o dia em que a NF começou.

In [217]:
RT_COLUMNS = ['PATIENT', 'START', 'CODE', 'DESCRIPTION', 'REASONCODE', 'REASONDESCRIPTION']


def respiratory_therapy_until_nf_start(rt, nf, patients):
  """Return the care plans in `rt` that started on or before each patient's NF start.

  Parameters
  ----------
  rt : pd.DataFrame
      Respiratory-therapy care plans with 'START' and the RT_COLUMNS columns.
  nf : pd.DataFrame
      NF table with 'PATIENT' and 'START'; the first row found for a patient
      supplies that patient's cut-off date.
  patients : iterable
      Patient identifiers to process; the result follows this order.

  Returns
  -------
  pd.DataFrame
      Matching rows restricted to RT_COLUMNS, with a fresh 0..n-1 index. An
      empty frame with those columns is returned when nothing matches.

  Raises
  ------
  IndexError
      If a patient has no row in `nf` (same behaviour as the original lookup).
  """
  selected = []
  for patient in patients:
    nf_start = nf.loc[nf['PATIENT'] == patient, 'START'].values[0]
    matched = rt.query('PATIENT == @patient & START <= @nf_start')[RT_COLUMNS]
    # Empty pieces are skipped so they never take part in dtype resolution
    # (pandas deprecated the special handling of empty entries in concat).
    if not matched.empty:
      selected.append(matched)
  if not selected:
    return pd.DataFrame(columns=RT_COLUMNS)
  # Concatenate once instead of growing the frame inside the loop.
  return pd.concat(selected, ignore_index=True)


# Respiratory-therapy care plans started before or on the day the NF started
rt01_b = respiratory_therapy_until_nf_start(rt01, nf01, patients01)
display(rt01_b)

rt02_b = respiratory_therapy_until_nf_start(rt02, nf02, patients02)
display(rt02_b)

Unnamed: 0,PATIENT,START,CODE,DESCRIPTION,REASONCODE,REASONDESCRIPTION
0,11089781-c268-6838-642e-2c2c9edbb694,2012-12-10,53950000,Respiratory therapy,,
1,d7b9725d-889f-d178-cf41-dbf8b373eda9,2015-08-08,53950000,Respiratory therapy,,
2,47c2ab95-1569-1afa-4550-ac83d53f0384,2013-11-20,53950000,Respiratory therapy,,
3,205fa58e-7fd3-9e1f-4ae6-c0673d46a765,1953-08-30,53950000,Respiratory therapy,,
4,160a53cf-63b1-790d-5e95-307e88c1dc9c,1980-06-01,53950000,Respiratory therapy,,
5,160a53cf-63b1-790d-5e95-307e88c1dc9c,1982-09-13,53950000,Respiratory therapy,,
6,9e2a09cd-95e3-1bef-81eb-82ac2f6da20d,2019-09-05,53950000,Respiratory therapy,,
7,158fffa4-6613-055b-3d43-d88fc9860894,1959-08-03,53950000,Respiratory therapy,,
8,493c4936-2813-7b3c-55b0-39b01fee5d22,2014-01-17,53950000,Respiratory therapy,,
9,c8d9f38a-fe45-0b77-3814-6e01a9c59702,2018-01-02,53950000,Respiratory therapy,,


Unnamed: 0,PATIENT,START,CODE,DESCRIPTION,REASONCODE,REASONDESCRIPTION
0,0b33cdec-f69f-d533-1841-cff1c3cc17dc,2014-02-14,53950000,Respiratory therapy,,
1,bf17b3d6-8405-672d-3685-a0a78ad7b4ec,2020-09-03,53950000,Respiratory therapy,,
2,5429227f-c53f-74a3-2636-1a19fc58c4ac,2012-12-04,53950000,Respiratory therapy,,
3,3f3006ef-27bf-6259-6db6-2b87e4b46b24,2019-09-19,53950000,Respiratory therapy,,
4,73bce5aa-52fc-9101-e1f1-a80e84447b25,1994-04-19,53950000,Respiratory therapy,,
5,83b45fe6-567c-aa27-5853-4125ea3d27cf,2017-03-01,53950000,Respiratory therapy,,
6,f4cd1424-0a65-96d3-e184-56b02e74398f,2016-10-21,53950000,Respiratory therapy,,
7,7191d0ae-428f-da64-469a-82e9fb352db6,2018-05-20,53950000,Respiratory therapy,,
8,f5625e35-e54f-9614-cf5e-2d284fd6baed,2014-01-24,53950000,Respiratory therapy,,
9,e28969c1-cc28-2b27-9424-ffa59d85d673,2017-07-28,53950000,Respiratory therapy,,


In [218]:
# Turn the per-patient count of date-filtered respiratory-therapy care plans
# into a two-column table (PATIENT, RT_CONT) for each scenario.
print('Cenário 01:')
rt01_b_c = (
    rt01_b['PATIENT']
    .value_counts()
    .rename_axis('PATIENT')
    .reset_index(name='RT_CONT')
)
display(rt01_b_c)
print('Número de pacientes: ', rt01_b_c.shape[0])

print('\nCenário 02:')
rt02_b_c = (
    rt02_b['PATIENT']
    .value_counts()
    .rename_axis('PATIENT')
    .reset_index(name='RT_CONT')
)
display(rt02_b_c)
print('Número de pacientes: ', rt02_b_c.shape[0])

Cenário 01:


Unnamed: 0,PATIENT,RT_CONT
0,6ada49d6-cce2-faea-781d-5f4c282c49b4,3
1,160a53cf-63b1-790d-5e95-307e88c1dc9c,2
2,8288abe0-04e8-ee34-c028-535da273c3dd,2
3,11089781-c268-6838-642e-2c2c9edbb694,1
4,63f9799b-e770-b92d-d74e-d57609cd775e,1
5,721e493e-23b3-af08-8571-54b20617f159,1
6,05313452-3962-82d9-60a2-cd79e464cc80,1
7,dd902384-4955-83d4-4b52-7909f0f91e51,1
8,11d7ecb8-1e4b-ff44-ff40-80755767512c,1
9,eabfa666-29b5-64f3-105a-e8ea7653ae70,1


Número de pacientes:  33

Cenário 02:


Unnamed: 0,PATIENT,RT_CONT
0,3edd7198-2494-cc95-d844-ab3a3adb562f,3
1,0b33cdec-f69f-d533-1841-cff1c3cc17dc,1
2,e28969c1-cc28-2b27-9424-ffa59d85d673,1
3,33dda6d1-bc76-060b-77c4-e278d1a93291,1
4,1748fa10-1d91-69e5-bc0f-2b4041712e11,1
5,a519942f-78b5-f321-1824-b36cdcde170c,1
6,0abb2932-da50-ba64-cc11-c24e1fb1da05,1
7,cd1762bb-df22-3c40-7ff3-6a4b118a4985,1
8,f5625e35-e54f-9614-cf5e-2d284fd6baed,1
9,bf17b3d6-8405-672d-3685-a0a78ad7b4ec,1


Número de pacientes:  17


In [219]:
# Add, for each patient, whether they died with NF (DEATH_FN) and the age at
# which they had NF (AGE_FN_YEARS), both taken from the scenario feature tables.
NF_OUTCOME_COLUMNS = ['DEATH_FN', 'AGE_FN_YEARS']

# The result is stored back under the same name, so columns left by a previous
# run of this cell are dropped first. Without this, re-running the cell would
# merge again and produce DEATH_FN_x / DEATH_FN_y style duplicates.
rt01_b_c = (
    rt01_b_c
    .drop(columns=NF_OUTCOME_COLUMNS, errors='ignore')
    .merge(scenario01[['PATIENT'] + NF_OUTCOME_COLUMNS], on='PATIENT', how='left')
)
display(rt01_b_c)

rt02_b_c = (
    rt02_b_c
    .drop(columns=NF_OUTCOME_COLUMNS, errors='ignore')
    .merge(scenario02[['PATIENT'] + NF_OUTCOME_COLUMNS], on='PATIENT', how='left')
)
display(rt02_b_c)

Unnamed: 0,PATIENT,RT_CONT,DEATH_FN,AGE_FN_YEARS
0,6ada49d6-cce2-faea-781d-5f4c282c49b4,3,1,9
1,160a53cf-63b1-790d-5e95-307e88c1dc9c,2,1,21
2,8288abe0-04e8-ee34-c028-535da273c3dd,2,0,21
3,11089781-c268-6838-642e-2c2c9edbb694,1,0,9
4,63f9799b-e770-b92d-d74e-d57609cd775e,1,0,15
5,721e493e-23b3-af08-8571-54b20617f159,1,0,14
6,05313452-3962-82d9-60a2-cd79e464cc80,1,0,2
7,dd902384-4955-83d4-4b52-7909f0f91e51,1,1,13
8,11d7ecb8-1e4b-ff44-ff40-80755767512c,1,0,7
9,eabfa666-29b5-64f3-105a-e8ea7653ae70,1,1,13


Unnamed: 0,PATIENT,RT_CONT,DEATH_FN,AGE_FN_YEARS
0,3edd7198-2494-cc95-d844-ab3a3adb562f,3,0,19
1,0b33cdec-f69f-d533-1841-cff1c3cc17dc,1,0,4
2,e28969c1-cc28-2b27-9424-ffa59d85d673,1,0,3
3,33dda6d1-bc76-060b-77c4-e278d1a93291,1,0,20
4,1748fa10-1d91-69e5-bc0f-2b4041712e11,1,1,7
5,a519942f-78b5-f321-1824-b36cdcde170c,1,0,13
6,0abb2932-da50-ba64-cc11-c24e1fb1da05,1,0,2
7,cd1762bb-df22-3c40-7ff3-6a4b118a4985,1,0,19
8,f5625e35-e54f-9614-cf5e-2d284fd6baed,1,0,2
9,bf17b3d6-8405-672d-3685-a0a78ad7b4ec,1,0,3


## Salvando as tabelas com as *features*

In [220]:
# Write each scenario's feature table to a CSV file in the working directory,
# leaving the row index out of the file.
for features_table, features_path in (
    (scenario01, 'features_observations_01.csv'),
    (scenario02, 'features_observations_02.csv'),
):
  features_table.to_csv(features_path, index=False)