# $\text{Importando módulos e bibliotecas / Coleta de Arquivos}$

In [1]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import numpy as np
from biosppy.signals import ecg


In [2]:
## 1: collect the paths of the measurement files
aurorabp_o_data = 'aurorabp-sample-data-main/sample/measurements_oscillometric'
patient_meas = [f'o{i:03d}' for i in range(6)]  # 'o000' through 'o005'

# Accumulates the full path of every .tsv file found
tsv_paths = []

for patient in patient_meas:
    # The patient id doubles as the folder name under the data root
    path = os.path.join(aurorabp_o_data, patient)
    patient_files = glob.glob(os.path.join(path, '*.tsv'))
    tsv_paths += patient_files

# Show the paths that were found, one per line
for file_path in tsv_paths:
    print(file_path)


aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_23.tsv
aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_25.tsv
aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_27.tsv
aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_28.tsv
aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_30.tsv
aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_33.tsv
aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_35.tsv
aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_37.tsv
aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_38.tsv
aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_39.tsv


In [3]:
# Keep the collected path list under a second name
filepath = tsv_paths

# Number of measurement files found
print(len(tsv_paths))

306


# $\text{Leitura do arquivo de Features}$

In [4]:
# Load the ABP feature table (features.tsv), a tab-separated file
features = 'aurorabp-sample-data-main/sample/features.tsv'

df = pd.read_csv(features, delimiter='\t')

df

Unnamed: 0,pid,phase,measurement,date_time,sbp,dbp,baseline_sbp,baseline_dbp,delta_sbp,delta_dbp,...,delta_dicrotic_notch_height_pressure,delta_dpdt_pressure,delta_ejection_duration_fraction_pressure,delta_ejection_duration_pressure,delta_hr_pressure,delta_quality_pressure,delta_rwat_pressure,delta_sysrise_pressure,delta_invpat_pressure,delta_rpat_pressure
0,o000,ambulatory,measurement 23,2018-01-01 12:17:45,,,135.5,93.0,,,...,,,,,,-0.753411,,,,
1,o000,ambulatory,measurement 25,2018-01-01 12:47:27,141.0,88.0,135.5,93.0,5.5,-5.0,...,-0.142595,-0.646054,0.034060,-0.0325,18.043242,0.166928,-0.035,-0.0350,-0.339737,0.00925
2,o000,ambulatory,measurement 27,2018-01-01 13:17:30,139.0,93.0,135.5,93.0,3.5,0.0,...,,,,,,-0.753411,,,,
3,o000,ambulatory,measurement 28,2018-01-01 13:46:55,,,135.5,93.0,,,...,,,,,,-0.753411,,,,
4,o000,ambulatory,measurement 30,2018-01-01 14:17:10,,,135.5,93.0,,,...,,,,,,-0.753411,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
380,a005,return,Temporal challenge start 1,2018-01-02 14:38:34,115.0,71.0,110.0,68.0,5.0,3.0,...,0.093355,-0.938597,0.016637,-0.0025,3.002960,0.000000,0.010,0.0675,-0.813435,0.01925
381,a005,return,Temporal challenge start 2,2018-01-02 14:40:13,109.0,71.0,110.0,68.0,-1.0,3.0,...,0.007684,-3.519975,-0.005762,-0.0175,2.754045,0.000000,0.010,-0.0425,-0.500740,0.01125
382,a005,return,Temporal challenge start 3,2018-01-02 14:41:54,121.5,74.0,110.0,68.0,11.5,6.0,...,-0.069275,-4.135001,0.011637,-0.0125,4.640690,0.000000,0.010,0.0125,-0.459383,0.01025
383,a005,synthetic,Calibration average values,,110.0,68.0,110.0,68.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.000,0.0000,0.000000,0.00000


## Filtragem por pacientes

In [5]:
# Keep the rows whose pid starts with 'o00', then only the identification
# and blood-pressure columns; copy so later edits do not touch `df`
df_filtrado = (
    df
    .query("pid.str.startswith('o00')")
    .loc[:, ['pid', 'measurement', 'sbp', 'dbp']]
    .copy()
)

In [6]:
# Display the filtered frame (pid, measurement, sbp, dbp)
df_filtrado

Unnamed: 0,pid,measurement,sbp,dbp
0,o000,measurement 23,,
1,o000,measurement 25,141.0,88.0
2,o000,measurement 27,139.0,93.0
3,o000,measurement 28,,
4,o000,measurement 30,,
...,...,...,...,...
290,o005,Sitting arm up,126.0,75.0
291,o005,Calibration average values,114.0,66.0
292,o005,Calibration closest values,115.0,65.0
293,o005,Seated calibration average values,125.0,85.0


### Resultado da listagem de medições

In [7]:
p = [f'o{i:03d}' for i in range(6)]

# Map each patient id to the list of its measurement labels
medicoes_por_paciente = {}
for pid in p:
    do_paciente = df_filtrado['pid'] == pid
    medicoes_por_paciente[pid] = df_filtrado.loc[do_paciente, 'measurement'].tolist()

# Print the measurement labels of every patient
for pid, medicoes in medicoes_por_paciente.items():
    print(f"\nPaciente {pid}:\n{medicoes}")

# Per patient: non-null entry counts for the measurement, sbp and dbp columns
quantidade = df_filtrado.groupby(['pid']).count()
quantidade


Paciente o000:
['measurement 23', 'measurement 25', 'measurement 27', 'measurement 28', 'measurement 30', 'measurement 33', 'measurement 35', 'measurement 37', 'measurement 38', 'measurement 39', 'measurement 40', 'measurement 41', 'measurement 42', 'measurement 43', 'measurement 44', 'measurement 45', 'measurement 46', 'measurement 48', 'measurement 50', 'measurement 52', 'measurement 53', 'measurement 55', 'measurement 56', 'measurement 58', 'measurement 59', 'measurement 60', 'measurement 63', 'measurement 64', 'measurement 66', 'measurement 67', 'measurement 68', 'measurement 70', 'measurement 71', 'measurement 73', 'measurement 74', 'measurement 75', 'measurement 77', 'measurement 79', 'measurement 80', 'measurement 83', 'measurement 84', 'Cool down 1', 'Cool down 2', 'Running', 'Sitting arm down', 'Sitting arm lap', 'Sitting arm up', 'Standing arm down', 'Standing arm up', 'Supine 1', 'Supine 2', 'Walking', 'Signal check', 'Sitting arm down', 'Sitting arm lap', 'Sitting arm up',

Unnamed: 0_level_0,measurement,sbp,dbp
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
o000,60,51,51
o001,58,54,54
o003,64,64,64
o004,55,51,51
o005,58,56,56


# $\text{Criação do dataframe}$

## Bibliotecas e módulos

In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import numpy as np
from biosppy.signals import ecg
import os.path
import icecream as ic
import sweetviz as sv
import re

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
from VitalPy_main.src.vitalpython.ppg.PPGSignal import PPGSignal
from VitalPy_main.src.vitalpython.ppg.features.FeatureExtractor import FeatureExtractor #para a extração de features

In [10]:
# Rebind np.VisibleDeprecationWarning to the built-in DeprecationWarning.
# NOTE(review): presumably a compatibility shim for a dependency that still
# looks this attribute up on the numpy namespace -- confirm which one needs it.
np.VisibleDeprecationWarning = DeprecationWarning

## $\text{Leitura de arquivos para o processamento do sinal}$
### $\text{e declaração de funções auxiliares}$

In [11]:
# Names of the features to analyse, in the order they are requested
features_selecionadas = [
    # names with the 'temp_' prefix
    'temp_key_tr_md_s', 'temp_key_t_a_md', 'temp_key_t_a_b', 'temp_key_t_a_d',
    'temp_key_slp_md',
    'temp_key_id2_o', 'temp_key_id2_b', 'temp_key_id2_s', 'temp_key_id2_ip', 'temp_key_id2_d',
    'temp_key_a_a_s', 'temp_key_a_md_s',
    'temp_max_p', 'temp_zc1d',
    # names with the 'bw_' prefix
    'bw_dbw10', 'bw_bw33', 'bw_bwr50', 'bw_dbw90',
    # names with the 'poly_' / 'spect_' prefixes
    'poly_coeff_coef4', 'spect_spectral_skewness',
    # names ending in '__'
    'f1__', 'spectral_distance__', 'median_frequency__', 'spectral_maxpeaks__',
    'spectral_roll_off__', 'human_range_energy__', 'power_bandwidth__', 'wavelet_entropy__',
]



In [12]:

def processar_onda(path_csv, features_desejadas):
    '''
    Read one waveform file and return the requested features of its PPG signal.

    The tab-separated file is loaded, its 'optical' / 'ekg' columns are
    renamed to 'ppg' / 'ecg', and the 't' and 'ppg' columns are handed to
    PPGSignal; the result of its extract_features() is then narrowed down to
    the requested names.

    Args:
        path_csv: path of the waveform file
        features_desejadas: list with the names of the features to keep

    Returns:
        dict mapping every requested name that is present in the extracted
        features to its value; requested names that are absent are left out.

    Raises:
        ValueError: if extract_features() returns None.
    '''
    novos_nomes = {'optical': 'ppg', 'ekg': 'ecg'}
    onda = pd.read_csv(path_csv, delimiter='\t').rename(columns=novos_nomes)

    # only the time and ppg columns are passed on
    ppg = PPGSignal(signal=onda[['t', 'ppg']], verbose=0)
    features = ppg.extract_features()

    # NOTE(review): None presumably means the signal failed the
    # pre-processing criteria inside PPGSignal -- confirm against that class
    if features is None:
        raise ValueError("Extração falhou: sem templates válidos")

    features_filtradas = {}
    for nome in features_desejadas:
        if nome in features:
            features_filtradas[nome] = features.get(nome, None)
    return features_filtradas

In [13]:
def pacientQuery(root_dir, digit):
    '''
    List the measurement files stored in one patient's folder.

    Args:
        root_dir: directory that contains the per-patient folders
        digit: last digit of the patient id, from 0 to 5 (folder 'o00<digit>')

    Returns:
        Sorted list with the paths of the '*.tsv' files inside the folder;
        empty when the folder is missing or holds no such file.

    Raises:
        ValueError: if digit is outside the 0..5 range.
    '''
    if not (0 <= digit <= 5):
        # previously spelled `valueError`, which raised NameError instead
        raise ValueError(" PID vai de '0' até '5'")

    patientFolder = f"o00{digit}"
    patientPath = os.path.join(root_dir, patientFolder)

    # glob returns entries in arbitrary order; sorting keeps the row order of
    # the resulting dataframes stable across runs and platforms
    patientFile = sorted(glob.glob(os.path.join(patientPath, '*.tsv')))

    return patientFile
            

In [14]:
def dataLossCount(patientPath, patientDf):
    '''
    Count how many measurement files did not make it into the dataframe.

    Args:
        patientPath: list with the paths of all measurement files of the patient
        patientDf: dataframe built from the files that were processed

    Returns:
        len(patientPath) minus len(patientDf).
    '''
    return len(patientPath) - len(patientDf)

In [15]:
"""
-> Processa os arquivos de medição de um paciente e retorna um DataFrame com as features.
->args:
    - paths (list): Lista com os caminhos dos arquivos .tsv do paciente.
    - pid (str): ID do paciente (ex: 'o000').
    - features_selecionadas (list): Lista com os nomes das features a serem extraídas.
    - processar_onda_func (function): Função que processa uma medição e retorna uma linha de dados.
"""
def patientFeaturesDF(paths, pid, features_selecionadas, processar_onda):
    data = []

    for i, path in enumerate(paths):
        try:
            linha = processar_onda(path, features_selecionadas)
            filename = os.path.basename(path)
            filename1 = os.path.splitext(filename)[0]
            filename2 = filename1.split('.')
            filename3 = filename2[-1] if len(filename2) >=2 else filename1
            measurement_str = filename3.replace('_',' ')
            
           # match = re.search(r'measurement_(\d+)', filename)
            #measurement_str = f"measurement {match.group(1)}" if match else "measurement ?"
            linha['measurement'] = measurement_str
            linha['pid'] = pid
            data.append(linha)
            
        except Exception as e:
            print(f"Erro na onda {i+1} ({path}): {e}")

    df = pd.DataFrame(data)
    cols = ['pid', 'measurement'] + [col for col in df.columns if col not in ['pid', 'measurement']]
    #df.index = [f"onda_{i+1}" for i in range(len(df))]
   # df['pid'] = pid
    
    return df[cols]


# $\text{Extração de features por paciente}$

## $\text{Extração das features para o paciente o000}$

In [16]:
# Collect the o000 measurement files, then extract the selected features
o00path = pacientQuery(root_dir=aurorabp_o_data, digit=0)
o000_features = patientFeaturesDF(
    paths=o00path,
    pid='o000',
    features_selecionadas=features_selecionadas,
    processar_onda=processar_onda,
)
o000_features

NO TEMPLATES
Erro na onda 4 (aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_28.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 18 (aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_48.tsv): Extração falhou: sem templates válidos
Erro na onda 24 (aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_58.tsv): Extração falhou: sem templates válidos
Erro na onda 34 (aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_73.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 36 (aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_75.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 38 (aurorabp-sample-data-main/sample/measurements_oscillometric\o000\o000.ambulatory.measurement_79.tsv): Extração falhou: sem templates válidos
N

Unnamed: 0,pid,measurement,temp_key_tr_md_s,temp_key_t_a_md,temp_key_t_a_b,temp_key_t_a_d,temp_key_slp_md,temp_key_id2_o,temp_key_id2_b,temp_key_id2_s,...,poly_coeff_coef4,spect_spectral_skewness,f1__,spectral_distance__,median_frequency__,spectral_maxpeaks__,spectral_roll_off__,human_range_energy__,power_bandwidth__,wavelet_entropy__
0,o000,measurement 23,0.426833,-0.05,-0.103,-0.6215,14.505478,168.891637,-274.382391,-362.405379,...,-14792750000.0,24.4109,0.976562,-78015550.0,1.88253,284,147.213855,0.663586,3.756574,1.814845
1,o000,measurement 25,0.495073,-0.05225,-0.092667,-0.556667,18.780075,305.150761,-479.190654,-536.827714,...,13638040000.0,25.229503,0.034332,-1333397000.0,1.757593,49,9.420697,0.749769,1.757099,1.745904
2,o000,measurement 27,0.468891,-0.064375,-0.115,-0.551,16.693497,349.228281,-369.097965,-406.308738,...,2522347000.0,20.090578,0.118256,-1064728000.0,1.732352,306,157.968818,0.616718,3.354978,1.769589
3,o000,measurement 30,0.465073,-0.061333,-0.094,-0.554,19.536689,531.618439,-420.868434,-593.006072,...,-87847330000.0,12.266472,0.457764,-18423780.0,3.504673,145,165.88785,0.809987,2.910361,1.832856
4,o000,measurement 33,0.380412,-0.050667,-0.112,-0.612667,15.329098,340.068457,-242.385224,-380.30829,...,-22685550000.0,14.697234,0.366211,-37854770.0,3.10559,7,161.490683,0.801297,2.582645,1.807073
5,o000,measurement 35,0.458132,-0.057565,-0.100957,-0.545826,18.586578,412.785331,-452.507003,-370.639063,...,6770105000.0,24.841284,0.038147,-820923000.0,1.828154,1272,104.433272,0.819959,3.045995,1.760694
6,o000,measurement 37,0.482112,-0.046632,-0.091263,-0.623158,16.787067,197.798211,-466.724554,-286.346345,...,-28766220000.0,24.282042,0.076294,-3894706000.0,2.989301,496,138.687854,0.751749,1.651463,1.746047
7,o000,measurement 38,0.485765,-0.045667,-0.086,-0.787,11.393372,106.937914,-305.245435,-192.87901,...,-400707000.0,29.145436,0.221252,-439071600.0,2.305919,419,153.535742,0.94672,1.344086,1.815248
8,o000,measurement 39,0.461961,-0.045667,-0.088667,-0.746333,13.81534,126.561524,-395.196749,-138.91734,...,-4151545000.0,29.425544,0.099182,-265618400.0,1.865672,216,114.013267,0.867948,1.864126,1.78757
9,o000,measurement 40,0.451902,-0.049143,-0.094286,-0.864286,12.092696,106.910492,-299.62503,-125.410906,...,-344577000.0,30.177691,0.267029,-744976400.0,2.007412,492,144.688079,0.871516,1.542734,1.786496


In [17]:
# Number of o000 measurement files that produced no feature row
print(
    "Quantidade de arquivos que nao foram processados: "
    f"{dataLossCount(o00path, o000_features)}"
)

Quantidade de arquivos que nao foram processados: 7


## $\text{Extração das features para o paciente o001}$

In [18]:
# Collect the o001 measurement files, then extract the selected features
o01path = pacientQuery(root_dir=aurorabp_o_data, digit=1)
o001_features = patientFeaturesDF(
    paths=o01path,
    pid='o001',
    features_selecionadas=features_selecionadas,
    processar_onda=processar_onda,
)
o001_features

NO TEMPLATES
Erro na onda 5 (aurorabp-sample-data-main/sample/measurements_oscillometric\o001\o001.ambulatory.measurement_23.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 13 (aurorabp-sample-data-main/sample/measurements_oscillometric\o001\o001.ambulatory.measurement_32.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 15 (aurorabp-sample-data-main/sample/measurements_oscillometric\o001\o001.ambulatory.measurement_37.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 18 (aurorabp-sample-data-main/sample/measurements_oscillometric\o001\o001.ambulatory.measurement_41.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 22 (aurorabp-sample-data-main/sample/measurements_oscillometric\o001\o001.ambulatory.measurement_45.tsv): Extração falhou: sem templates válidos
Erro na onda 27 (aurorabp-sample-data-main/sample/measurements_oscillometric\o001\o001.ambulatory.measurement_50.tsv): Extração falhou: sem templa

Unnamed: 0,pid,measurement,temp_key_tr_md_s,temp_key_t_a_md,temp_key_t_a_b,temp_key_t_a_d,temp_key_slp_md,temp_key_id2_o,temp_key_id2_b,temp_key_id2_s,...,poly_coeff_coef4,spect_spectral_skewness,f1__,spectral_distance__,median_frequency__,spectral_maxpeaks__,spectral_roll_off__,human_range_energy__,power_bandwidth__,wavelet_entropy__
0,o001,measurement 19,0.439147,-0.049333,-0.092667,-0.806667,13.07746,116.657728,-333.23444,-99.61045,...,-1783359000.0,32.840997,0.099182,-414606200.0,1.160991,576,109.133127,0.892716,1.546193,1.773744
1,o001,measurement 20,0.41187,-0.049667,-0.092833,-0.809333,14.702356,216.422047,-343.382501,-135.230758,...,1183575000.0,30.448154,0.05722,-1607050000.0,2.25137,653,132.439311,0.928736,1.369863,1.770064
2,o001,measurement 21,0.422959,-0.048,-0.092,-0.851,12.919203,93.966174,-319.811593,-93.216175,...,-1901253000.0,36.568799,0.198364,-652009800.0,1.478197,400,76.496674,0.918514,1.8457,1.793784
3,o001,measurement 22,0.438068,-0.046933,-0.090667,-0.833733,13.529997,109.973466,-362.042999,-96.82051,...,-325497500.0,33.661646,0.034332,-5537570000.0,1.204456,44,45.242397,0.926329,1.354402,1.747493
4,o001,measurement 25,0.463234,-0.056,-0.106667,-0.991778,9.134207,34.287731,-182.603032,-145.78073,...,104553500.0,30.153732,0.125885,-3098111000.0,1.449275,901,116.666667,0.915338,1.241465,1.781693
5,o001,measurement 26,0.379691,-0.063333,-0.119333,-0.986333,10.205307,91.724291,-162.044867,-136.513117,...,186847300.0,39.43323,0.091553,-424258600.0,0.970874,266,69.255663,0.967833,1.131587,1.782782
6,o001,measurement 27,0.513481,-0.048333,-0.087667,-0.964333,6.554831,21.650868,-185.65976,-86.991012,...,-199342600.0,31.3141,0.12207,-1338032000.0,1.057402,630,55.891239,0.945284,1.056444,1.790079
7,o001,measurement 28,0.422303,-0.0532,-0.1108,-0.9276,10.012795,63.197852,-193.263435,-124.547762,...,-57340660.0,38.584656,0.099182,-355934100.0,1.601281,506,115.692554,0.954073,1.19952,1.792924
8,o001,measurement 29,0.467792,-0.058,-0.124,-0.891,10.05041,59.253587,-221.171353,-185.575437,...,-303809600.0,42.217497,0.076294,-269248500.0,1.207729,271,55.21049,0.964961,1.206481,1.779446
9,o001,measurement 30,0.463108,-0.064667,-0.135333,-0.934,10.374592,94.953593,-211.864466,-121.670652,...,-22252280.0,31.19755,0.076294,-580665000.0,1.002674,296,5.84893,0.924077,1.335559,1.769453


In [19]:
# Number of o001 measurement files that produced no feature row
print(
    "Quantidade de arquivos que nao foram processados: "
    f"{dataLossCount(o01path, o001_features)}"
)

Quantidade de arquivos que nao foram processados: 14


## $\text{Extração das features para o paciente o002}$

In [20]:
# Collect the o002 measurement files, then extract the selected features
o02path = pacientQuery(root_dir=aurorabp_o_data, digit=2)
o002_features = patientFeaturesDF(
    paths=o02path,
    pid='o002',
    features_selecionadas=features_selecionadas,
    processar_onda=processar_onda,
)
o002_features

NO TEMPLATES
Erro na onda 1 (aurorabp-sample-data-main/sample/measurements_oscillometric\o002\o002.ambulatory.measurement_16.tsv): Extração falhou: sem templates válidos
Erro na onda 2 (aurorabp-sample-data-main/sample/measurements_oscillometric\o002\o002.ambulatory.measurement_20.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 4 (aurorabp-sample-data-main/sample/measurements_oscillometric\o002\o002.ambulatory.measurement_23.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 6 (aurorabp-sample-data-main/sample/measurements_oscillometric\o002\o002.ambulatory.measurement_26.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 7 (aurorabp-sample-data-main/sample/measurements_oscillometric\o002\o002.ambulatory.measurement_27.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 9 (aurorabp-sample-data-main/sample/measurements_oscillometric\o002\o002.ambulatory.measurement_31.tsv): Extração falhou: sem templates v

Unnamed: 0,pid,measurement,temp_key_tr_md_s,temp_key_t_a_md,temp_key_t_a_b,temp_key_t_a_d,temp_key_slp_md,temp_key_id2_o,temp_key_id2_b,temp_key_id2_s,...,poly_coeff_coef4,spect_spectral_skewness,f1__,spectral_distance__,median_frequency__,spectral_maxpeaks__,spectral_roll_off__,human_range_energy__,power_bandwidth__,wavelet_entropy__
0,o002,measurement 21,0.318888,-0.05825,-0.16,-0.8935,8.987283,159.088978,-211.81886,-141.773717,...,-1103882000.0,30.767319,0.068665,-380830500.0,1.321353,709,120.639535,0.969967,1.056245,1.790198
1,o002,measurement 24,0.348457,-0.063,-0.142,-0.86775,8.204042,169.598103,-168.845319,-162.223074,...,-1029909000.0,30.721035,0.061035,-267774500.0,1.081666,234,99.918875,0.992729,0.270197,1.797224
2,o002,measurement 29,0.31376,-0.076,-0.184,-0.995333,8.597782,140.504536,-170.299028,-202.740551,...,360305400.0,24.660887,0.183105,-42492660.0,1.958225,240,150.78329,0.979383,0.977836,1.844216
3,o002,measurement 39,0.340396,-0.047,-0.098333,-0.744333,12.211536,270.375101,-323.742592,-171.751111,...,1231725000.0,23.904355,0.236511,-111782900.0,1.262626,565,44.191919,0.892924,1.261034,1.790297
4,o002,measurement 44,0.340277,-0.049143,-0.097143,-0.872857,11.106542,163.206632,-280.840928,-166.235915,...,-776034300.0,32.998765,0.076294,-255865000.0,1.689189,596,101.812039,0.957433,1.227747,1.801258
5,o002,measurement 45,0.329233,-0.0625,-0.098,-0.8285,10.73801,236.043152,-266.215,-151.328627,...,-960213100.0,26.435369,0.152588,-62550870.0,1.431844,244,149.198167,0.983125,1.144165,1.820576
6,o002,measurement 46,0.364241,-0.041,-0.0895,-0.913,8.16384,148.062658,-202.5637,-168.430097,...,-560273100.0,17.33357,0.106812,-84101450.0,0.996016,289,57.021912,0.98782,1.243163,1.817217


In [21]:
# Number of o002 measurement files that produced no feature row
print(
    "Quantidade de arquivos que nao foram processados: "
    f"{dataLossCount(o02path, o002_features)}"
)

Quantidade de arquivos que nao foram processados: 26


## $\text{Extração das features para o paciente o003}$

In [22]:
# Collect the o003 measurement files, then extract the selected features
o03path = pacientQuery(root_dir=aurorabp_o_data, digit=3)
o003_features = patientFeaturesDF(
    paths=o03path,
    pid='o003',
    features_selecionadas=features_selecionadas,
    processar_onda=processar_onda,
)
o003_features

NO TEMPLATES
Erro na onda 2 (aurorabp-sample-data-main/sample/measurements_oscillometric\o003\o003.ambulatory.measurement_19.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 3 (aurorabp-sample-data-main/sample/measurements_oscillometric\o003\o003.ambulatory.measurement_20.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 4 (aurorabp-sample-data-main/sample/measurements_oscillometric\o003\o003.ambulatory.measurement_21.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 5 (aurorabp-sample-data-main/sample/measurements_oscillometric\o003\o003.ambulatory.measurement_23.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 6 (aurorabp-sample-data-main/sample/measurements_oscillometric\o003\o003.ambulatory.measurement_24.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 7 (aurorabp-sample-data-main/sample/measurements_oscillometric\o003\o003.ambulatory.measurement_25.tsv): Extração falhou: se

Unnamed: 0,pid,measurement,temp_key_tr_md_s,temp_key_t_a_md,temp_key_t_a_b,temp_key_t_a_d,temp_key_slp_md,temp_key_id2_o,temp_key_id2_b,temp_key_id2_s,...,poly_coeff_coef4,spect_spectral_skewness,f1__,spectral_distance__,median_frequency__,spectral_maxpeaks__,spectral_roll_off__,human_range_energy__,power_bandwidth__,wavelet_entropy__
0,o003,measurement 18,0.510988,-0.0372,-0.0608,-0.5404,22.720511,444.327506,-748.516364,-524.582876,...,158441200000.0,14.705547,0.366211,-26149260.0,4.155125,161,170.706371,0.715937,4.492053,1.820416
1,o003,measurement 26,0.508381,-0.046667,-0.066667,-0.551333,24.748888,693.112997,-727.18338,-666.817724,...,-55928920000.0,15.71823,0.366211,-18459850.0,3.480278,146,161.2529,0.746532,4.62963,1.827772
2,o003,measurement 27,0.54774,-0.0394,-0.0616,-0.5814,21.642716,308.45572,-723.872955,-623.431299,...,-150664100000.0,16.573077,0.198364,-289109600.0,3.192848,631,151.819923,0.459565,3.668262,1.784741
3,o003,measurement 29,0.545423,-0.041,-0.0745,-0.608167,17.173741,161.172115,-634.179706,-391.976842,...,-21995340000.0,24.303322,0.152588,-786938700.0,2.503756,425,129.193791,0.601948,2.752753,1.789538
4,o003,measurement 30,0.480122,-0.043538,-0.083692,-0.672308,13.961027,132.083954,-441.38474,-161.175356,...,-10779860000.0,29.181149,0.247955,-1920963000.0,1.567071,838,57.563728,0.822944,1.670844,1.765435
5,o003,measurement 33,0.369197,-0.058667,-0.114133,-0.6632,14.659265,251.216162,-263.751417,-193.036828,...,-20080840000.0,29.567315,0.045776,-2311481000.0,2.505783,37,138.010794,0.877623,1.541129,1.753761
6,o003,measurement 35,0.547343,-0.046,-0.078,-0.635333,16.329688,342.976822,-635.593072,-317.29078,...,3274549000.0,17.050057,0.549316,-151991400.0,2.89296,349,159.112825,0.558711,2.888782,1.835612
7,o003,measurement 37,0.480083,-0.0408,-0.079,-0.6158,15.604715,151.254327,-544.706299,-120.907533,...,-35511850000.0,23.935736,0.244141,-1075053000.0,1.788909,542,88.401908,0.765668,2.234803,1.779449
8,o003,measurement 38,0.504124,-0.045231,-0.087077,-0.675077,13.779798,124.259052,-435.0337,-192.021729,...,-13536310000.0,31.572353,0.30899,-1933590000.0,2.801993,413,149.335824,0.811392,1.451981,1.765211
9,o003,measurement 39,0.430624,-0.045273,-0.087455,-0.752182,14.273751,160.054729,-365.354774,-150.141046,...,-3875286000.0,23.818979,0.038147,-1966839000.0,2.385279,773,135.620173,0.877217,1.475931,1.778395


In [23]:
# Number of o003 measurement files that produced no feature row
print(
    "Quantidade de arquivos que nao foram processados: "
    f"{dataLossCount(o03path, o003_features)}"
)

Quantidade de arquivos que nao foram processados: 19


## $\text{Extração das features para o paciente o004}$

In [24]:
# Collect the o004 measurement files, then extract the selected features
o04path = pacientQuery(root_dir=aurorabp_o_data, digit=4)
o004_features = patientFeaturesDF(
    paths=o04path,
    pid='o004',
    features_selecionadas=features_selecionadas,
    processar_onda=processar_onda,
)
o004_features

NO TEMPLATES
Erro na onda 1 (aurorabp-sample-data-main/sample/measurements_oscillometric\o004\o004.ambulatory.measurement_31.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 2 (aurorabp-sample-data-main/sample/measurements_oscillometric\o004\o004.ambulatory.measurement_33.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 7 (aurorabp-sample-data-main/sample/measurements_oscillometric\o004\o004.ambulatory.measurement_39.tsv): Extração falhou: sem templates válidos
Erro na onda 9 (aurorabp-sample-data-main/sample/measurements_oscillometric\o004\o004.ambulatory.measurement_41.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 24 (aurorabp-sample-data-main/sample/measurements_oscillometric\o004\o004.ambulatory.measurement_56.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 29 (aurorabp-sample-data-main/sample/measurements_oscillometric\o004\o004.ambulatory.measurement_64.tsv): Extração falhou: sem templates

Unnamed: 0,pid,measurement,temp_key_tr_md_s,temp_key_t_a_md,temp_key_t_a_b,temp_key_t_a_d,temp_key_slp_md,temp_key_id2_o,temp_key_id2_b,temp_key_id2_s,...,poly_coeff_coef4,spect_spectral_skewness,f1__,spectral_distance__,median_frequency__,spectral_maxpeaks__,spectral_roll_off__,human_range_energy__,power_bandwidth__,wavelet_entropy__
0,o004,measurement 34,0.461671,-0.055368,-0.101053,-0.604421,16.727057,251.895418,-407.194274,-360.553366,...,25941420000.0,23.225963,0.049591,-1292571000.0,1.726406,956,14.633344,0.813238,1.643655,1.753753
1,o004,measurement 35,0.579964,-0.046333,-0.082667,-0.578,13.151498,136.410502,-483.977419,-325.920057,...,-206473900.0,26.564193,0.053406,-576737700.0,1.525941,343,75.025432,0.769423,2.795426,1.76168
2,o004,measurement 36,0.467314,-0.054,-0.10525,-0.637875,14.06102,164.016721,-330.386197,-269.667179,...,-12577800000.0,31.747616,0.049591,-1669845000.0,1.64054,352,117.298578,0.803321,1.548834,1.756672
3,o004,measurement 37,0.44119,-0.049263,-0.092421,-0.588,16.964899,306.94913,-407.06135,-286.829713,...,-51348740000.0,25.288332,0.106812,-1693342000.0,1.679543,1085,111.269735,0.811666,2.770316,1.754727
4,o004,measurement 40,0.452082,-0.046,-0.088533,-0.644933,16.65705,212.704488,-456.909718,-198.206612,...,-30816640000.0,31.19839,0.041962,-1762459000.0,1.740139,369,54.331013,0.826007,1.545894,1.753117
5,o004,measurement 42,0.449872,-0.046632,-0.088421,-0.643263,15.574998,174.168652,-388.887316,-225.946476,...,-30108730000.0,35.224083,0.034332,-2421804000.0,1.522997,708,9.747182,0.839842,1.598417,1.746288
6,o004,measurement 43,0.449083,-0.047429,-0.090857,-0.601905,16.520851,228.586292,-381.18152,-308.479775,...,-36746210000.0,27.944386,0.038147,-5541982000.0,1.77305,44,91.23818,0.81617,1.698671,1.742713
7,o004,measurement 44,0.443063,-0.0532,-0.1338,-0.6653,12.263009,88.85284,-249.577314,-273.071799,...,-383048700.0,36.812073,0.030518,-5496854000.0,1.46771,43,86.175566,0.879387,1.537172,1.738818
8,o004,measurement 45,0.438249,-0.050857,-0.112571,-0.701857,12.121509,90.98038,-235.347551,-239.56022,...,-4834976000.0,35.596562,0.114441,-3111169000.0,1.507727,1275,100.735017,0.820844,2.543331,1.759329
9,o004,measurement 46,0.483597,-0.05675,-0.13175,-0.676625,11.485775,71.469861,-243.259238,-222.200382,...,-1717884000.0,38.234807,0.041962,-2826522000.0,1.35318,38,54.296346,0.899293,1.437268,1.744955


In [25]:
# Number of o004 measurement files that produced no feature row
print(
    "Quantidade de arquivos que nao foram processados: "
    f"{dataLossCount(o04path, o004_features)}"
)

Quantidade de arquivos que nao foram processados: 18


## $\text{Extração das features para o paciente o005}$

In [26]:
# Collect the o005 measurement files, then extract the selected features
o05path = pacientQuery(root_dir=aurorabp_o_data, digit=5)
o005_features = patientFeaturesDF(
    paths=o05path,
    pid='o005',
    features_selecionadas=features_selecionadas,
    processar_onda=processar_onda,
)
o005_features

NO TEMPLATES
Erro na onda 2 (aurorabp-sample-data-main/sample/measurements_oscillometric\o005\o005.ambulatory.measurement_35.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 3 (aurorabp-sample-data-main/sample/measurements_oscillometric\o005\o005.ambulatory.measurement_36.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 6 (aurorabp-sample-data-main/sample/measurements_oscillometric\o005\o005.ambulatory.measurement_39.tsv): Extração falhou: sem templates válidos
Erro na onda 7 (aurorabp-sample-data-main/sample/measurements_oscillometric\o005\o005.ambulatory.measurement_40.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 8 (aurorabp-sample-data-main/sample/measurements_oscillometric\o005\o005.ambulatory.measurement_41.tsv): Extração falhou: sem templates válidos
NO TEMPLATES
Erro na onda 9 (aurorabp-sample-data-main/sample/measurements_oscillometric\o005\o005.ambulatory.measurement_42.tsv): Extração falhou: sem templates v

Unnamed: 0,pid,measurement,temp_key_tr_md_s,temp_key_t_a_md,temp_key_t_a_b,temp_key_t_a_d,temp_key_slp_md,temp_key_id2_o,temp_key_id2_b,temp_key_id2_s,...,poly_coeff_coef4,spect_spectral_skewness,f1__,spectral_distance__,median_frequency__,spectral_maxpeaks__,spectral_roll_off__,human_range_energy__,power_bandwidth__,wavelet_entropy__
0,o005,measurement 34,0.50509,-0.043,-0.0762,-0.6476,20.852175,281.219571,-676.631736,-427.624837,...,-51357330000.0,25.61894,0.076294,-484214200.0,2.912056,689,46.8841,0.665992,1.745708,1.753815
1,o005,measurement 37,0.456322,-0.055,-0.1105,-0.778125,14.569743,144.496121,-331.798499,-188.560654,...,-2542773000.0,32.255706,0.034332,-3764216000.0,1.441141,1568,18.203883,0.889501,1.592115,1.751285
2,o005,measurement 38,0.462851,-0.051429,-0.101286,-0.778571,14.606483,138.227654,-359.578043,-177.770725,...,-4301493000.0,35.335769,0.019073,-3386710000.0,2.071823,615,75.707873,0.90426,1.380739,1.751347
3,o005,measurement 43,0.49934,-0.049667,-0.09,-0.745,16.47726,223.548695,-500.248974,-260.444811,...,-6998991000.0,25.107539,0.045776,-647483100.0,2.516779,730,116.191275,0.78758,2.515723,1.778237
4,o005,measurement 44,0.349232,-0.064,-0.119333,-0.755333,14.550666,282.82604,-276.227878,-138.794372,...,1593861000.0,27.485177,0.12207,-141565700.0,2.570694,393,161.096829,0.858647,1.497647,1.824723
5,o005,measurement 46,0.395076,-0.046933,-0.0868,-0.755733,13.302413,132.099151,-296.287074,-140.406104,...,-2076453000.0,32.839527,0.049591,-3241211000.0,1.409619,1171,47.097844,0.92205,1.492042,1.757098
6,o005,measurement 48,0.450764,-0.0475,-0.092125,-0.726125,16.234945,181.326827,-412.409628,-206.446163,...,-6106857000.0,34.622199,0.038147,-4247386000.0,1.706207,439,6.499838,0.829862,1.461751,1.743566
7,o005,measurement 49,0.512751,-0.0575,-0.093,-0.61625,19.953902,351.608518,-567.842119,-495.893296,...,-40406030000.0,18.131718,0.236511,-138694400.0,1.737452,223,80.30888,0.666998,2.699576,1.766693
8,o005,measurement 50,0.496404,-0.042353,-0.083294,-0.741882,16.077535,119.693453,-521.364672,-234.916703,...,-6763605000.0,31.032376,0.015259,-5572942000.0,2.222881,789,18.005335,0.820034,1.481262,1.743609
9,o005,measurement 51,0.462561,-0.04575,-0.089375,-0.77225,15.429954,158.878075,-394.730321,-267.678729,...,-391123500.0,29.19185,0.034332,-3913155000.0,2.208346,1229,16.60067,0.916112,1.37028,1.749472


In [27]:
# Number of o005 measurement files that produced no feature row
print(
    "Quantidade de arquivos que nao foram processados: "
    f"{dataLossCount(o05path, o005_features)}"
)

Quantidade de arquivos que nao foram processados: 15


# $\text{Junção dos dataframes}$

### Agrupamento / criação de dataframes essenciais para gerar o novo dataframe

In [54]:
# Stack the per-patient feature tables (o000 through o005) into a single frame.
# NOTE(review): pd.concat keeps each input's own row labels, so index values repeat
# across patients in the result and in the index column of the saved CSV.
dataframes = [o000_features, o001_features, o002_features, o003_features, o004_features, o005_features]
patient_features = pd.concat(dataframes)

# to_csv does not create missing parent folders, so make sure 'df/' exists before
# writing; otherwise this cell fails on a fresh checkout.
os.makedirs('df', exist_ok=True)
patient_features.to_csv(r'df/p_features.csv')

patient_features

Unnamed: 0,pid,measurement,temp_key_tr_md_s,temp_key_t_a_md,temp_key_t_a_b,temp_key_t_a_d,temp_key_slp_md,temp_key_id2_o,temp_key_id2_b,temp_key_id2_s,...,poly_coeff_coef4,spect_spectral_skewness,f1__,spectral_distance__,median_frequency__,spectral_maxpeaks__,spectral_roll_off__,human_range_energy__,power_bandwidth__,wavelet_entropy__
0,o000,measurement 23,0.426833,-0.050000,-0.103000,-0.621500,14.505478,168.891637,-274.382391,-362.405379,...,-1.479275e+10,24.410900,0.976562,-7.801555e+07,1.882530,284,147.213855,0.663586,3.756574,1.814845
1,o000,measurement 25,0.495073,-0.052250,-0.092667,-0.556667,18.780075,305.150761,-479.190654,-536.827714,...,1.363804e+10,25.229503,0.034332,-1.333397e+09,1.757593,49,9.420697,0.749769,1.757099,1.745904
2,o000,measurement 27,0.468891,-0.064375,-0.115000,-0.551000,16.693497,349.228281,-369.097965,-406.308738,...,2.522347e+09,20.090578,0.118256,-1.064728e+09,1.732352,306,157.968818,0.616718,3.354978,1.769589
3,o000,measurement 30,0.465073,-0.061333,-0.094000,-0.554000,19.536689,531.618439,-420.868434,-593.006072,...,-8.784733e+10,12.266472,0.457764,-1.842378e+07,3.504673,145,165.887850,0.809987,2.910361,1.832856
4,o000,measurement 33,0.380412,-0.050667,-0.112000,-0.612667,15.329098,340.068457,-242.385224,-380.308290,...,-2.268555e+10,14.697234,0.366211,-3.785477e+07,3.105590,7,161.490683,0.801297,2.582645,1.807073
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34,o005,Supine 2,0.446147,-0.045882,-0.093294,-0.795412,15.675082,115.873488,-404.729669,-232.722296,...,-3.074181e+09,31.742042,0.066757,-2.746664e+10,2.238388,2089,16.962787,0.919167,1.538569,1.743359
35,o005,Walking,0.596738,-0.046400,-0.067400,-0.654400,18.654915,352.853785,-720.871997,-551.476426,...,-3.686523e+10,17.335333,0.289917,-2.351029e+08,3.932584,553,164.887640,0.665491,2.947782,1.821325
36,o005,Signal check,0.529873,-0.043500,-0.073750,-0.850750,19.323964,303.685447,-605.523234,-550.461544,...,2.185339e+08,19.183894,0.041962,-1.663656e+09,2.298690,1250,81.220396,0.763514,3.272525,1.789648
37,o005,Sitting arm lap,0.514348,-0.040200,-0.075800,-0.792800,16.416734,200.051986,-556.931017,-362.892087,...,-3.103826e+09,25.134824,0.030518,-3.676353e+09,2.358491,1537,81.780660,0.813113,3.183212,1.769971


In [30]:
## The features.tsv table of the ABP dataset, aliased here from `df`.
# NOTE(review): `df` is not defined in this cell; presumably it is the frame loaded
# from features.tsv earlier in the notebook — confirm.
features_df = df
features_df 

Unnamed: 0,pid,phase,measurement,date_time,sbp,dbp,baseline_sbp,baseline_dbp,delta_sbp,delta_dbp,...,delta_dicrotic_notch_height_pressure,delta_dpdt_pressure,delta_ejection_duration_fraction_pressure,delta_ejection_duration_pressure,delta_hr_pressure,delta_quality_pressure,delta_rwat_pressure,delta_sysrise_pressure,delta_invpat_pressure,delta_rpat_pressure
0,o000,ambulatory,measurement 23,2018-01-01 12:17:45,,,135.5,93.0,,,...,,,,,,-0.753411,,,,
1,o000,ambulatory,measurement 25,2018-01-01 12:47:27,141.0,88.0,135.5,93.0,5.5,-5.0,...,-0.142595,-0.646054,0.034060,-0.0325,18.043242,0.166928,-0.035,-0.0350,-0.339737,0.00925
2,o000,ambulatory,measurement 27,2018-01-01 13:17:30,139.0,93.0,135.5,93.0,3.5,0.0,...,,,,,,-0.753411,,,,
3,o000,ambulatory,measurement 28,2018-01-01 13:46:55,,,135.5,93.0,,,...,,,,,,-0.753411,,,,
4,o000,ambulatory,measurement 30,2018-01-01 14:17:10,,,135.5,93.0,,,...,,,,,,-0.753411,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
380,a005,return,Temporal challenge start 1,2018-01-02 14:38:34,115.0,71.0,110.0,68.0,5.0,3.0,...,0.093355,-0.938597,0.016637,-0.0025,3.002960,0.000000,0.010,0.0675,-0.813435,0.01925
381,a005,return,Temporal challenge start 2,2018-01-02 14:40:13,109.0,71.0,110.0,68.0,-1.0,3.0,...,0.007684,-3.519975,-0.005762,-0.0175,2.754045,0.000000,0.010,-0.0425,-0.500740,0.01125
382,a005,return,Temporal challenge start 3,2018-01-02 14:41:54,121.5,74.0,110.0,68.0,11.5,6.0,...,-0.069275,-4.135001,0.011637,-0.0125,4.640690,0.000000,0.010,0.0125,-0.459383,0.01025
383,a005,synthetic,Calibration average values,,110.0,68.0,110.0,68.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.0000,0.000000,0.000000,0.000,0.0000,0.000000,0.00000


In [None]:
## Slice of features_df that shows only pid, measurement, sbp and dbp.
# NOTE(review): this cell does not build df_filtrado itself; it has to exist already
# from another cell — confirm that it does on a fresh Restart & Run All.
# to_csv does not create missing parent folders, so make sure 'df/' exists before
# writing; otherwise this cell fails on a fresh checkout.
os.makedirs('df', exist_ok=True)
df_filtrado.to_csv(r'df/dffiltrado.csv')

In [32]:
'''
Variação da df de features escolhidas dos pacientes sem os dados do paciente o002
Explicação: não tem dados do paciente o002 (incluindo sbp e dbp) em "features_df", o que pode causar problemas na hora do merge
'''
# English summary of the note above: variant of the per-patient feature frame without
# patient o002, because "features_df" has no data (including sbp and dbp) for o002,
# which could cause problems at merge time.
# The in-memory frame is used here; reloading it with pd.read_csv('p_features.csv')
# was left disabled.
patient_df = patient_features
patientdf_2 = patient_df.loc[patient_df['pid'] != 'o002']


In [33]:
# Display df_filtrado (columns pid, measurement, sbp, dbp) as the cell output.
df_filtrado

Unnamed: 0,pid,measurement,sbp,dbp
0,o000,measurement 23,,
1,o000,measurement 25,141.0,88.0
2,o000,measurement 27,139.0,93.0
3,o000,measurement 28,,
4,o000,measurement 30,,
...,...,...,...,...
290,o005,Sitting arm up,126.0,75.0
291,o005,Calibration average values,114.0,66.0
292,o005,Calibration closest values,115.0,65.0
293,o005,Seated calibration average values,125.0,85.0


In [37]:
# Display patientdf_2 (the patient feature frame without pid 'o002') as the cell output.
patientdf_2

Unnamed: 0,pid,measurement,temp_key_tr_md_s,temp_key_t_a_md,temp_key_t_a_b,temp_key_t_a_d,temp_key_slp_md,temp_key_id2_o,temp_key_id2_b,temp_key_id2_s,...,poly_coeff_coef4,spect_spectral_skewness,f1__,spectral_distance__,median_frequency__,spectral_maxpeaks__,spectral_roll_off__,human_range_energy__,power_bandwidth__,wavelet_entropy__
0,o000,measurement 23,0.426833,-0.050000,-0.103000,-0.621500,14.505478,168.891637,-274.382391,-362.405379,...,-1.479275e+10,24.410900,0.976562,-7.801555e+07,1.882530,284,147.213855,0.663586,3.756574,1.814845
1,o000,measurement 25,0.495073,-0.052250,-0.092667,-0.556667,18.780075,305.150761,-479.190654,-536.827714,...,1.363804e+10,25.229503,0.034332,-1.333397e+09,1.757593,49,9.420697,0.749769,1.757099,1.745904
2,o000,measurement 27,0.468891,-0.064375,-0.115000,-0.551000,16.693497,349.228281,-369.097965,-406.308738,...,2.522347e+09,20.090578,0.118256,-1.064728e+09,1.732352,306,157.968818,0.616718,3.354978,1.769589
3,o000,measurement 30,0.465073,-0.061333,-0.094000,-0.554000,19.536689,531.618439,-420.868434,-593.006072,...,-8.784733e+10,12.266472,0.457764,-1.842378e+07,3.504673,145,165.887850,0.809987,2.910361,1.832856
4,o000,measurement 33,0.380412,-0.050667,-0.112000,-0.612667,15.329098,340.068457,-242.385224,-380.308290,...,-2.268555e+10,14.697234,0.366211,-3.785477e+07,3.105590,7,161.490683,0.801297,2.582645,1.807073
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34,o005,Supine 2,0.446147,-0.045882,-0.093294,-0.795412,15.675082,115.873488,-404.729669,-232.722296,...,-3.074181e+09,31.742042,0.066757,-2.746664e+10,2.238388,2089,16.962787,0.919167,1.538569,1.743359
35,o005,Walking,0.596738,-0.046400,-0.067400,-0.654400,18.654915,352.853785,-720.871997,-551.476426,...,-3.686523e+10,17.335333,0.289917,-2.351029e+08,3.932584,553,164.887640,0.665491,2.947782,1.821325
36,o005,Signal check,0.529873,-0.043500,-0.073750,-0.850750,19.323964,303.685447,-605.523234,-550.461544,...,2.185339e+08,19.183894,0.041962,-1.663656e+09,2.298690,1250,81.220396,0.763514,3.272525,1.789648
37,o005,Sitting arm lap,0.514348,-0.040200,-0.075800,-0.792800,16.416734,200.051986,-556.931017,-362.892087,...,-3.103826e+09,25.134824,0.030518,-3.676353e+09,2.358491,1537,81.780660,0.813113,3.183212,1.769971


### Preparações para o merge

In [52]:
# Inspect the dtype of the join column on both sides of the upcoming merge.
from icecream import ic  # imported in this cell; later cells call ic as well
print("Tipos de colunas da junção:")
ic(patientdf_2['measurement'].dtype, df_filtrado['measurement'].dtype);

## Both columns report dtype "O" (object), so cast everything to str.
# assign() builds a new frame instead of writing a column into the existing object:
# patientdf_2 is a boolean-filtered selection of patient_df, and assigning a column on
# such a selection triggers pandas' SettingWithCopyWarning. Rebinding the names also
# leaves the frames they were derived from untouched.
# NOTE: astype(str) turns missing values into the literal string 'nan'.
patientdf_2 = patientdf_2.assign(measurement=patientdf_2['measurement'].astype(str))
df_filtrado = df_filtrado.assign(measurement=df_filtrado['measurement'].astype(str))

Tipos de colunas da junção:


ic| patientdf_2['measurement'].dtype: dtype('O')
    df_filtrado['measurement'].dtype: dtype('O')


In [45]:
# Summary of the `measurement` column, each line reported as (patientdf_2, df_filtrado).
# The counts are computed directly rather than through ic(): inside an f-string ic()
# cannot recover the argument source, so it only added label-less "ic| : 72, : 80"
# lines on stderr that repeated the printed report.
# NOTE: these counts look at `measurement` alone, while the merge keys on
# (pid, measurement), so names repeated across different patients are counted here.
left_key = patientdf_2['measurement']
right_key = df_filtrado['measurement']

print("Contagem de valores (patientdf_2, df_filtrado):\n")
# int(...) keeps the printed tuples as plain integers whatever the numpy scalar repr is.
print(f"número de elementos distintos em measurement: {(int(left_key.nunique()), int(right_key.nunique()))}")
print(f"número de elementos nulos em measurement: {(int(left_key.isnull().sum()), int(right_key.isnull().sum()))}")
print(f"número de duplicatas na coluna measurement: {(int(left_key.duplicated().sum()), int(right_key.duplicated().sum()))}")

ic| : 72, : 80
ic| : 0, : 0
ic| : 128, : 215


Contagem de valores (patientdf_2, df_filtrado):

número de elementos distintos em measurement: (72, 80)
número de elementos nulos em measurement: (0, 0)
número de duplicatas na coluna measurement: (128, 215)


In [46]:
# Debug-print how often each `measurement` value occurs in both frames; the trailing
# semicolon suppresses the cell's own return-value display.
ic(patientdf_2['measurement'].value_counts(), df_filtrado['measurement'].value_counts());

ic| patientdf_2['measurement'].value_counts(): measurement
                                               Sitting arm lap    9
                                               Sitting arm up     9
                                               measurement 46     5
                                               measurement 44     5
                                               measurement 43     5
                                                                 ..
                                               measurement 84     1
                                               measurement 83     1
                                               measurement 80     1
                                               measurement 68     1
                                               measurement 76     1
                                               Name: count, Length: 72, dtype: int64
    df_filtrado['measurement'].value_counts(): measurement
                                               Si

In [47]:
## Removal of duplicated join keys
# keep=False flags every occurrence of a repeated (pid, measurement) pair, so no copy
# of a repeated key is retained, not even the first one.
patientdf_v2 = patientdf_2[~patientdf_2.duplicated(subset=['pid','measurement'], keep=False)]

filtrado_v2 = df_filtrado[~df_filtrado.duplicated(subset=['pid', 'measurement'], keep=False)]

# Row/column counts of both frames after the removal.
ic(patientdf_v2.shape , filtrado_v2.shape);

ic| patientdf_v2.shape: (182, 30), filtrado_v2.shape: (271, 4)


### Merge

In [48]:
# Left join: every row of patientdf_v2 is kept, and sbp/dbp are attached from filtrado_v2
# where a row with the same (pid, measurement) exists; unmatched rows get missing values.
features_merge = patientdf_v2.merge(filtrado_v2, how='left', on=['pid','measurement'])

In [49]:
# Display the merged frame; at this point sbp and dbp are its last two columns.
features_merge

Unnamed: 0,pid,measurement,temp_key_tr_md_s,temp_key_t_a_md,temp_key_t_a_b,temp_key_t_a_d,temp_key_slp_md,temp_key_id2_o,temp_key_id2_b,temp_key_id2_s,...,f1__,spectral_distance__,median_frequency__,spectral_maxpeaks__,spectral_roll_off__,human_range_energy__,power_bandwidth__,wavelet_entropy__,sbp,dbp
0,o000,measurement 23,0.426833,-0.050000,-0.103000,-0.621500,14.505478,168.891637,-274.382391,-362.405379,...,0.976562,-7.801555e+07,1.882530,284,147.213855,0.663586,3.756574,1.814845,,
1,o000,measurement 25,0.495073,-0.052250,-0.092667,-0.556667,18.780075,305.150761,-479.190654,-536.827714,...,0.034332,-1.333397e+09,1.757593,49,9.420697,0.749769,1.757099,1.745904,141.0,88.0
2,o000,measurement 27,0.468891,-0.064375,-0.115000,-0.551000,16.693497,349.228281,-369.097965,-406.308738,...,0.118256,-1.064728e+09,1.732352,306,157.968818,0.616718,3.354978,1.769589,139.0,93.0
3,o000,measurement 30,0.465073,-0.061333,-0.094000,-0.554000,19.536689,531.618439,-420.868434,-593.006072,...,0.457764,-1.842378e+07,3.504673,145,165.887850,0.809987,2.910361,1.832856,,
4,o000,measurement 33,0.380412,-0.050667,-0.112000,-0.612667,15.329098,340.068457,-242.385224,-380.308290,...,0.366211,-3.785477e+07,3.105590,7,161.490683,0.801297,2.582645,1.807073,144.0,92.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,o005,Standing arm up,0.385843,-0.046053,-0.091526,-0.591579,18.667159,433.883075,-412.966232,-282.654217,...,0.024796,-1.536093e+10,1.785107,1955,122.449847,0.639881,3.144654,1.766911,126.0,81.0
178,o005,Supine 1,0.438956,-0.045438,-0.091438,-0.753500,16.849486,191.621919,-421.148411,-238.753265,...,0.068665,-2.376525e+10,2.319182,2105,68.789308,0.826020,2.004244,1.754207,115.0,65.0
179,o005,Supine 2,0.446147,-0.045882,-0.093294,-0.795412,15.675082,115.873488,-404.729669,-232.722296,...,0.066757,-2.746664e+10,2.238388,2089,16.962787,0.919167,1.538569,1.743359,113.0,67.0
180,o005,Walking,0.596738,-0.046400,-0.067400,-0.654400,18.654915,352.853785,-720.871997,-551.476426,...,0.289917,-2.351029e+08,3.932584,553,164.887640,0.665491,2.947782,1.821325,130.0,87.0


In [50]:
## Move the 'sbp' and 'dbp' columns to the front, right after pid and measurement.
# pop() removes each column from features_merge in place and returns it as a Series.
sbp = features_merge.pop('sbp')
dbp = features_merge.pop('dbp')

# Re-insert at positions 2 and 3 so the order becomes pid, measurement, sbp, dbp, ...
features_merge.insert(2, 'sbp', sbp)
features_merge.insert(3, 'dbp', dbp)



In [51]:
# Display the merged frame again, now with sbp and dbp placed right after measurement.
features_merge

Unnamed: 0,pid,measurement,sbp,dbp,temp_key_tr_md_s,temp_key_t_a_md,temp_key_t_a_b,temp_key_t_a_d,temp_key_slp_md,temp_key_id2_o,...,poly_coeff_coef4,spect_spectral_skewness,f1__,spectral_distance__,median_frequency__,spectral_maxpeaks__,spectral_roll_off__,human_range_energy__,power_bandwidth__,wavelet_entropy__
0,o000,measurement 23,,,0.426833,-0.050000,-0.103000,-0.621500,14.505478,168.891637,...,-1.479275e+10,24.410900,0.976562,-7.801555e+07,1.882530,284,147.213855,0.663586,3.756574,1.814845
1,o000,measurement 25,141.0,88.0,0.495073,-0.052250,-0.092667,-0.556667,18.780075,305.150761,...,1.363804e+10,25.229503,0.034332,-1.333397e+09,1.757593,49,9.420697,0.749769,1.757099,1.745904
2,o000,measurement 27,139.0,93.0,0.468891,-0.064375,-0.115000,-0.551000,16.693497,349.228281,...,2.522347e+09,20.090578,0.118256,-1.064728e+09,1.732352,306,157.968818,0.616718,3.354978,1.769589
3,o000,measurement 30,,,0.465073,-0.061333,-0.094000,-0.554000,19.536689,531.618439,...,-8.784733e+10,12.266472,0.457764,-1.842378e+07,3.504673,145,165.887850,0.809987,2.910361,1.832856
4,o000,measurement 33,144.0,92.0,0.380412,-0.050667,-0.112000,-0.612667,15.329098,340.068457,...,-2.268555e+10,14.697234,0.366211,-3.785477e+07,3.105590,7,161.490683,0.801297,2.582645,1.807073
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,o005,Standing arm up,126.0,81.0,0.385843,-0.046053,-0.091526,-0.591579,18.667159,433.883075,...,-4.076739e+10,24.421005,0.024796,-1.536093e+10,1.785107,1955,122.449847,0.639881,3.144654,1.766911
178,o005,Supine 1,115.0,65.0,0.438956,-0.045438,-0.091438,-0.753500,16.849486,191.621919,...,-6.348798e+09,32.245780,0.068665,-2.376525e+10,2.319182,2105,68.789308,0.826020,2.004244,1.754207
179,o005,Supine 2,113.0,67.0,0.446147,-0.045882,-0.093294,-0.795412,15.675082,115.873488,...,-3.074181e+09,31.742042,0.066757,-2.746664e+10,2.238388,2089,16.962787,0.919167,1.538569,1.743359
180,o005,Walking,130.0,87.0,0.596738,-0.046400,-0.067400,-0.654400,18.654915,352.853785,...,-3.686523e+10,17.335333,0.289917,-2.351029e+08,3.932584,553,164.887640,0.665491,2.947782,1.821325
